Re-posted from: https://tensorflowjulia.blogspot.com/2018/09/improving-neural-net-performance.html
This is the last exercise that uses the California housing dataset. We investigate several ways of improving the performance of neural nets:
- Different loss minimization algorithms
- Linear scaling of features
- Logarithmic scaling of features
- Clipping of features
- Z-score normalization
- Thresholding of data
The Jupyter notebook can be downloaded here.
In [0]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
In [1]:
using Plots
using StatPlots
using Distributions
gr()
using DataFrames
using TensorFlow
import CSV
import StatsBase
using PyCall
sess=Session(Graph())
california_housing_dataframe = CSV.read("california_housing_train.csv", delim=",");
# Shuffle the rows so that the training/validation split below is random.
california_housing_dataframe = california_housing_dataframe[shuffle(1:size(california_housing_dataframe, 1)),:];
In [2]:
function preprocess_features(california_housing_dataframe)
"""Prepares input features from California housing data set.
Args:
california_housing_dataframe: A DataFrame expected to contain data
from the California housing data set.
Returns:
A DataFrame that contains the features to be used for the model, including
synthetic features.
"""
selected_features = california_housing_dataframe[
[:latitude,
:longitude,
:housing_median_age,
:total_rooms,
:total_bedrooms,
:population,
:households,
:median_income]]
processed_features = selected_features
# Create a synthetic feature.
processed_features[:rooms_per_person] = (
california_housing_dataframe[:total_rooms] ./
california_housing_dataframe[:population])
return processed_features
end
function preprocess_targets(california_housing_dataframe)
"""Prepares target features (i.e., labels) from California housing data set.
Args:
california_housing_dataframe: A DataFrame expected to contain data
from the California housing data set.
Returns:
A DataFrame that contains the target feature.
"""
output_targets = DataFrame()
# Scale the target to be in units of thousands of dollars.
output_targets[:median_house_value] = (
california_housing_dataframe[:median_house_value] ./ 1000.0)
return output_targets
end
Out[2]:
In [3]:
# Choose the first 12000 (out of 17000) examples for training.
training_examples = preprocess_features(head(california_housing_dataframe,12000))
training_targets = preprocess_targets(head(california_housing_dataframe,12000))
# Choose the last 5000 (out of 17000) examples for validation.
validation_examples = preprocess_features(tail(california_housing_dataframe,5000))
validation_targets = preprocess_targets(tail(california_housing_dataframe,5000))
# Double-check that we've done the right thing.
println("Training examples summary:")
describe(training_examples)
println("Validation examples summary:")
describe(validation_examples)
println("Training targets summary:")
describe(training_targets)
println("Validation targets summary:")
describe(validation_targets)
Out[3]:
In [10]:
function construct_columns(input_features)
"""Construct the TensorFlow Feature Columns.
Args:
input_features: DataFrame of the numerical input features to use.
Returns:
A Float64 matrix of the feature values.
"""
out=convert(Array, input_features[:,:])
return convert.(Float64,out)
end
Out[10]:
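For orientation: construct_columns simply turns a DataFrame into a dense Float64 matrix that can later be fed into a TensorFlow placeholder. A quick, hypothetical check on the training examples loaded above:
m = construct_columns(training_examples)
println(size(m)) # (12000, 9): one row per example, one column per feature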
In [4]:
function create_batches(features, targets, steps, batch_size=5, num_epochs=0)
"""Create batches.
Args:
features: Input features.
targets: Target column.
steps: Number of steps.
batch_size: Batch size.
num_epochs: Number of epochs; 0 lets the function calculate the required number automatically.
Returns:
An extended set of feature and target columns from which batches can be extracted.
"""
if(num_epochs==0)
num_epochs=ceil(Int, batch_size*steps/size(features,1))
end
features_batches=copy(features)
target_batches=copy(targets)
for i=1:num_epochs
select=shuffle(1:size(features,1))
if i==1
features_batches=(features[select,:])
target_batches=(targets[select,:])
else
append!(features_batches, features[select,:])
append!(target_batches, targets[select,:])
end
end
return features_batches, target_batches
end
function next_batch(features_batches, targets_batches, batch_size, iter)
"""Next batch.
Args:
features_batches: Features batches from create_batches.
targets_batches: Target batches from create_batches.
batch_size: Batch size.
iter: Number of the current iteration
Returns:
The next batch of feature and target columns.
"""
# Use mod1 so that an index landing exactly on the end of the array maps to the last row instead of row 0.
select=mod1((iter-1)*batch_size+1, size(features_batches,1)):mod1(iter*batch_size, size(features_batches,1));
ds=features_batches[select,:];
target=targets_batches[select,:];
return ds, target
end
Out[4]:
In [6]:
function my_input_fn(features_batches, targets_batches, iter, batch_size=5, shuffle_flag=1)
"""Prepares a batch of features and labels for model training.
Args:
features_batches: Features batches from create_batches.
targets_batches: Target batches from create_batches.
iter: Number of the current iteration
batch_size: Batch size.
shuffle_flag: Determines whether the data is shuffled before being returned
Returns:
Tuple of (features, labels) for next data batch
"""
# Construct a dataset, and configure batching/repeating.
ds, target = next_batch(features_batches, targets_batches, batch_size, iter)
# Shuffle the data, if specified.
if shuffle_flag==1
select=shuffle(1:size(ds, 1));
ds = ds[select,:]
target = target[select, :]
end
# Return the next batch of data.
return ds, target
end
Out[6]:
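To see how the batching pipeline fits together, here is a minimal sketch on hypothetical toy data: create_batches pre-generates enough shuffled epochs, and my_input_fn then slices out one batch per iteration.
# Toy data: 10 rows, batch size 3, 4 steps.
toy_features = DataFrame(x=collect(1.0:10.0))
toy_targets = DataFrame(y=collect(11.0:20.0))
features_b, targets_b = create_batches(toy_features, toy_targets, 4, 3)
for iter=1:4
f, t = my_input_fn(features_b, targets_b, iter, 3)
println("batch ", iter, ": ", size(f,1), " rows")
end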
In [14]:
function train_nn_regression_model(my_optimizer,
steps,
batch_size,
hidden_units,
keep_probability,
training_examples,
training_targets,
validation_examples,
validation_targets)
"""Trains a neural network model of one feature.
Args:
my_optimizer: Optimizer function for the training step
learning_rate: A `float`, the learning rate.
steps: A non-zero `int`, the total number of training steps. A training step
consists of a forward and backward pass using a single batch.
batch_size: A non-zero `int`, the batch size.
hidden_units: A vector describing the layout of the neural network
keep_probability: A `float`, the probability of keeping a node active during one training step.
Returns:
p1: Plot of RMSE for the different periods
training_rmse: Training RMSE values for the different periods
validation_rmse: Validation RMSE values for the different periods
"""
periods = 10
steps_per_period = steps / periods
# Create feature columns.
feature_columns = placeholder(Float32, shape=[-1, size(construct_columns(training_examples),2)])
target_columns = placeholder(Float32, shape=[-1, size(construct_columns(training_targets),2)])
# Network parameters
push!(hidden_units,size(training_targets,2)) # append an output layer whose width matches the number of target columns
activation_functions = Vector{Function}(size(hidden_units,1))
activation_functions[1:end-1]=z->nn.dropout(nn.relu(z), keep_probability)
activation_functions[end] = identity # Last function should be identity, as we need the logits
# create network - professional template
Zs = [feature_columns]
for (ii,(hlsize, actfun)) in enumerate(zip(hidden_units, activation_functions))
Wii = get_variable("W_$ii"*randstring(4), [get_shape(Zs[end], 2), hlsize], Float32)
bii = get_variable("b_$ii"*randstring(4), [hlsize], Float32)
Zii = actfun(Zs[end]*Wii + bii)
push!(Zs, Zii)
end
y=Zs[end]
loss=reduce_sum((target_columns - y).^2)
features_batches, targets_batches = create_batches(training_examples, training_targets, steps, batch_size)
# Optimizer setup with gradient clipping
gvs = train.compute_gradients(my_optimizer, loss)
capped_gvs = [(clip_by_norm(grad, 5.), var) for (grad, var) in gvs]
my_optimizer = train.apply_gradients(my_optimizer,capped_gvs)
run(sess, global_variables_initializer())
# Train the model, but do so inside a loop so that we can periodically assess
# loss metrics.
println("Training model...")
println("RMSE (on training data):")
training_rmse = []
validation_rmse=[]
for period in 1:periods
# Train the model, starting from the prior state.
for i=1:steps_per_period
features, labels = my_input_fn(features_batches, targets_batches, convert(Int,(period-1)*steps_per_period+i), batch_size)
run(sess, my_optimizer, Dict(feature_columns=>construct_columns(features), target_columns=>construct_columns(labels)))
end
# Take a break and compute predictions.
training_predictions = run(sess, y, Dict(feature_columns=> construct_columns(training_examples)));
validation_predictions = run(sess, y, Dict(feature_columns=> construct_columns(validation_examples)));
# Compute loss.
training_mean_squared_error = mean((training_predictions- construct_columns(training_targets)).^2)
training_root_mean_squared_error = sqrt(training_mean_squared_error)
validation_mean_squared_error = mean((validation_predictions- construct_columns(validation_targets)).^2)
validation_root_mean_squared_error = sqrt(validation_mean_squared_error)
# Occasionally print the current loss.
println(" period ", period, ": ", training_root_mean_squared_error)
# Add the loss metrics from this period to our list.
push!(training_rmse, training_root_mean_squared_error)
push!(validation_rmse, validation_root_mean_squared_error)
end
println("Model training finished.")
# Output a graph of loss metrics over periods.
p1=plot(training_rmse, label="training", title="Root Mean Squared Error vs. Periods", ylabel="RMSE", xlabel="Periods")
p1=plot!(validation_rmse, label="validation")
#
println("Final RMSE (on training data): ", training_rmse[end])
println("Final RMSE (on validation data): ", validation_rmse[end])
return p1, training_rmse, validation_rmse
end
Out[14]:
In [11]:
p1, training_rmse, validation_rmse = train_nn_regression_model(
train.GradientDescentOptimizer(0.0007), #optimizer & learning rate
5000, #steps
70, #batch_size
[10, 10], #hidden_units
1.0, # keep probability
training_examples,
training_targets,
validation_examples,
validation_targets)
Out[11]:
In [12]:
plot(p1)
Out[12]:
In [13]:
function linear_scale(series)
min_val = minimum(series)
max_val = maximum(series)
scale = (max_val - min_val) / 2.0
return (series .- min_val) ./ scale .- 1.0
end
Out[13]:
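A one-line sanity check (hypothetical values) confirms that linear_scale maps the minimum of a series to -1, the midpoint to 0, and the maximum to +1:
println(linear_scale([0.0, 5.0, 10.0])) # [-1.0, 0.0, 1.0]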
In [15]:
function normalize_linear_scale(examples_dataframe)
"""Returns a version of the input `DataFrame` that has all its features normalized linearly."""
processed_features = DataFrame()
processed_features[:latitude] = linear_scale(examples_dataframe[:latitude])
processed_features[:longitude] = linear_scale(examples_dataframe[:longitude])
processed_features[:housing_median_age] = linear_scale(examples_dataframe[:housing_median_age])
processed_features[:total_rooms] = linear_scale(examples_dataframe[:total_rooms])
processed_features[:total_bedrooms] = linear_scale(examples_dataframe[:total_bedrooms])
processed_features[:population] = linear_scale(examples_dataframe[:population])
processed_features[:households] = linear_scale(examples_dataframe[:households])
processed_features[:median_income] = linear_scale(examples_dataframe[:median_income])
processed_features[:rooms_per_person] = linear_scale(examples_dataframe[:rooms_per_person])
return processed_features
end
normalized_dataframe = normalize_linear_scale(preprocess_features(california_housing_dataframe))
normalized_training_examples = head(normalized_dataframe, 12000)
normalized_validation_examples = tail(normalized_dataframe, 5000)
p1, graddescent_training_rmse, graddescent_validation_rmse = train_nn_regression_model(
train.GradientDescentOptimizer(0.005),
2000,
50,
[10, 10],
1.0,
normalized_training_examples,
training_targets,
normalized_validation_examples,
validation_targets)
Out[15]:
In [16]:
describe(normalized_dataframe)
Out[16]:
In [17]:
plot(p1)
Out[17]:
In [42]:
p1, momentum_training_rmse, momentum_validation_rmse = train_nn_regression_model(
train.MomentumOptimizer(0.005, 0.05),
2000,
50,
[10, 10],
1.0,
normalized_training_examples,
training_targets,
normalized_validation_examples,
validation_targets)
Out[42]:
In [43]:
plot(p1)
Out[43]:
In [52]:
p1, adam_training_rmse, adam_validation_rmse = train_nn_regression_model(
train.AdamOptimizer(0.2),
2000,
50,
[10, 10],
1.0,
normalized_training_examples,
training_targets,
normalized_validation_examples,
validation_targets)
Out[52]:
In [53]:
plot(p1)
Out[53]:
In [54]:
p2=plot(graddescent_training_rmse, label="Gradient descent training", ylabel="RMSE", xlabel="Periods", title="Root Mean Squared Error vs. Periods")
p2=plot!(graddescent_validation_rmse, label="Gradient descent validation")
p2=plot!(adam_training_rmse, label="Adam training")
p2=plot!(adam_validation_rmse, label="Adam validation")
p2=plot!(momentum_training_rmse, label="Momentum training")
p2=plot!(momentum_validation_rmse, label="Momentum validation")
Out[54]:
In [22]:
# I'd like a better solution to automate this, but all ideas for eval
# on quoted expressions failed :-( (a loop-based sketch follows after this cell)
hist1=histogram(normalized_training_examples[:latitude], bins=20, title="latitude" )
hist2=histogram(normalized_training_examples[:longitude], bins=20, title="longitude" )
hist3=histogram(normalized_training_examples[:housing_median_age], bins=20, title="housing_median_age" )
hist4=histogram(normalized_training_examples[:total_rooms], bins=20, title="total_rooms" )
hist5=histogram(normalized_training_examples[:total_bedrooms], bins=20, title="total_bedrooms" )
hist6=histogram(normalized_training_examples[:population], bins=20, title="population" )
hist7=histogram(normalized_training_examples[:households], bins=20, title="households" )
hist8=histogram(normalized_training_examples[:median_income], bins=20, title="median_income" )
hist9=histogram(normalized_training_examples[:rooms_per_person], bins=20, title="rooms_per_person" )
plot(hist1, hist2, hist3, hist4, hist5, hist6, hist7, hist8, hist9, layout=9, legend=false)
Out[22]:
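The manual hist1...hist9 block above can in fact be automated without eval: a comprehension over the column names builds one histogram per column. A minimal sketch, assuming the same normalized_training_examples DataFrame:
# Build one histogram per column by iterating over the column names,
# avoiding the manual hist1..hist9 variables above.
hists = [histogram(normalized_training_examples[col], bins=20, title=string(col)) for col in names(normalized_training_examples)]
plot(hists..., layout=length(hists), legend=false)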
In [23]:
function log_normalize(series)
return log.(series.+1.0)
end
function clip(series, clip_to_min, clip_to_max)
return min.(max.(series, clip_to_min), clip_to_max)
end
function z_score_normalize(series)
mean_val = mean(series)
std_dv = std(series, mean=mean_val)
return (series .- mean_val) ./ std_dv
end
function binary_threshold(series, threshold)
return map(x->(x > threshold ? 1 : 0), series)
end
Out[23]:
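To illustrate what each helper does, here is a minimal sketch on a toy series (hypothetical values, not part of the dataset):
# Toy series spanning several orders of magnitude.
s = [1.0, 10.0, 100.0, 1000.0]
println(log_normalize(s)) # compresses the large dynamic range
println(clip(s, 5.0, 500.0)) # [5.0, 10.0, 100.0, 500.0]
println(z_score_normalize(s)) # zero mean, unit standard deviation
println(binary_threshold(s, 50.0)) # [0, 0, 1, 1]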
In [24]:
hist10=histogram(log_normalize(california_housing_dataframe[:households]), title="households")
hist11=histogram(log_normalize(california_housing_dataframe[:total_rooms]), title="total_rooms")
hist12=histogram(log_normalize(training_examples[:rooms_per_person]), title="rooms_per_person")
plot(hist10, hist11, hist12, layout=3, legend=false)
Out[24]:
In [46]:
function normalize_df(examples_dataframe)
"""Returns a version of the input `DataFrame` that has all its features normalized."""
processed_features = DataFrame()
processed_features[:households] = log_normalize(examples_dataframe[:households])
processed_features[:median_income] = log_normalize(examples_dataframe[:median_income])
processed_features[:total_bedrooms] = log_normalize(examples_dataframe[:total_bedrooms])
processed_features[:latitude] = linear_scale(examples_dataframe[:latitude])
processed_features[:longitude] = linear_scale(examples_dataframe[:longitude])
processed_features[:housing_median_age] = linear_scale(examples_dataframe[:housing_median_age])
processed_features[:population] = linear_scale(clip(examples_dataframe[:population], 0, 5000))
processed_features[:rooms_per_person] = linear_scale(clip(examples_dataframe[:rooms_per_person], 0, 5))
processed_features[:total_rooms] = linear_scale(clip(examples_dataframe[:total_rooms], 0, 10000))
return processed_features
end
normalized_dataframe = normalize_df(preprocess_features(california_housing_dataframe))
normalized_training_examples = head(normalized_dataframe,12000)
normalized_validation_examples = tail(normalized_dataframe,5000)
p1, adam_training_rmse, adam_validation_rmse = train_nn_regression_model(
train.AdamOptimizer(0.15),
2000,
50,
[10, 10],
1.0,
normalized_training_examples,
training_targets,
normalized_validation_examples,
validation_targets)
Out[46]:
In [47]:
plot(p1)
Out[47]:
In [35]:
function location_location_location(examples_dataframe)
"""Returns a version of the input `DataFrame` that keeps only the latitude and longitude."""
processed_features = DataFrame()
processed_features[:latitude] = linear_scale(examples_dataframe[:latitude])
processed_features[:longitude] = linear_scale(examples_dataframe[:longitude])
return processed_features
end
lll_dataframe = location_location_location(preprocess_features(california_housing_dataframe))
lll_training_examples = head(lll_dataframe,12000)
lll_validation_examples = tail(lll_dataframe,5000)
p1, lll_training_rmse, lll_validation_rmse = train_nn_regression_model(
train.AdamOptimizer(0.15),
500,
100,
[10, 10, 5, 5],
1.0,
lll_training_examples,
training_targets,
lll_validation_examples,
validation_targets)
Out[35]:
In [36]:
plot(p1)
Out[36]:
In [ ]:
#EOF