Linear Regression Using TF DNN Regressor Estimator API

In [1]:
# Import Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load Data
# Load Dataset
boston = load_boston()
In [3]:
# Seperate Data into Features and Labels and load them as a Pandas Dataframe
# Features
features_df = pd.DataFrame(np.array(boston.data), columns=[boston.feature_names])

features_df.head()
Out[3]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33
In [4]:
# Labels
labels_df = pd.DataFrame(np.array(boston.target), columns=['labels'])
labels_df.head()
Out[4]:
labels
0 24.0
1 21.6
2 34.7
3 33.4
4 36.2
In [5]:
# Combined Data
combined_data = pd.concat([features_df,labels_df], axis=1)
combined_data.head()
Out[5]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT labels
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33 36.2
In [6]:
# Train Test Split
from sklearn.model_selection import train_test_split
In [7]:
# Train Test Split
# Training Data = 80% of Dataset
# Test Data = 20% of Dataset
X_train, X_test, y_train, y_test = train_test_split(features_df, labels_df, test_size=0.2, random_state=101)
In [8]:
# Normalize Data
from sklearn.preprocessing import StandardScaler
In [9]:
# Define the Preprocessing Method and Fit Training Data to it
scaler = StandardScaler()
scaler.fit(X_train)
Out[9]:
StandardScaler(copy=True, with_mean=True, with_std=True)
In [10]:
# Make X_train to be the Scaled Version of Data
# This process scales all the values in all 6 columns and replaces them with the new values
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
In [11]:
# Converting from Pandas Dataframe to Numpy Arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
In [12]:
# Get the Type of Training Data
type(X_train), type(y_train)
Out[12]:
(numpy.ndarray, numpy.ndarray)
In [13]:
# Apply same Normalization for Test Features
scal = StandardScaler()
scal.fit(X_test)
Out[13]:
StandardScaler(copy=True, with_mean=True, with_std=True)
In [14]:
# Make X_test to be the Scaled Version of Data
# This process scales all the values in all columns and replaces them with the new values
X_test = pd.DataFrame(data=scal.transform(X_test), columns=X_test.columns, index=X_test.index)
In [15]:
# Convert test features and Labels to Numpy Arrays
X_test = np.array(X_test)
y_test = np.array(y_test)
In [16]:
# Get the Type of Test Data
type(X_test), type(y_test)
Out[16]:
(numpy.ndarray, numpy.ndarray)

Till now all the code has been the same i.e. load the dataset, train test split, preprocessing etc. From here we start defining the Tensorflow code to train the model on this dataset and get some inference from it.

Define TF DNN Regressor Model

In [17]:
# Define Feature Columns for the Linear Regressor
features_df.columns
Out[17]:
Index(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
       'PTRATIO', 'B', 'LSTAT'],
      dtype='object')
In [18]:
# Make Feature Columns
feat_cols = [tf.feature_column.numeric_column('x', shape=np.array(X_train).shape[1:])]
In [19]:
# Make Input Function
input_func = tf.estimator.inputs.numpy_input_fn({'x':X_train}, y_train, batch_size=1, num_epochs=2000, shuffle=True)
In [20]:
# Set up Estimator Training Inputs
train_input_func = tf.estimator.inputs.numpy_input_fn(X_train, y_train, batch_size=1, num_epochs=1000, shuffle=False)
In [21]:
# Set up Estimator Test Inputs
eval_input_func = tf.estimator.inputs.numpy_input_fn({'x': X_test}, y_test, batch_size=1, num_epochs=1, shuffle=False)
In [22]:
# Define DNN Regressor Model
dnn_model = tf.estimator.DNNRegressor(hidden_units=[10,10,10],feature_columns=feat_cols, optimizer='Adam')
INFO:tensorflow:Using default config.
WARNING:tensorflow:Using temporary folder as model directory: C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh
INFO:tensorflow:Using config: {'_task_id': 0, '_log_step_count_steps': 100, '_save_summary_steps': 100, '_tf_random_seed': None, '_session_config': None, '_num_worker_replicas': 1, '_is_chief': True, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_save_checkpoints_steps': None, '_num_ps_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001B276870BE0>, '_master': '', '_task_type': 'worker', '_model_dir': 'C:\\Users\\AD1026~1\\AppData\\Local\\Temp\\tmpz032gvsh', '_service': None, '_save_checkpoints_secs': 600}
In [23]:
# Train the DNN Regressor Estimator
dnn_model.train(input_fn=input_func, steps=2000)
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh\model.ckpt.
INFO:tensorflow:step = 1, loss = 482.984
INFO:tensorflow:global_step/sec: 268.81
INFO:tensorflow:step = 101, loss = 0.0403325 (0.374 sec)
INFO:tensorflow:global_step/sec: 440.189
INFO:tensorflow:step = 201, loss = 0.0239809 (0.226 sec)
INFO:tensorflow:global_step/sec: 460.001
INFO:tensorflow:step = 301, loss = 0.655713 (0.217 sec)
INFO:tensorflow:global_step/sec: 410.858
INFO:tensorflow:step = 401, loss = 36.0159 (0.243 sec)
INFO:tensorflow:global_step/sec: 401.206
INFO:tensorflow:step = 501, loss = 28.3889 (0.249 sec)
INFO:tensorflow:global_step/sec: 396.394
INFO:tensorflow:step = 601, loss = 1.08228 (0.253 sec)
INFO:tensorflow:global_step/sec: 469.052
INFO:tensorflow:step = 701, loss = 0.00453171 (0.212 sec)
INFO:tensorflow:global_step/sec: 398.002
INFO:tensorflow:step = 801, loss = 22.3368 (0.253 sec)
INFO:tensorflow:global_step/sec: 403.109
INFO:tensorflow:step = 901, loss = 48.1616 (0.246 sec)
INFO:tensorflow:global_step/sec: 391.776
INFO:tensorflow:step = 1001, loss = 3.20024 (0.256 sec)
INFO:tensorflow:global_step/sec: 335.569
INFO:tensorflow:step = 1101, loss = 33.4987 (0.301 sec)
INFO:tensorflow:global_step/sec: 350.146
INFO:tensorflow:step = 1201, loss = 3.65411 (0.282 sec)
INFO:tensorflow:global_step/sec: 420.297
INFO:tensorflow:step = 1301, loss = 108.386 (0.239 sec)
INFO:tensorflow:global_step/sec: 406.924
INFO:tensorflow:step = 1401, loss = 28.4301 (0.246 sec)
INFO:tensorflow:global_step/sec: 407.143
INFO:tensorflow:step = 1501, loss = 0.148344 (0.245 sec)
INFO:tensorflow:global_step/sec: 404.069
INFO:tensorflow:step = 1601, loss = 11.0032 (0.248 sec)
INFO:tensorflow:global_step/sec: 398.313
INFO:tensorflow:step = 1701, loss = 7.52383 (0.253 sec)
INFO:tensorflow:global_step/sec: 469.719
INFO:tensorflow:step = 1801, loss = 33.8441 (0.212 sec)
INFO:tensorflow:global_step/sec: 472.295
INFO:tensorflow:step = 1901, loss = 28.3691 (0.211 sec)
INFO:tensorflow:Saving checkpoints for 2000 into C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh\model.ckpt.
INFO:tensorflow:Loss for final step: 0.471493.
Out[23]:
<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x1b276870860>
In [24]:
# Evaluate the Model
dnn_model.evaluate(input_fn=eval_input_func)
INFO:tensorflow:Starting evaluation at 2018-01-30-19:04:32
INFO:tensorflow:Restoring parameters from C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh\model.ckpt-2000
INFO:tensorflow:Finished evaluation at 2018-01-30-19:04:32
INFO:tensorflow:Saving dict for global step 2000: average_loss = 34.9208, global_step = 2000, loss = 34.9208
Out[24]:
{'average_loss': 34.920826, 'global_step': 2000, 'loss': 34.920826}
In [25]:
# Predictions
predictions = dnn_model.predict(input_fn=eval_input_func)
In [26]:
pred = list(predictions)
INFO:tensorflow:Restoring parameters from C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh\model.ckpt-2000
In [27]:
# Get Predicted Values
predicted_vals = []

for pred in dnn_model.predict(input_fn=eval_input_func):
    predicted_vals.append(pred['predictions'])
INFO:tensorflow:Restoring parameters from C:\Users\AD1026~1\AppData\Local\Temp\tmpz032gvsh\model.ckpt-2000
In [28]:
print(predicted_vals)
[array([ 35.00705719], dtype=float32), array([ 27.04526901], dtype=float32), array([ 13.91772175], dtype=float32), array([ 13.91772175], dtype=float32), array([ 25.65153503], dtype=float32), array([ 30.04853058], dtype=float32), array([ 46.24923706], dtype=float32), array([ 13.91772175], dtype=float32), array([ 34.85010529], dtype=float32), array([ 13.91772175], dtype=float32), array([ 24.87260818], dtype=float32), array([ 14.4454813], dtype=float32), array([ 19.76637268], dtype=float32), array([ 19.17050743], dtype=float32), array([ 22.16109085], dtype=float32), array([ 16.73081207], dtype=float32), array([ 13.91772175], dtype=float32), array([ 26.49022675], dtype=float32), array([ 25.59998703], dtype=float32), array([ 21.80430984], dtype=float32), array([ 14.05508327], dtype=float32), array([ 20.02637863], dtype=float32), array([ 21.47007751], dtype=float32), array([ 23.42528915], dtype=float32), array([ 27.26071167], dtype=float32), array([ 19.58972931], dtype=float32), array([ 30.69213486], dtype=float32), array([ 17.57925415], dtype=float32), array([ 25.92541122], dtype=float32), array([ 28.19125366], dtype=float32), array([ 19.28020096], dtype=float32), array([ 20.02760506], dtype=float32), array([ 29.88674545], dtype=float32), array([ 39.5360527], dtype=float32), array([ 32.2663269], dtype=float32), array([ 21.23168564], dtype=float32), array([ 13.91772175], dtype=float32), array([ 19.21705627], dtype=float32), array([ 13.91772175], dtype=float32), array([ 29.50681686], dtype=float32), array([ 20.41342163], dtype=float32), array([ 17.99222755], dtype=float32), array([ 37.71670532], dtype=float32), array([ 15.36322689], dtype=float32), array([ 19.54813766], dtype=float32), array([ 23.34739304], dtype=float32), array([ 30.88521957], dtype=float32), array([ 15.91166401], dtype=float32), array([ 22.37442398], dtype=float32), array([ 23.75032043], dtype=float32), array([ 33.80841446], dtype=float32), array([ 46.54713821], dtype=float32), array([ 19.86597824], dtype=float32), array([ 13.91772175], dtype=float32), array([ 30.2628212], dtype=float32), array([ 13.91772175], dtype=float32), array([ 20.18569756], dtype=float32), array([ 19.0000267], dtype=float32), array([ 22.21580887], dtype=float32), array([ 18.44823456], dtype=float32), array([ 26.15736008], dtype=float32), array([ 13.91772175], dtype=float32), array([ 17.63124847], dtype=float32), array([ 20.56375122], dtype=float32), array([ 13.91772175], dtype=float32), array([ 22.10385132], dtype=float32), array([ 21.35369492], dtype=float32), array([ 19.78640366], dtype=float32), array([ 13.91772175], dtype=float32), array([ 20.05997276], dtype=float32), array([ 20.80443192], dtype=float32), array([ 22.06957626], dtype=float32), array([ 23.048172], dtype=float32), array([ 19.23746109], dtype=float32), array([ 23.95395279], dtype=float32), array([ 14.89324093], dtype=float32), array([ 40.72409821], dtype=float32), array([ 13.91772175], dtype=float32), array([ 28.33085632], dtype=float32), array([ 13.91772175], dtype=float32), array([ 18.62916565], dtype=float32), array([ 20.52998734], dtype=float32), array([ 30.69400787], dtype=float32), array([ 14.79286098], dtype=float32), array([ 13.91772175], dtype=float32), array([ 20.0707283], dtype=float32), array([ 17.31237793], dtype=float32), array([ 25.36876297], dtype=float32), array([ 21.90352249], dtype=float32), array([ 14.10845661], dtype=float32), array([ 13.91772175], dtype=float32), array([ 13.91772175], dtype=float32), array([ 25.3900795], dtype=float32), array([ 37.3469429], dtype=float32), array([ 13.91772175], dtype=float32), array([ 40.28845978], dtype=float32), array([ 13.91772175], dtype=float32), array([ 32.60623932], dtype=float32), array([ 13.91772175], dtype=float32), array([ 19.47673035], dtype=float32), array([ 34.5189476], dtype=float32), array([ 20.11509323], dtype=float32)]
In [29]:
# Import Mean Squared Error from Scikit Learn
from sklearn.metrics import mean_squared_error

# Calculate the Mean Squared Error
mse = mean_squared_error(predicted_vals, y_test)
print('Mean Squared Error [DNNRegrssor]: ',mse)
Mean Squared Error [DNNRegrssor]:  34.9208151998