Linear Regression Using Tensorflow¶

# Import Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# Load Data
# Load Dataset
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
boston = load_boston()

# Separate Data into Features and Labels and load them as Pandas DataFrames
# Features
# The feature names are passed directly as the column labels. Wrapping them in
# an extra list (columns=[names]) makes pandas build a one-level MultiIndex
# instead of a plain column Index.
features_df = pd.DataFrame(boston.data, columns=boston.feature_names)

features_df.head()

# Labels
# Stored as a single-column DataFrame named 'labels'
labels_df = pd.DataFrame(boston.target, columns=['labels'])
labels_df.head()

# Train Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as Test Data; the remaining 80% is Training Data.
# random_state is fixed so that the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features_df,
    labels_df,
    test_size=0.2,
    random_state=101,
)

# Normalize Data
from sklearn.preprocessing import StandardScaler

# Create the Preprocessing Method and fit it on the Training Data
# (fit() returns the fitted scaler itself, so both steps can be chained)
scaler = StandardScaler().fit(X_train)

# Echo the fitted scaler as the cell output
scaler

StandardScaler(copy=True, with_mean=True, with_std=True)

# Make X_train the Scaled Version of the Data
# This scales the values in every feature column and replaces them with the
# new values. The result is kept directly as a NumPy array; building an
# intermediate DataFrame only to convert it back is unnecessary.
X_train = np.asarray(scaler.transform(X_train))

# Convert the training Labels from a Pandas DataFrame to a NumPy Array
y_train = np.array(y_train)

# Get the Type of Training Data
type(X_train), type(y_train)

(numpy.ndarray, numpy.ndarray)

# Apply same Normalization for Test Features
# Reuse the scaler that was fitted on the training data instead of fitting a
# second one on the test set. Fitting on X_test would normalize the test
# features with a different mean/std than the model was trained on (and would
# use test-set statistics during preprocessing).
scal = scaler

# Echo the scaler as the cell output
scal

StandardScaler(copy=True, with_mean=True, with_std=True)

# Make X_test the Scaled Version of the Data
# This scales the values in every feature column and replaces them with the
# new values. The result is kept directly as a NumPy array; building an
# intermediate DataFrame only to convert it back is unnecessary.
X_test = np.asarray(scal.transform(X_test))

# Convert the test Labels from a Pandas DataFrame to a NumPy Array
y_test = np.array(y_test)

# Get the Type of Test Data
type(X_test), type(y_test)

(numpy.ndarray, numpy.ndarray)

Up to this point, all the code has been the same as usual, i.e. loading the dataset, the train/test split, preprocessing, etc. From here on, we define the TensorFlow code that trains a model on this dataset and draws inferences from it.

Define Tensorflow Model¶

# Define Training Parameters

# Learning Rate passed to the optimizer
lr = 0.1

# Number of training iterations (epochs) to run
epochs = 1000

# Define Features and Label Placeholders

# Features: any number of rows, one column per feature in X_train
X = tf.placeholder(dtype=tf.float32, shape=[None, X_train.shape[1]])

# Labels: any number of rows, a single column
y = tf.placeholder(dtype=tf.float32, shape=[None, 1])

So, how do we define placeholders in TensorFlow? And how do we define the shape of a placeholder?

Well, remember the shape of the training data X_train, i.e. (404, 13). Since we might send the inputs in batches instead of all the samples at once, the number of rows is set to None so that it can be replaced by the batch size. As for the columns, X_train has 13 columns, so that stays the same.

For the label placeholder, remember the shape of the labels y_train, i.e. (404, 1). Since we might be feeding the features in batches, the number of labels should equal the batch size. Hence, we set the number of rows, i.e. the number of output values, to None and the number of columns to 1.

# Define Model Parameters (the values that are learned during training)

# Weight: one weight per feature, stored as a column of shape
# (number of features, 1) so that matmul(X, W) gives one value per input row
W = tf.Variable(tf.ones([X_train.shape[1], 1]))

# Bias: a single intercept value. With one value per feature the bias would be
# broadcast against the (rows, 1) result of matmul(X, W), turning every
# prediction into a row of identical copies instead of a single value.
b = tf.Variable(tf.ones([1]))

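So, how do we define the parameters, i.e. the values that the model fine-tunes during training? (Strictly speaking, these are model parameters rather than hyperparameters; hyperparameters are settings such as the learning rate and the number of epochs.) We initialize the weights as an array of ones with a shape of (13, 1). You may ask, why 13?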

Well, say we send one row of values as input at a time. How many values do we get at the input? It's 13, i.e. one value from each feature. So, the number of weight values required for each input is equal to 13.

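The bias, on the other hand, needs only a single value: it is one intercept term that is added to the weighted sum of the 13 features. (If the bias is given 13 values instead, broadcasting turns every prediction into 13 identical copies.)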

# Op that initializes all Variables when it is run
init = tf.global_variables_initializer()

# Define the Output Predictions Function, the Cost Function and the Optimizer

# Predictions
# y_hat = (X*W + b)
y_hat = tf.matmul(X, W) + b

# Cost Function
# MSE: mean of the squared differences between labels and predictions
cost = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))

# Gradient Descent Optimizer that Minimizes the Cost
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss=cost)

# NumPy array that stores the cost after every Epoch
# Comes in handy while plotting the cost vs epochs
# It starts out empty (length 0): np.empty with a non-zero length would leave
# an uninitialized, arbitrary value as the first entry of the history.
cost_history = np.empty(shape=[0], dtype=float)

Train the Model¶

with tf.Session() as sess:
    # Initialize all Variables
    sess.run(init)

    # Defined up front so the summary line after the loop also works when
    # epochs is 0 and the loop body never runs
    err = None

    for epoch in range(epochs):
        # Run one optimizer step on the full training set and fetch the cost.
        # The optimizer op returns nothing useful, so its result is discarded.
        _, err = sess.run([optimizer, cost], feed_dict={X: X_train, y: y_train})

        # Add the calculated cost to the array
        cost_history = np.append(cost_history, err)

        # Print the Loss/Error after every 100 epochs
        if epoch % 100 == 0:
            print('Epoch: {0}, Error: {1}'.format(epoch, err))

    # Final summary line: total number of epochs run and the last cost
    print('Epoch: {0}, Error: {1}'.format(epochs, err))

    # Values of Weight & Bias after Training (fetched in a single run call)
    new_W, new_b = sess.run([W, b])

    # Predicted Labels for the test features
    y_pred = sess.run(y_hat, feed_dict={X: X_test})

    # Mean Squared Error between the predictions and the test labels
    mse = sess.run(tf.reduce_mean(tf.square(y_pred - y_test)))

Epoch: 0, Error: 585.472412109375
Epoch: 100, Error: 39.73207473754883
Epoch: 200, Error: 20.127155303955078
Epoch: 300, Error: 19.24439239501953
Epoch: 400, Error: 19.204633712768555
Epoch: 500, Error: 19.202842712402344
Epoch: 600, Error: 19.202762603759766
Epoch: 700, Error: 19.202756881713867
Epoch: 800, Error: 19.202756881713867
Epoch: 900, Error: 19.202756881713867
Epoch: 1000, Error: 19.202756881713867

# Weights fetched from the session after training
print('Trained Weights: \n', new_W)

Trained Weights: 
 [[-0.78032321]
 [ 0.95225394]
 [ 0.11739694]
 [ 0.76530421]
 [-1.7784487 ]
 [ 2.69962621]
 [-0.02095268]
 [-3.0123353 ]
 [ 2.31343269]
 [-1.76823843]
 [-1.9098078 ]
 [ 0.7654348 ]
 [-3.76173115]]

# Bias fetched from the session after training
print('Trained Bias: \n', new_b)

Trained Bias: 
 [ 22.3368206  22.3368206  22.3368206  22.3368206  22.3368206  22.3368206
  22.3368206  22.3368206  22.3368206  22.3368206  22.3368206  22.3368206
  22.3368206]

Make Predictions on Test Data¶

# Show the values the trained model predicted for the test features
print('Predicted Values: \n', y_pred)

Predicted Values: 
 [[ 37.8146019   37.8146019   37.8146019  ...,  37.8146019   37.8146019
   37.8146019 ]
 [ 26.75547409  26.75547409  26.75547409 ...,  26.75547409  26.75547409
   26.75547409]
 [ 16.88926888  16.88926888  16.88926888 ...,  16.88926888  16.88926888
   16.88926888]
 ..., 
 [ 20.34828568  20.34828568  20.34828568 ...,  20.34828568  20.34828568
   20.34828568]
 [ 31.68542671  31.68542671  31.68542671 ...,  31.68542671  31.68542671
   31.68542671]
 [ 21.35341454  21.35341454  21.35341454 ...,  21.35341454  21.35341454
   21.35341454]]

# Show the Mean Squared Error computed on the test data
print('Mean Squared Error [TF Session]: ', mse)

Mean Squared Error [TF Session]:  34.8672284386

Cost vs Epochs Plot¶

# Plot the recorded cost against its position in the history
# (with a single argument, plot() uses 0..N-1 as the x values)
plt.plot(cost_history)

# x axis spans the epochs, y axis runs from 0 to the largest recorded cost
plt.axis([0, epochs, 0, np.max(cost_history)])

plt.title('Cost vs Epochs', fontsize=25)
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1.0	296.0	15.3	396.90	4.98
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2.0	242.0	17.8	396.90	9.14
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2.0	242.0	17.8	392.83	4.03
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3.0	222.0	18.7	394.63	2.94
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3.0	222.0	18.7	396.90	5.33

	labels
0	24.0
1	21.6
2	34.7
3	33.4
4	36.2