# Import Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# Load Data
# Load the Boston housing dataset object via scikit-learn
boston = load_boston()
# Separate the data into features and labels, each loaded as a Pandas DataFrame
# Features
# Pass the feature names directly as the column labels. Wrapping the name
# array in a list makes pandas treat it as the levels of a MultiIndex, so the
# columns would no longer be plain string labels.
features_df = pd.DataFrame(np.array(boston.data), columns=boston.feature_names)
features_df.head()
# Labels
labels_df = pd.DataFrame(np.array(boston.target), columns=['labels'])
labels_df.head()
# Combined Data
# Features and labels side by side (axis=1 concatenates column-wise)
combined_data = pd.concat([features_df, labels_df], axis=1)
combined_data.head()
# Train Test Split
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as test data and keep the remaining 80% for
# training. random_state is pinned so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features_df,
    labels_df,
    test_size=0.2,
    random_state=101,
)
# Normalize Data
from sklearn.preprocessing import StandardScaler
# Define the preprocessing method and fit it on the training features only
scaler = StandardScaler()
scaler.fit(X_train)
# Replace X_train with its scaled version. transform() scales every feature
# column and already returns a NumPy array, so no intermediate DataFrame is
# needed before handing the data to TensorFlow.
X_train = scaler.transform(X_train)
# Convert the training labels from a Pandas DataFrame to a NumPy array
y_train = np.array(y_train)
# Get the Type of Training Data
type(X_train), type(y_train)
# Apply same Normalization for Test Features
# Reuse the scaler that was fitted on the training features. Fitting a second
# scaler on X_test would standardize the test set with its own mean and
# variance, so the test features would not be on the same scale as the
# features the model is trained on.
scal = scaler  # alias kept so any later reference to `scal` still resolves
# Replace X_test with its scaled version (transform() returns a NumPy array)
X_test = scaler.transform(X_test)
# Convert the test labels to a NumPy array
y_test = np.array(y_test)
# Get the Type of Test Data
type(X_test), type(y_test)
Up to this point, all of the code has been the same as before, i.e. loading the dataset, performing the train/test split, preprocessing, etc. From here on, we define the TensorFlow code that trains a model on this dataset and runs inference with it.
# Define Feature Columns for the DNN Regressor
features_df.columns
# Make Feature Columns
# A single numeric column named 'x' holds the whole feature vector; its shape
# is the per-sample shape of X_train (everything after the batch dimension).
feat_cols = [tf.feature_column.numeric_column('x', shape=X_train.shape[1:])]
# Make Input Function
# Every input function feeds the features as a dict keyed by 'x' so that the
# key matches the name of the feature column defined above.
input_func = tf.estimator.inputs.numpy_input_fn(
    {'x': X_train}, y_train, batch_size=1, num_epochs=2000, shuffle=True)
# Set up Estimator Training Inputs
# Same training data, unshuffled. The features are passed as a dict keyed by
# 'x' here too; a bare array would not line up with the 'x' feature column.
train_input_func = tf.estimator.inputs.numpy_input_fn(
    {'x': X_train}, y_train, batch_size=1, num_epochs=1000, shuffle=False)
# Set up Estimator Test Inputs
# One unshuffled pass over the test set, so outputs stay in y_test order.
eval_input_func = tf.estimator.inputs.numpy_input_fn(
    {'x': X_test}, y_test, batch_size=1, num_epochs=1, shuffle=False)
# Define DNN Regressor Model
# Three hidden layers of 10 units each, reading its inputs through feat_cols
# and using the optimizer selected by the name 'Adam'.
dnn_model = tf.estimator.DNNRegressor(
    feature_columns=feat_cols,
    hidden_units=[10, 10, 10],
    optimizer='Adam',
)
# Train the DNN Regressor Estimator
# Training draws batches from input_func and is capped at 2000 steps.
dnn_model.train(steps=2000, input_fn=input_func)
# Evaluate the Model
# Runs the trained estimator over the test input function.
dnn_model.evaluate(input_fn=eval_input_func)
# Predictions
# predict() returns a generator; run it once and materialize the results so
# the model is not evaluated a second time just to read the values.
predictions = dnn_model.predict(input_fn=eval_input_func)
pred = list(predictions)
# Get Predicted Values
# Each yielded item is indexed by the 'predictions' key to get the predicted
# value for one test sample. The comprehension uses its own variable so the
# `pred` list above is not overwritten.
predicted_vals = [item['predictions'] for item in pred]
print(predicted_vals)
# Import Mean Squared Error from Scikit Learn
from sklearn.metrics import mean_squared_error
# Calculate the Mean Squared Error
# scikit-learn metrics take the ground truth first and the predictions second.
# MSE is symmetric, so the value is unchanged, but keeping the documented
# order avoids wrong results if this is swapped for an asymmetric metric.
mse = mean_squared_error(y_test, predicted_vals)
print('Mean Squared Error [DNNRegrssor]: ',mse)