# Import Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Dataset
boston = load_boston()
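Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2. If you are on a newer version, the same data can be assembled from the original source using the replacement snippet scikit-learn itself suggested (a drop-in sketch; it rebuilds the data, target and feature names that load_boston used to return):
# Alternative for scikit-learn >= 1.2, where load_boston no longer exists
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']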
# Separate Data into Features and Labels and load them as Pandas DataFrames
# Features
features_df = pd.DataFrame(boston.data, columns=boston.feature_names)
features_df.head()
# Labels
labels_df = pd.DataFrame(np.array(boston.target), columns=['labels'])
labels_df.head()
# Combined Data
combined_data = pd.concat([features_df, labels_df], axis=1)
combined_data.head()
# Train Test Split
from sklearn.model_selection import train_test_split
# Training Data = 80% of Dataset
# Test Data = 20% of Dataset
X_train, X_test, y_train, y_test = train_test_split(features_df, labels_df, test_size=0.2, random_state=101)
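As a quick check, the split should leave roughly 404 training rows and 102 test rows out of the 506 in the dataset:
# Confirm the Split Sizes (506 rows total, 80/20 split)
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)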
# Normalize Data
from sklearn.preprocessing import StandardScaler
# Define the Preprocessing Method and Fit it on the Training Data
scaler = StandardScaler()
scaler.fit(X_train)
# Make X_train the Scaled Version of the Data
# This process scales the values in all 13 feature columns and replaces them with the standardized values
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
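A quick way to verify the transform: after standardization every column should have a mean of roughly 0 and a standard deviation of roughly 1.
# Verify the Standardization: each column should now have mean ~0 and std ~1
print(X_train.mean().round(6))
print(X_train.std().round(6))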
# Convert from Pandas DataFrame to NumPy Arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
# Get the Type of Training Data
type(X_train), type(y_train)
# Apply the same Normalization to the Test Features
# Reuse the scaler fitted on the training data; fitting a separate scaler on the
# test set would leak test-set statistics and skew the evaluation
X_test = pd.DataFrame(data=scaler.transform(X_test), columns=X_test.columns, index=X_test.index)
# Convert Test Features and Labels to NumPy Arrays
X_test = np.array(X_test)
y_test = np.array(y_test)
# Get the Type of Test Data
type(X_test), type(y_test)
So far the code has been standard scikit-learn boilerplate: load the dataset, split it into training and test sets, and preprocess the features. From here we start writing the TensorFlow code (this notebook uses the TF 1.x Estimator API) to train a model on this dataset and get predictions from it.
# Define Feature Columns for the Linear Regressor
features_df.columns
# Make Feature Columns: a single numeric column named 'x' covering all 13 standardized features
feat_cols = [tf.feature_column.numeric_column('x', shape=X_train.shape[1:])]
# Make Input Function
input_func = tf.estimator.inputs.numpy_input_fn({'x':X_train}, y_train, batch_size=1, num_epochs=2000, shuffle=True)
# Define Linear Regressor Model
# Supported Optimizers: ('Adagrad', 'Adam', 'Ftrl', 'RMSProp', 'SGD')
linear_model = tf.estimator.LinearRegressor(feature_columns=feat_cols, optimizer='Adam')
# Set up Estimator Training Inputs
# Features must be passed as a dict whose key matches the feature column name 'x'
train_input_func = tf.estimator.inputs.numpy_input_fn({'x': X_train}, y_train, batch_size=1, num_epochs=1000, shuffle=False)
# Set up Estimator Test Inputs
eval_input_func = tf.estimator.inputs.numpy_input_fn({'x': X_test}, y_test, batch_size=1, num_epochs=1, shuffle=False)
# Train the Linear Regressor Estimator
linear_model.train(input_fn=input_func, steps=2000)
# Test the Model
test_metrics = linear_model.evaluate(input_fn=eval_input_func, steps=100)
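The dictionary returned by evaluate() holds the loss metrics. For a LinearRegressor, average_loss is the mean squared error, so its square root gives the RMSE in the label's units (the Boston target is the median home value in $1000s):
# Inspect the Evaluation Metrics
print(test_metrics)
# Derive the RMSE from the average (per-sample) loss
print('RMSE:', np.sqrt(test_metrics['average_loss']))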
# Get the Predicted Values as an Array
# Each element yielded by predict() is a dict; the 'predictions' key holds the model output
predicted_vals = []
for pred in linear_model.predict(input_fn=eval_input_func):
    predicted_vals.append(pred['predictions'])
print(predicted_vals)
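Since matplotlib is already imported, a scatter plot of predicted against actual values is a quick visual sanity check; points hugging the diagonal indicate good predictions.
# Plot Predicted vs Actual Values
preds = np.array(predicted_vals).flatten()
plt.scatter(y_test.flatten(), preds)
plt.xlabel('Actual Median Home Value ($1000s)')
plt.ylabel('Predicted Median Home Value ($1000s)')
plt.title('Linear Regressor: Predicted vs Actual')
plt.show()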