Data Preprocessing and Train Test Split

In [1]:
# Import Dependencies
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
In [2]:
# Load Data
# Load Dataset
boston = load_boston()

# Print out the Dataset
print(boston)
{'data': array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          1.53000000e+01,   3.96900000e+02,   4.98000000e+00],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.96900000e+02,   9.14000000e+00],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          1.78000000e+01,   3.92830000e+02,   4.03000000e+00],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   5.64000000e+00],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.93450000e+02,   6.48000000e+00],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          2.10000000e+01,   3.96900000e+02,   7.88000000e+00]]), 'DESCR': "Boston House Prices dataset\n===========================\n\nNotes\n------\nData Set Characteristics:  \n\n    :Number of Instances: 506 \n\n    :Number of Attributes: 13 numeric/categorical predictive\n    \n    :Median Value (attribute 14) is usually the target\n\n    :Attribute Information (in order):\n        - CRIM     per capita crime rate by town\n        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.\n        - INDUS    proportion of non-retail business acres per town\n        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)\n        - NOX      nitric oxides concentration (parts per 10 million)\n        - RM       average number of rooms per dwelling\n        - AGE      proportion of owner-occupied units built prior to 1940\n        - DIS      weighted distances to five Boston employment centres\n        - RAD      index of accessibility to radial highways\n        - TAX      full-value property-tax rate per $10,000\n        - PTRATIO  pupil-teacher ratio by town\n        - B        1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town\n        - LSTAT    % lower status of the population\n        - MEDV     Median value of owner-occupied homes in $1000's\n\n    :Missing Attribute Values: None\n\n    :Creator: Harrison, D. and Rubinfeld, D.L.\n\nThis is a copy of UCI ML housing dataset.\nhttp://archive.ics.uci.edu/ml/datasets/Housing\n\n\nThis dataset was taken from the StatLib library which is maintained at Carnegie Mellon University.\n\nThe Boston house-price data of Harrison, D. and Rubinfeld, D.L. 'Hedonic\nprices and the demand for clean air', J. Environ. Economics & Management,\nvol.5, 81-102, 1978.   Used in Belsley, Kuh & Welsch, 'Regression diagnostics\n...', Wiley, 1980.   N.B. Various transformations are used in the table on\npages 244-261 of the latter.\n\nThe Boston house-price data has been used in many machine learning papers that address regression\nproblems.   \n     \n**References**\n\n   - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261.\n   - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann.\n   - many more! (see http://archive.ics.uci.edu/ml/datasets/Housing)\n", 'target': array([ 24. ,  21.6,  34.7,  33.4,  36.2,  28.7,  22.9,  27.1,  16.5,
        18.9,  15. ,  18.9,  21.7,  20.4,  18.2,  19.9,  23.1,  17.5,
        20.2,  18.2,  13.6,  19.6,  15.2,  14.5,  15.6,  13.9,  16.6,
        14.8,  18.4,  21. ,  12.7,  14.5,  13.2,  13.1,  13.5,  18.9,
        20. ,  21. ,  24.7,  30.8,  34.9,  26.6,  25.3,  24.7,  21.2,
        19.3,  20. ,  16.6,  14.4,  19.4,  19.7,  20.5,  25. ,  23.4,
        18.9,  35.4,  24.7,  31.6,  23.3,  19.6,  18.7,  16. ,  22.2,
        25. ,  33. ,  23.5,  19.4,  22. ,  17.4,  20.9,  24.2,  21.7,
        22.8,  23.4,  24.1,  21.4,  20. ,  20.8,  21.2,  20.3,  28. ,
        23.9,  24.8,  22.9,  23.9,  26.6,  22.5,  22.2,  23.6,  28.7,
        22.6,  22. ,  22.9,  25. ,  20.6,  28.4,  21.4,  38.7,  43.8,
        33.2,  27.5,  26.5,  18.6,  19.3,  20.1,  19.5,  19.5,  20.4,
        19.8,  19.4,  21.7,  22.8,  18.8,  18.7,  18.5,  18.3,  21.2,
        19.2,  20.4,  19.3,  22. ,  20.3,  20.5,  17.3,  18.8,  21.4,
        15.7,  16.2,  18. ,  14.3,  19.2,  19.6,  23. ,  18.4,  15.6,
        18.1,  17.4,  17.1,  13.3,  17.8,  14. ,  14.4,  13.4,  15.6,
        11.8,  13.8,  15.6,  14.6,  17.8,  15.4,  21.5,  19.6,  15.3,
        19.4,  17. ,  15.6,  13.1,  41.3,  24.3,  23.3,  27. ,  50. ,
        50. ,  50. ,  22.7,  25. ,  50. ,  23.8,  23.8,  22.3,  17.4,
        19.1,  23.1,  23.6,  22.6,  29.4,  23.2,  24.6,  29.9,  37.2,
        39.8,  36.2,  37.9,  32.5,  26.4,  29.6,  50. ,  32. ,  29.8,
        34.9,  37. ,  30.5,  36.4,  31.1,  29.1,  50. ,  33.3,  30.3,
        34.6,  34.9,  32.9,  24.1,  42.3,  48.5,  50. ,  22.6,  24.4,
        22.5,  24.4,  20. ,  21.7,  19.3,  22.4,  28.1,  23.7,  25. ,
        23.3,  28.7,  21.5,  23. ,  26.7,  21.7,  27.5,  30.1,  44.8,
        50. ,  37.6,  31.6,  46.7,  31.5,  24.3,  31.7,  41.7,  48.3,
        29. ,  24. ,  25.1,  31.5,  23.7,  23.3,  22. ,  20.1,  22.2,
        23.7,  17.6,  18.5,  24.3,  20.5,  24.5,  26.2,  24.4,  24.8,
        29.6,  42.8,  21.9,  20.9,  44. ,  50. ,  36. ,  30.1,  33.8,
        43.1,  48.8,  31. ,  36.5,  22.8,  30.7,  50. ,  43.5,  20.7,
        21.1,  25.2,  24.4,  35.2,  32.4,  32. ,  33.2,  33.1,  29.1,
        35.1,  45.4,  35.4,  46. ,  50. ,  32.2,  22. ,  20.1,  23.2,
        22.3,  24.8,  28.5,  37.3,  27.9,  23.9,  21.7,  28.6,  27.1,
        20.3,  22.5,  29. ,  24.8,  22. ,  26.4,  33.1,  36.1,  28.4,
        33.4,  28.2,  22.8,  20.3,  16.1,  22.1,  19.4,  21.6,  23.8,
        16.2,  17.8,  19.8,  23.1,  21. ,  23.8,  23.1,  20.4,  18.5,
        25. ,  24.6,  23. ,  22.2,  19.3,  22.6,  19.8,  17.1,  19.4,
        22.2,  20.7,  21.1,  19.5,  18.5,  20.6,  19. ,  18.7,  32.7,
        16.5,  23.9,  31.2,  17.5,  17.2,  23.1,  24.5,  26.6,  22.9,
        24.1,  18.6,  30.1,  18.2,  20.6,  17.8,  21.7,  22.7,  22.6,
        25. ,  19.9,  20.8,  16.8,  21.9,  27.5,  21.9,  23.1,  50. ,
        50. ,  50. ,  50. ,  50. ,  13.8,  13.8,  15. ,  13.9,  13.3,
        13.1,  10.2,  10.4,  10.9,  11.3,  12.3,   8.8,   7.2,  10.5,
         7.4,  10.2,  11.5,  15.1,  23.2,   9.7,  13.8,  12.7,  13.1,
        12.5,   8.5,   5. ,   6.3,   5.6,   7.2,  12.1,   8.3,   8.5,
         5. ,  11.9,  27.9,  17.2,  27.5,  15. ,  17.2,  17.9,  16.3,
         7. ,   7.2,   7.5,  10.4,   8.8,   8.4,  16.7,  14.2,  20.8,
        13.4,  11.7,   8.3,  10.2,  10.9,  11. ,   9.5,  14.5,  14.1,
        16.1,  14.3,  11.7,  13.4,   9.6,   8.7,   8.4,  12.8,  10.5,
        17.1,  18.4,  15.4,  10.8,  11.8,  14.9,  12.6,  14.1,  13. ,
        13.4,  15.2,  16.1,  17.8,  14.9,  14.1,  12.7,  13.5,  14.9,
        20. ,  16.4,  17.7,  19.5,  20.2,  21.4,  19.9,  19. ,  19.1,
        19.1,  20.1,  19.9,  19.6,  23.2,  29.8,  13.8,  13.3,  16.7,
        12. ,  14.6,  21.4,  23. ,  23.7,  25. ,  21.8,  20.6,  21.2,
        19.1,  20.6,  15.2,   7. ,   8.1,  13.6,  20.1,  21.8,  24.5,
        23.1,  19.7,  18.3,  21.2,  17.5,  16.8,  22.4,  20.6,  23.9,
        22. ,  11.9]), 'feature_names': array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'],
      dtype='<U7')}

Create Features and Labels Dataframes

In [3]:
# Seperate Data into Features and Labels and load them as a Pandas Dataframe
# Features
features_df = pd.DataFrame(np.array(boston.data), columns=[boston.feature_names])

features_df.head()
Out[3]:
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 396.90 4.98
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 396.90 9.14
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 392.83 4.03
3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 394.63 2.94
4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 396.90 5.33
In [4]:
# Labels
labels_df = pd.DataFrame(np.array(boston.target), columns=['labels'])
labels_df.head()
Out[4]:
labels
0 24.0
1 21.6
2 34.7
3 33.4
4 36.2

For this tutorial, we'll be doing the train test split first as we need to normalize only the train and test features and not the labels.

Train Test Split

In [6]:
# Train Test Split
from sklearn.model_selection import train_test_split

We'll be splitting the data into train and test set. The test set will comprise of only 20% of the dataset and the rest will be the training data.

In [7]:
# Train Test Split
# Training Data = 80% of Dataset
# Test Data = 20% of Dataset
X_train, X_test, y_train, y_test = train_test_split(features_df, labels_df, test_size=0.2, random_state=101)

Now, let's have a look at the shape and the type of the split dataset. This tells us that the split has been nicely done and the nu,ber of labels are equal to the number of features in the dataset. An unbalanced feature label combination can lead to errors in the future. So, check it now.

In [8]:
print(X_train.shape)
(404, 13)
In [9]:
print(type(X_train))
<class 'pandas.core.frame.DataFrame'>
In [10]:
print(y_train.shape)
(404, 1)
In [11]:
print(type(y_train))
<class 'pandas.core.frame.DataFrame'>
In [12]:
print(X_test.shape)
(102, 13)
In [13]:
print(type(X_test))
<class 'pandas.core.frame.DataFrame'>
In [14]:
print(y_test.shape)
(102, 1)
In [15]:
print(type(y_test))
<class 'pandas.core.frame.DataFrame'>

So, as we can see from above lines, the train features have 404 rows which is the same as those in the training labels. Similarly, the number of rows in the test features is equal to those in the test labels. Also, note that all the test and train data are of the same type i.e. they all are a Pandas Dataframe as expected.

Data Preprocessing

So, now that we have our separated training and test features, we can apply the normalization onto our data.

In [16]:
# Normalize Data
from sklearn.preprocessing import StandardScaler

The StandardScalar algorithm standardizes the features by removing the mean and scaling to unit variance. In this, the centering and scaling happens independently on each feature by computing the relevant statistics on the samples in the training set. Mean and standard deviation are then stored to be used on later data using the transform method.

In [17]:
# Define the Preprocessing Method and Fit Training Data to it
scaler = StandardScaler()
scaler.fit(X_train)
Out[17]:
StandardScaler(copy=True, with_mean=True, with_std=True)
In [18]:
# Make X_train to be the Scaled Version of Data
# This process scales all the values in all 6 columns and replaces them with the new values
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
In [20]:
# Visualized the Normalized Data
print(X_train)
         CRIM        ZN     INDUS      CHAS       NOX        RM       AGE  \
288 -0.411893  1.844475 -0.854602 -0.267615 -1.294726  0.060300 -0.799422   
72  -0.406165 -0.497137 -0.054399 -0.267615 -1.224165 -0.311287 -2.138503   
471  0.088066 -0.497137  1.008165 -0.267615 -0.174563 -0.067526  0.798264   
176 -0.408848 -0.497137 -1.039712 -0.267615 -0.368607 -0.378173 -0.742741   
320 -0.396653 -0.497137 -0.554343 -0.267615 -0.518550  0.225285 -0.562072   
55  -0.415999  3.517055 -1.452203 -0.267615 -1.312367  1.448551 -1.639004   
224 -0.378154 -0.497137 -0.726336 -0.267615 -0.421528  2.960169  0.358989   
57  -0.415848  3.963076 -1.437627 -0.267615 -1.241805  0.804961 -0.980091   
312 -0.384871 -0.497137 -0.187037 -0.267615 -0.068721 -0.373714  0.787637   
233 -0.376133 -0.497137 -0.726336 -0.267615 -0.395068  2.931928  0.079128   
43  -0.397685 -0.497137 -0.622849 -0.267615 -0.915459 -0.094280 -2.184556   
36  -0.405439 -0.497137 -0.761317 -0.267615 -0.465629 -0.644230 -0.239700   
341 -0.416012  1.063938 -1.408476 -0.267615 -0.968380  1.436660 -0.668348   
91  -0.412717 -0.497137 -1.132996 -0.267615 -0.553831  0.194072  0.203117   
298 -0.409544  2.625012 -1.303531 -0.267615 -1.338827  0.104891 -1.702770   
194 -0.415839  2.178991 -1.202959 -0.267615 -1.330007  0.489855 -1.748823   
168 -0.129575 -0.497137  1.223885 -0.267615  0.469310  0.066246  0.989562   
201 -0.413327  3.182539 -1.334140 -0.267615 -1.206525 -0.167111 -1.054485   
450  0.423581 -0.497137  1.008165 -0.267615  1.421890  0.705376  0.865573   
315 -0.385889 -0.497137 -0.187037 -0.267615 -0.068721 -0.846373  0.337734   
255 -0.413198  3.071033 -1.099472 -0.267615 -1.409389 -0.592207 -1.738195   
274 -0.410573  1.286948 -0.695727  3.736705 -0.924279  0.718753 -1.249325   
56  -0.415068  3.294044 -1.522166 -0.267615 -1.250626  0.161372 -1.150133   
480  0.311667 -0.497137  1.008165 -0.267615 -0.174563 -0.048203 -0.122796   
427  4.298544 -0.497137  1.008165 -0.267615  1.122004 -0.107657  0.373159   
388  1.377286 -0.497137  1.008165 -0.267615  1.307228 -2.072612  1.127721   
156 -0.111257 -0.497137  1.223885 -0.267615  2.815479 -1.489963  0.915168   
53  -0.411404  0.439508 -0.807960 -0.267615 -0.994840 -0.410873 -1.656717   
311 -0.318662 -0.497137 -0.187037 -0.267615 -0.068721 -0.226565 -0.544359   
189 -0.407160  1.509959 -1.128624 -0.267615 -1.012481  1.353425 -1.036772   
..        ...       ...       ...       ...       ...       ...       ...   
478  0.863779 -0.497137  1.008165 -0.267615  0.548692 -0.132925  1.010817   
413  3.170761 -0.497137  1.008165 -0.267615  0.398749 -1.663866  1.127721   
371  0.738467 -0.497137  1.008165 -0.267615  0.698635 -0.086848  1.127721   
136 -0.377239 -0.497137  1.560582 -0.267615  0.636894 -0.494108  0.897456   
111 -0.405013 -0.497137 -0.171004 -0.267615 -0.042260  0.654840  0.475893   
339 -0.410757 -0.497137 -0.873550 -0.267615 -0.324506 -0.430195 -0.806507   
49  -0.390120 -0.497137 -0.622849 -0.267615 -0.915459 -0.999467 -0.218445   
498 -0.387697 -0.497137 -0.217646 -0.267615  0.292906 -0.379659 -0.101541   
477  1.463654 -0.497137  1.008165 -0.267615  0.548692 -1.442400  1.032072   
396  0.317682 -0.497137  1.008165 -0.267615  1.245486  0.194072  0.986019   
5   -0.413903 -0.497137 -1.312277 -0.267615 -0.827257  0.231230 -0.335349   
110 -0.404125 -0.497137 -0.382351 -0.267615 -0.280405 -0.118062 -0.487678   
192 -0.406791  1.509959 -1.128624 -0.267615 -1.012481  1.343020 -1.483132   
220 -0.372799 -0.497137 -0.726336  3.736705 -0.395068  1.005619  0.720328   
316 -0.377786 -0.497137 -0.187037 -0.267615 -0.068721 -0.535726  0.532574   
245 -0.393682  0.484110 -0.775893 -0.267615 -1.065402 -0.995008  0.072043   
424  0.683346 -0.497137  1.008165 -0.267615  0.284086 -1.054462  0.086214   
501 -0.409798 -0.497137  0.108848 -0.267615  0.187064  0.473505  0.033075   
132 -0.343752 -0.497137  1.560582 -0.267615  0.636894  0.145022  1.053327   
40  -0.413435  2.848023 -1.200044 -0.267615 -1.091862  1.114122 -1.855099   
461  0.044826 -0.497137  1.008165 -0.267615  1.421890  0.150968  0.716786   
393  0.664893 -0.497137  1.008165 -0.267615  1.245486 -0.121035  0.865573   
495 -0.395227 -0.497137 -0.217646 -0.267615  0.292906 -0.898396 -1.394569   
75  -0.405730 -0.497137  0.240029 -0.267615 -1.012481  0.017196 -0.820677   
87  -0.408686 -0.497137 -0.975579 -0.267615 -0.906638 -0.228052 -0.402657   
63  -0.401800  0.617916 -0.882295 -0.267615 -0.871358  0.724699 -0.877358   
326 -0.379639 -0.497137 -0.554343 -0.267615 -0.518550  0.055841 -1.391026   
337 -0.413833 -0.497137 -0.873550 -0.267615 -0.324506 -0.563967 -0.303466   
11  -0.402931  0.060390 -0.482923 -0.267615 -0.245125 -0.394523  0.521946   
351 -0.407686  2.178991 -1.383697 -0.267615 -1.241805  0.452697 -1.143048   

          DIS       RAD       TAX   PTRATIO         B     LSTAT  
288  1.674506 -0.403534 -0.673066 -0.853362  0.433923 -0.736065  
72   0.699753 -0.634616 -0.602036  0.357624  0.368614 -1.040342  
471 -0.350920  1.676198  1.534767  0.823387  0.416805  0.034870  
176 -0.132142 -0.519075 -0.655308 -0.853362  0.393909 -0.368883  
320  0.341093 -0.519075 -0.708581  0.543929  0.433923 -0.794579  
55   2.336891 -0.519075 -1.069647 -0.247869  0.423347 -1.144206  
224 -0.449313 -0.172453 -0.590198 -0.480751  0.304721 -1.242219  
57   2.158354 -0.519075 -0.892073 -1.552008  0.390311 -1.270013  
312 -0.478317 -0.634616 -0.607955 -0.014987  0.427381 -0.133360  
233 -0.085563 -0.172453 -0.590198 -0.480751  0.238213 -1.270013  
43   0.907966 -0.750156 -1.028213 -0.247869  0.407320 -0.759470  
36  -0.217137 -0.519075 -0.755934  0.357624  0.223057 -0.178710  
341  1.540387 -0.981238 -0.726338 -1.365702  0.410373 -1.044731  
91  -0.354378 -0.865697 -0.809206 -0.294446  0.397398 -0.648292  
298  1.919695 -0.519075 -0.288323 -1.691737  0.121440 -1.120800  
194  1.147441 -0.981238 -0.838801 -1.319126  0.213681 -1.207110  
168 -0.830782 -0.519075 -0.021962 -1.738313 -0.654315 -0.224059  
201  1.171643 -0.865697 -0.347514 -1.738313  0.399797 -0.760933  
450 -0.723410  1.676198  1.534767  0.823387 -3.890029  0.703403  
315  0.055183 -0.634616 -0.607955 -0.014987  0.428690 -0.165544  
255  2.588371 -0.981238 -0.542845 -0.946515  0.415170 -0.494690  
274  0.118857 -0.634616 -0.903912 -0.387598  0.433923 -1.331454  
56   2.572668 -0.865697 -0.554683 -0.527328  0.433923 -1.003770  
480 -0.194904  1.676198  1.534767  0.823387  0.433923 -0.276722  
427 -0.944637  1.676198  1.534767  0.823387 -3.688322  0.276244  
388 -1.075923  1.676198  1.534767  0.823387  0.172467  2.631471  
156 -1.005382 -0.519075 -0.021962 -1.738313 -2.927176  0.513230  
53   1.433207 -0.634616 -0.969022 -0.760209  0.433923 -0.614646  
311 -0.571331 -0.634616 -0.607955 -0.014987  0.433923 -0.973050  
189  0.353722 -0.519075 -0.051558 -1.505431  0.433923 -1.059360  
..        ...       ...       ...       ...       ...       ...  
478 -0.796928  1.676198  1.534767  0.823387  0.246390  0.789713  
413 -1.075971  1.676198  1.534767  0.823387 -1.593291  1.089602  
371 -1.277798  1.676198  1.534767  0.823387  0.098653 -0.453730  
136 -0.894696 -0.634616  0.179288  1.289151  0.230580  0.624408  
111 -0.553468 -0.403534  0.149692 -0.294446  0.419640 -0.361569  
339  0.471611 -0.519075 -1.081485  0.823387  0.433923 -0.423010  
49   1.084103 -0.750156 -1.028213 -0.247869  0.433923  0.522007  
498 -0.682353 -0.403534 -0.092992  0.357624  0.433923  0.042184  
477 -0.830446  1.676198  1.534767  0.823387 -0.083102  1.796170  
396 -1.034002  1.676198  1.534767  0.823387  0.433923  0.985738  
5    1.071858 -0.750156 -1.093324  0.124742  0.403613 -1.085691  
110 -0.505304 -0.519075 -0.134426  1.149422  0.396744  0.053887  
192  1.272389 -0.519075 -0.051558 -1.505431  0.364034 -1.428004  
220 -0.465015 -0.172453 -0.590198 -0.480751  0.377227 -0.427398  
316  0.080922 -0.634616 -0.607955 -0.014987  0.366324  0.833599  
245  1.980728 -0.287994 -0.454058  0.311047  0.349206  0.852616  
424 -0.848309  1.676198  1.534767  0.823387 -3.853722  0.662443  
501 -0.648979 -0.981238 -0.791448  1.195998  0.380389 -0.433250  
132 -0.721585 -0.634616  0.179288  1.289151  0.312463 -0.221133  
40   0.754399 -0.750156 -0.915750 -0.061564  0.419967 -1.558200  
461 -0.606481  1.676198  1.534767  0.823387  0.374283  0.295262  
393 -0.979067  1.676198  1.534767  0.823387  0.433923  0.371331  
495 -0.495316 -0.403534 -0.092992  0.357624  0.394563  0.726809  
75   0.322941 -0.519075 -0.051558  0.124742  0.284878 -0.540039  
87  -0.039608 -0.750156 -0.945345  0.031589  0.414843 -0.613183  
63   1.993214 -0.172453 -0.726338  0.590505  0.419531 -0.458118  
326  0.761506 -0.519075 -0.708581  0.543929  0.433923 -0.948181  
337  0.857114 -0.519075 -1.081485  0.823387  0.411136 -0.303054  
11   1.150851 -0.519075 -0.566522 -1.505431  0.433923  0.093385  
351  3.303866 -0.634616  0.025391 -0.061564  0.149134 -1.044731  

[404 rows x 13 columns]

Note that in the above line when we print the features, they no longer have values in different scales but rather they all are in the same scale.

For this tutorial, we'll be using the data for input as a numpy array. So, we'll just convert the data to a numpy array as follows.

In [21]:
# Converting from Pandas Dataframe to Numpy Arrays
X_train = np.array(X_train)
y_train = np.array(y_train)
In [22]:
# Get the Type of Training Data
type(X_train), type(y_train)
Out[22]:
(numpy.ndarray, numpy.ndarray)

Next, we'll apply the same normalization onto the test features so that they all also come onto the same scale.

In [23]:
# Apply same Normalization for Test Features
scal = StandardScaler()
scal.fit(X_test)
Out[23]:
StandardScaler(copy=True, with_mean=True, with_std=True)
In [24]:
# Make X_test to be the Scaled Version of Data
# This process scales all the values in all columns and replaces them with the new values
X_test = pd.DataFrame(data=scal.transform(X_test), columns=X_test.columns, index=X_test.index)
In [25]:
print(X_test)
         CRIM        ZN     INDUS      CHAS       NOX        RM       AGE  \
195 -0.435057  2.559632 -1.538110 -0.291730 -1.161905  1.923544 -1.382171   
4   -0.429837 -0.461719 -1.285981 -0.291730 -0.871823  1.020070 -0.578775   
434  0.878311 -0.461719  1.047677 -0.291730  1.182918 -0.145262  0.897735   
458  0.296144 -0.461719  1.047677 -0.291730  1.182918 -0.029846  0.488800   
39  -0.433751  2.370798 -1.173109 -0.291730 -1.113558  0.335019 -1.751298   
304 -0.431151  0.784589 -1.285981 -0.291730 -0.759014  1.130523 -1.052851   
225 -0.386572 -0.461719 -0.696703 -0.291730 -0.501164  2.978425  0.463467   
32  -0.305211 -0.461719 -0.412325 -0.291730 -0.227199 -0.465449  0.427278   
157 -0.320746 -0.461719  1.264625 -0.291730  0.312675  0.766899  0.984589   
404  3.487718 -0.461719  1.047677 -0.291730  1.021762 -0.985443  0.550321   
65  -0.432975  2.559632 -1.111543 -0.291730 -1.355292 -0.043497 -1.896054   
138 -0.412758 -0.461719  1.603240 -0.291730  0.465773 -0.580866  1.013540   
18  -0.360514 -0.461719 -0.412325 -0.291730 -0.227199 -1.078521 -1.215702   
352 -0.429517  1.804295 -1.357808 -0.291730 -1.250540 -0.547358 -1.870722   
114 -0.422915 -0.461719 -0.138208 -0.291730 -0.154678 -0.088174  0.506894   
407  0.692894 -0.461719  1.047677 -0.291730  0.747796 -0.889883  1.078680   
417  2.014757 -0.461719  1.047677 -0.291730  0.908953 -1.267158  0.684220   
290 -0.433053  2.559632 -0.879936 -0.291730 -1.250540  0.665134 -1.530546   
95  -0.424830 -0.461719 -1.181904 -0.291730 -0.976575  0.372250 -0.448495   
321 -0.419203 -0.461719 -0.523731 -0.291730 -0.589800  0.063232 -0.575156   
439  0.450956 -0.461719  1.047677 -0.291730  1.400479 -0.866304  0.857927   
12  -0.427500  0.010367 -0.451903 -0.291730 -0.340008 -0.541152 -1.128848   
505 -0.431882 -0.461719  0.143238 -0.291730  0.054825 -0.366166  0.383852   
252 -0.428594  0.369153 -0.746542 -0.291730 -1.089384  0.784274 -2.294133   
291 -0.428910  2.559632 -0.879936 -0.291730 -1.250540  1.021312 -1.537783   
361 -0.073820 -0.461719  1.047677 -0.291730  1.642213 -0.091897  0.756598   
234 -0.394039 -0.461719 -0.696703  3.427827 -0.476991  0.497594 -0.133651   
128 -0.405612 -0.461719  1.603240 -0.291730  0.465773  0.131489  1.035253   
372  0.344808 -0.461719  1.047677  3.427827  0.820317 -0.558527  0.702315   
198 -0.432801  2.559632 -1.382728 -0.291730 -1.306945  1.177682 -1.154180   
..        ...       ...       ...       ...       ...       ...       ...   
154 -0.302767 -0.461719  1.264625  3.427827  2.456052 -0.243304  0.933924   
130 -0.404230 -0.461719  1.603240 -0.291730  0.465773  0.164997  1.038872   
296 -0.431286 -0.461719  0.434945 -0.291730 -1.041037  0.277931 -0.694580   
155 -0.102340 -0.461719  1.264625  3.427827  2.456052 -0.214760  0.448992   
161 -0.298089 -0.461719  1.264625 -0.291730  0.312675  1.444505  0.745741   
48  -0.412374 -0.461719 -0.592626 -0.291730 -0.952402 -1.149260  0.908592   
308 -0.389780 -0.461719 -0.154333 -0.291730 -0.178852  0.384660  0.445373   
401  0.908812 -0.461719  1.047677 -0.291730  1.021762  0.022278  1.078680   
24  -0.365470 -0.461719 -0.412325 -0.291730 -0.227199 -0.497716  0.865165   
462  0.192459 -0.461719  1.047677 -0.291730  1.182918 -0.009989  0.463467   
175 -0.430065 -0.461719 -1.011864 -0.291730 -0.452817  0.274208 -1.342363   
443  0.505373 -0.461719  1.047677 -0.291730  1.400479  0.198505  1.078680   
8   -0.416402  0.010367 -0.451903 -0.291730 -0.340008 -0.861340  1.078680   
330 -0.432068 -0.461719 -1.130599 -0.291730 -0.855708 -0.224688 -1.374933   
479  0.918025 -0.461719  1.047677 -0.291730  0.385195 -0.119200  0.644412   
187 -0.428921  1.237791 -1.101282 -0.291730 -1.041037  0.567092 -1.052851   
38  -0.419821 -0.461719 -0.731884 -0.291730 -0.541453 -0.445593 -1.447311   
432  0.172534 -0.461719  1.047677 -0.291730  0.143461  0.124043  0.166718   
382  0.431717 -0.461719  1.047677 -0.291730  1.078167 -0.979238  1.078680   
143 -0.049200 -0.461719  1.264625 -0.291730  2.456052 -1.063628  1.078680   
360 -0.007197 -0.461719  1.047677 -0.291730  1.642213  0.090535  0.644412   
263 -0.358383  0.293619 -1.023591 -0.291730  0.651103  1.243457  0.879641   
398  3.187487 -0.461719  1.047677 -0.291730  1.021762 -1.082244  1.078680   
186 -0.431068 -0.461719 -1.244937 -0.291730 -0.630089  1.868939 -0.600489   
147 -0.212552 -0.461719  1.264625 -0.291730  2.456052 -1.736270  0.923067   
227 -0.397396 -0.461719 -0.696703 -0.291730 -0.501164  1.039927  0.351282   
405  5.981451 -0.461719  1.047677 -0.291730  1.021762 -0.796806  1.078680   
69  -0.424252  0.010367 -0.715759 -0.291730 -1.266656 -0.546117 -1.345982   
231 -0.392617 -0.461719 -0.696703 -0.291730 -0.501164  1.348945  0.242715   
104 -0.423171 -0.461719 -0.350759 -0.291730 -0.372239 -0.196144  0.716790   

          DIS       RAD       TAX   PTRATIO         B     LSTAT  
195  0.913879 -0.651309 -0.959881 -1.866931  0.440393 -1.189960  
4    1.103706 -0.764100 -1.158746  0.068394  0.470351 -0.902458  
434 -0.657862  1.604499  1.516892  0.743507 -2.853820  0.296281  
458 -0.400554  1.604499  1.516892  0.743507 -0.928680  0.425413  
39   0.800432 -0.764100 -0.977959 -0.111636  0.456102 -1.025499  
304  0.167781 -0.312938 -1.158746 -0.066629  0.434222 -0.707541  
225 -0.349496 -0.200148 -0.646518 -0.516704  0.303172 -0.987734  
32   0.153102 -0.651309 -0.646518  1.103568 -1.373107  1.823941  
157 -0.816083 -0.538519 -0.068002 -1.731908  0.094815 -0.992607  
404 -0.939897  1.604499  1.516892  0.743507 -0.286331  1.783739  
65   1.355693 -0.651309 -0.465732 -1.101803  0.470351 -0.982861  
138 -0.911822 -0.651309  0.136889  1.193583  0.415821  1.045492  
18   0.064335 -0.651309 -0.646518  1.103568 -0.740407 -0.127663  
352  3.235985 -0.651309 -0.019792 -0.111636  0.419075 -0.602773  
114 -0.642128 -0.425728  0.106758 -0.336674  0.378795 -0.278724  
407 -1.087704  1.604499  1.516892  0.743507 -0.256822 -0.074061  
417 -0.921501  1.604499  1.516892  0.743507 -2.553908  1.693590  
290  0.669966 -0.651309 -1.020143  0.293432  0.470351 -1.146104  
95  -0.073884 -0.876890 -0.833330 -0.246659  0.033666 -0.741651  
321  0.405593 -0.538519 -0.767042  0.473462  0.470351 -0.714850  
439 -0.843653  1.604499  1.516892  0.743507  0.470351  1.235536  
12   0.823278 -0.538519 -0.622413 -1.506871  0.398543  0.362065  
505 -0.528130 -0.989680 -0.851409  1.103568  0.470351 -0.591809  
252  2.408598 -0.312938 -0.507915  0.248424  0.349062 -1.121739  
291  0.669966 -0.651309 -1.020143  0.293432  0.470351 -1.118084  
361 -0.624237  1.604499  1.516892  0.743507 -0.048577  0.176894  
234 -0.001999 -0.200148 -0.646518 -0.516704  0.058574 -0.571099  
128 -0.845809 -0.651309  0.136889  1.193583  0.470351  0.323082  
372 -1.159084  1.604499  1.516892  0.743507 -0.079657 -0.469986  
198  1.675666 -0.876890 -0.513941 -2.677067  0.417617 -0.745306  
..        ...       ...       ...       ...       ...       ...  
154 -0.874756 -0.538519 -0.068002 -1.731908 -0.381028  0.290190  
130 -0.705434 -0.651309  0.136889  1.193583  0.449482 -0.016804  
296  1.057007 -0.651309 -0.754990 -1.146810  0.424910 -0.651502  
155 -0.876545 -0.538519 -0.068002 -1.731908 -2.995418  0.278007  
161 -0.773144 -0.538519 -0.068002 -1.731908  0.218236 -1.341020  
48   1.015536 -0.764100 -1.092457 -0.291667  0.470351  2.201592  
308 -0.155402 -0.651309 -0.664596 -0.066629  0.470351 -0.998698  
401 -0.955173  1.604499  1.516892  0.743507  0.470351  0.923669  
24   0.341002 -0.651309 -0.646518  1.103568  0.441515  0.433941  
462 -0.422895  1.604499  1.516892  0.743507  0.470351  0.152530  
175 -0.240361 -0.538519 -0.712806 -0.876765  0.403704 -0.902458  
443 -0.769704  1.604499  1.516892  0.743507  0.356243  0.744589  
8    1.112835 -0.538519 -0.622413 -1.506871  0.355121  2.094388  
330  1.017188 -0.651309  0.094706 -0.741742  0.152486 -0.444403  
479 -0.782181  1.604499  1.516892  0.743507  0.317982  0.045325  
187  0.060711 -0.538519 -0.098133 -1.506871  0.436354 -0.737996  
38   0.087639 -0.538519 -0.815252  0.293432  0.431417 -0.317707  
432 -0.667863  1.604499  1.516892  0.743507 -2.883890 -0.086244  
382 -0.952283  1.604499  1.516892  0.743507  0.470351  1.323248  
143 -1.029627 -0.538519 -0.068002 -1.731908  0.470351  1.666789  
360 -0.522075  1.604499  1.516892  0.743507  0.219695 -0.602773  
263 -0.723646 -0.538519 -0.905645 -2.497037  0.431305 -0.181265  
398 -0.993937  1.604499  1.516892  0.743507  0.470351  2.174791  
186 -0.209672 -0.764100 -1.333506 -0.336674  0.422441 -1.009662  
147 -1.007149 -0.538519 -0.068002 -1.731908  0.412119  2.045659  
227 -0.202102 -0.200148 -0.646518 -0.516704  0.191869 -0.776980  
405 -1.023388  1.604499  1.516892  0.743507  0.336496  1.247718  
69   1.303626 -0.651309 -0.417522  0.158409  0.470351 -0.480950  
231  0.006992 -0.200148 -0.646518 -0.516704  0.237422 -0.912203  
104 -0.566665 -0.538519 -0.182500  1.058560  0.423114 -0.049697  

[102 rows x 13 columns]
In [26]:
# Convert test features and Labels to Numpy Arrays
X_test = np.array(X_test)
y_test = np.array(y_test)
In [27]:
# Get the Type of Test Data
type(X_test), type(y_test)
Out[27]:
(numpy.ndarray, numpy.ndarray)