StandardScaler(copy=True, with_mean=True, with_std=True)
Data Standardization:
The process of standardizing the data to a common format and common range                                                                                                   X_train_standardized = scaler.transform(X_train)
import numpy as np                                                                                                                                                          print(X_train_standardized)
import pandas as pd
import sklearn.datasets                                                                                                                                                          [[ 1.40381088 1.79283426 1.37960065 ... 1.044121       0.52295995
from sklearn.preprocessing import StandardScaler                                                                                                                                    0.64990763]
from sklearn.model_selection import train_test_split                                                                                                                              [ 1.16565505 -0.14461158 1.07121375 ... 0.5940779     0.44153782
                                                                                                                                                                                   -0.85281516]
                                                                                                                                                                                  [-0.0307278 -0.77271123 -0.09822185 ... -0.64047556 -0.31161687
# loading the dataset                                                                                                                                                              -0.69292805]
dataset = sklearn.datasets.load_breast_cancer()                                                                                                                                   ...
                                                                                                                                                                                  [ 1.06478904 0.20084323 0.89267396 ... 0.01694621 3.06583565
                                                                                                                                                                                   -1.29952679]
# loading the data to a pandas dataframe                                                                                                                                          [ 1.51308238 2.3170559    1.67987211 ... 1.14728703 -0.16599653
df = pd.DataFrame(dataset.data, columns=dataset.feature_names)                                                                                                                      0.82816016]
                                                                                                                                                                                  [-0.73678981 -1.02636686 -0.74380549 ... -0.31826862 -0.40713129
                                                                                                                                                                                   -0.38233653]]
# Preview the first five rows of the feature table.
df.head(n=5)
                                                                                                                                                                            X_test_standardized = scaler.transform(X_test)
                                                                                                           mean                   mean
                         mean      mean        mean    mean           mean           mean        mean                 mean               radius   texture   perimeter
                                                                                                        concave                fractal
                       radius   texture   perimeter    area     smoothness    compactness   concavity             symmetry                error     error       error   e
                                                                                                         points              dimension                                      print(X_train_standardized.std())
                 0      17.99     10.38      122.80   1001.0        0.11840       0.27760      0.3001   0.14710     0.2419     0.07871   1.0950    0.9053       8.589   1        1.0
                 1      20.57     17.77      132.90   1326.0        0.08474       0.07864      0.0869   0.07017     0.1812     0.05667   0.5435    0.7339       3.398
                 2      19.69     21.25      130.00   1203.0        0.10960       0.15990      0.1974   0.12790     0.2069     0.05999   0.7456    0.7869       4.585       print(X_test_standardized.std())
                 3      11.42     20.38       77.58    386.1        0.14250       0.28390      0.2414   0.10520     0.2597     0.09744   0.4956    1.1560       3.445            0.8654541077212674
                 4      20.29     14.34      135.10   1297.0        0.10030       0.13280      0.1980   0.10430     0.1809     0.05883   0.7572    0.7813       5.438
# (rows, columns) of the feature DataFrame.
df.shape
                 (569, 30)
# Features are the whole DataFrame; labels come from the dataset's target array.
X, Y = df, dataset.target
print(X)
                        mean radius   mean texture    ...      worst symmetry   worst fractal dimension
                 0            17.99          10.38    ...              0.4601                   0.11890
                 1            20.57          17.77    ...              0.2750                   0.08902
                 2            19.69          21.25    ...              0.3613                   0.08758
                 3            11.42          20.38    ...              0.6638                   0.17300
                 4            20.29          14.34    ...              0.2364                   0.07678
                 ..             ...            ...    ...                 ...                       ...
                 564          21.56          22.39    ...              0.2060                   0.07115
                 565          20.13          28.25    ...              0.2572                   0.06637
                 566          16.60          28.08    ...              0.2218                   0.07820
                 567          20.60          29.33    ...              0.4087                   0.12400
                 568           7.76          24.54    ...              0.2871                   0.07039
                 [569 rows x 30 columns]
Splitting the data into training data and test data
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)
# Sanity check: full, training and test shapes, in that order.
print(*(part.shape for part in (X, X_train, X_test)))
                 (569, 30) (455, 30) (114, 30)
Standardize the data
# Standard deviation over every value of the raw feature array (before scaling).
print(np.std(dataset.data))
                 228.29740508276657
                                                                                   Code          Text
# Create the scaler with its default settings.
scaler = StandardScaler()
# Fit on the training split only, so the test split does not influence the
# learned scaling statistics.
scaler.fit(X_train)