Free Shipping

Secure Payment

Easy Returns

24/7 Support

  • Home
  • Blog
  • Principal Component Analysis Case Study 1

Principal Component Analysis Case Study 1

 July 7  | 0 Comments
# Plot ad hoc mnist instances
from keras.datasets import mnist
import numpy as np
# import tools and libraries
import matplotlib.pyplot as plt
%matplotlib inline
# load (downloaded if needed) the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X = X_train / 255.0
y = y_train
# making suitable adjustments 
X = X.reshape(X.shape[0],-1)
# Labeling
import pandas as pd
feat_cols = [ 'pixel'+str(i) for i in range(X.shape[1]) ]
df = pd.DataFrame(X,columns=feat_cols)
df['label'] = y
df['label'] = df['label'].apply(lambda i: str(i))
rndperm = np.random.permutation(df.shape[0])
# Plot the graphs
plt.gray()
fig = plt.figure( figsize=(16,7) )
for i in range(0,30):
ax = fig.add_subplot(3,10,i+1, title='Digit: ' + str(df.loc[rndperm[i],'label']) )
ax.matshow(df.loc[rndperm[i],feat_cols].values.reshape((28,28)).astype(float))
plt.show()

 

 

Free Step-by-step Guide To Become A Data Scientist

Subscribe and get this detailed guide absolutely FREE

# Build the PCA model: project the pixel features onto their first two
# principal components and keep both projections as DataFrame columns.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pixel_frame = df[feat_cols]
pca_result = pca.fit_transform(pixel_frame)

# Column 0 is the first component, column 1 the second.
df['pca-one'], df['pca-two'] = pca_result[:, 0], pca_result[:, 1]

# Report the explained-variance ratio of each fitted component.
variance_ratio = pca.explained_variance_ratio_
print('Explained variation per principal component: {}'.format(variance_ratio))

Output:

Explained variation per principal component: [0.09704664 0.07095924]

# Visualisation: scatter plot of the two principal components for the first
# 3000 rows of the random permutation `rndperm`, coloured by digit label.
from ggplot import *  # supplies ggplot, aes, geom_point and ggtitle

# Parentheses replace the backslash continuations; the expression is unchanged.
chart = (
    ggplot(df.loc[rndperm[:3000], :], aes(x='pca-one', y='pca-two', color='label'))
    + geom_point(size=75, alpha=0.8)
    + ggtitle("First and Second Principal Components colored by digit")
)
# Python names are case-sensitive: the variable is `chart`, not `Chart`.
# Presumably left as a bare trailing expression so the notebook renders the plot.
chart

 

>