# Plot ad hoc MNIST instances.
# import tools and libraries
from keras.datasets import mnist
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# %matplotlib inline  # NOTE: IPython/Jupyter magic — valid only inside a
# notebook cell; it is a syntax error in a plain .py script, so it is kept
# here as a comment.

# load (downloaded if needed) the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale pixel intensities into [0, 1]; keep the integer labels as-is.
X = X_train / 255.0
y = y_train

# making suitable adjustments: flatten each 28x28 image to a 784-long vector
X = X.reshape(X.shape[0], -1)

# Labeling: one 'pixelN' column per feature, plus the digit label as a string
feat_cols = ['pixel' + str(i) for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feat_cols)
df['label'] = y
df['label'] = df['label'].apply(lambda i: str(i))

# Random order in which to sample digits for display.
rndperm = np.random.permutation(df.shape[0])

# Plot the graphs: 30 randomly chosen digits in a 3x10 grid.
plt.gray()
fig = plt.figure(figsize=(16, 7))
for i in range(30):
    ax = fig.add_subplot(3, 10, i + 1,
                         title='Digit: ' + str(df.loc[rndperm[i], 'label']))
    ax.matshow(df.loc[rndperm[i], feat_cols].values.reshape((28, 28)).astype(float))
plt.show()

Free step-by-step guide to becoming a data scientist

Subscribe to get this detailed guide absolutely free.

# Build the PCA model: project the 784-dimensional pixel space down to the
# two directions of greatest variance.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca_result = pca.fit_transform(df[feat_cols])

# Attach the two principal-component coordinates to the DataFrame so the
# later plotting step can reference them by column name.
df['pca-one'] = pca_result[:, 0]
df['pca-two'] = pca_result[:, 1]

print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))

Output:

Explained variation per principal component: [0.09704664 0.07095924]

# Visualisation: scatter the first 3000 randomly-sampled digits in the
# 2-D PCA space, coloured by their digit label.
from ggplot import *

chart = ggplot(df.loc[rndperm[:3000], :],
               aes(x='pca-one', y='pca-two', color='label')) \
    + geom_point(size=75, alpha=0.8) \
    + ggtitle("First and Second Principal Components colored by digit")
# BUG FIX: the original ended with `Chart` (capital C), which is a NameError —
# the variable assigned above is `chart`. A bare expression like this renders
# the plot in a notebook cell.
chart