Import all the required libraries and the MNIST dataset
# Imports and MNIST data load.
# NOTE: `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and
# removed in 0.23; the standalone `joblib` package is the replacement.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import joblib
import matplotlib.pyplot as plt
from sklearn import metrics
import keras
from keras.datasets import mnist

# Images are 28x28 uint8 arrays, labels are digits 0-9:
# 60,000 training samples and 10,000 test samples.
(Image_train, Number_train), (Image_test, Number_test) = mnist.load_data()
Randomly select images of numbers to display in the next step
def plot_mnist_image(instances, images_per_row=10, **options):
    """Draw a batch of 28x28 MNIST images on a single grid.

    Parameters
    ----------
    instances : sequence of arrays, each reshapeable to (28, 28)
    images_per_row : int, grid width (capped at len(instances))
    **options : extra keyword arguments forwarded to plt.imshow
    """
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size, size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    # Pad the final row with blank pixels so every row concatenates to the
    # same total width.
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    row_images = []
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    # Pass the colormap by name: the bare `matplotlib` module is not imported
    # before this definition, and attribute access like matplotlib.cm.binary
    # is deprecated in recent matplotlib releases.
    plt.imshow(image, cmap="binary", **options)
    plt.axis("off")
Display the images to explain the complexity associated with the classification task
# Pick five random example indices for each digit 0-9 (50 indices total).
# The np.random.choice calls run in the same digit order as before, so the
# drawn indices are identical for a given RNG state.
values = np.asarray(Number_train)
digit_picks = [
    np.random.choice(np.where(values == digit)[0], size=5, replace=False)
    for digit in range(10)
]
i0 = np.concatenate(digit_picks, axis=0)
# IPython/Jupyter magic: render figures inline in the notebook. This line is
# only valid inside a notebook cell, not in a plain .py script.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
# Show 5 random examples of each digit in a 10-wide grid to illustrate the
# handwriting variability the classifiers must cope with.
plt.figure(figsize=(10,10))
images = np.r_[Image_train[i0]]
plot_mnist_image(images, images_per_row=10)
plt.show()
Reshape the images to flat 784-element vectors and normalize pixel values to [0, 1] for modeling
# Flatten each 28x28 image into a 784-vector and scale uint8 pixel values
# from [0, 255] down to [0.0, 1.0] for the models below.
# Deriving the sample count from the array itself (instead of hard-coding
# 60000/10000) keeps this correct for any subset of MNIST.
Image_train = Image_train.reshape((len(Image_train), 28 * 28))
Image_train = Image_train.astype('float32') / 255
Image_test = Image_test.reshape((len(Image_test), 28 * 28))
Image_test = Image_test.astype('float32') / 255
# Baseline model: multinomial logistic regression on the raw pixel vectors.
from sklearn.linear_model import LogisticRegression

# The default max_iter=100 does not converge on MNIST with the lbfgs
# solver; raise the cap so training finishes without a ConvergenceWarning.
LR = LogisticRegression(max_iter=1000)
LR.fit(Image_train, Number_train)
Number_pred_LR = LR.predict(Image_test)

from sklearn.metrics import accuracy_score
# print() so the score is visible when run as a script, not only as a
# notebook cell's bare-expression display.
print('Logistic regression test accuracy:', accuracy_score(Number_test, Number_pred_LR))
Logistic regression - estimated probabilities of digits (a test image)
# Show one test digit and the model's per-class probabilities for it.
# The colormap is passed by name; matplotlib.cm attribute access
# (matplotlib.cm.binary) is deprecated in recent matplotlib releases.
plt.imshow(Image_test[10].reshape(28, 28), cmap="binary")
LR.predict_proba(Image_test[10].reshape(1, -1))
Confusion matrix - logistic regression
# Heat-map visualization of the logistic-regression confusion matrix:
# rows are true digits, columns are predicted digits.
import seaborn as sns

cm = metrics.confusion_matrix(Number_test, Number_pred_LR)
plt.figure(figsize=(9, 9))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues_r')
plt.ylabel('Actual Number')
plt.xlabel('Predicted Number')
plt.title('Confusion Matrix : Logistic Regression', size=15)
Define the model parameters and train the random forest model
# Random forest: 100 entropy-split trees with a fixed seed so the fit is
# reproducible.
classifier = RandomForestClassifier(
    criterion='entropy',
    n_estimators=100,
    random_state=42,
)
classifier.fit(Image_train, Number_train)

# View the per-pixel feature importances as a 28x28 map.
importance_map = classifier.feature_importances_.reshape(28, 28)
plt.imshow(importance_map)
Find the accuracy of the random forest model on the test dataset
from sklearn.metrics import accuracy_score

# Predict on the held-out test images and report the random forest's
# accuracy (displayed as the cell's value in a notebook).
Number_pred_RF = classifier.predict(Image_test)
accuracy_score(Number_test, Number_pred_RF)
Make a confusion matrix for the random forest model on the test set
# Heat-map visualization of the random-forest confusion matrix:
# rows are true digits, columns are predicted digits.
import seaborn as sns

rf_cm = metrics.confusion_matrix(Number_test, Number_pred_RF)
plt.figure(figsize=(9, 9))
sns.heatmap(rf_cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues_r')
plt.ylabel('Actual Number')
plt.xlabel('Predicted Number')
plt.title('Confusion Matrix : Random Forest', size=15)
from keras import models
from keras import layers

# Fully connected network: two 512-unit ReLU hidden layers over the 784
# pixel inputs, with a 10-way softmax output over the digit classes.
mnist_nn = models.Sequential([
    layers.Dense(512, activation='relu', input_shape=(28 * 28,)),
    layers.Dense(512, activation='relu'),
    layers.Dense(10, activation='softmax'),
])
mnist_nn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
Transform the output variables to categories since they are defined as numeric (0,1,..,9)
# One-hot encode the integer labels (0-9) to match the 10-way softmax output
# and the categorical_crossentropy loss.
# NOTE(review): re-running this cell double-encodes the labels — the second
# call sees one-hot rows instead of integers. Run it exactly once.
from keras.utils import to_categorical
Number_train = to_categorical(Number_train)
Number_test = to_categorical(Number_test)
Train the model
# Train for 5 epochs with mini-batches of 128 samples.
mnist_nn.fit(Image_train, Number_train, epochs=5, batch_size=128)
Make the predictions for test dataset using the neural networks model
# Evaluate loss and accuracy on the held-out test set.
test_loss, test_acc = mnist_nn.evaluate(Image_test, Number_test)
Test the accuracy of the model on the test dataset
# Report the neural network's test-set accuracy.
print('test_acc:', test_acc)
Make a confusion matrix for the test dataset to evaluate the prediction accuracy
# Predicted digit for each test image.
# Sequential.predict_classes() was deprecated and removed (TensorFlow 2.6);
# take the argmax of the softmax probabilities instead.
Number_predict = np.argmax(mnist_nn.predict(Image_test), axis=1)

import seaborn as sns

# The labels were one-hot encoded above, so argmax(1) recovers the true
# digit for each row.
nn_cm = metrics.confusion_matrix(Number_test.argmax(1), Number_predict)
plt.figure(figsize=(9, 9))
sns.heatmap(nn_cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues_r')
plt.ylabel('Actual Number')
plt.xlabel('Predicted Number')
plt.title('Confusion Matrix : Neural Network', size=15)