1.1 Load the digits dataset¶

# Load the digits dataset and unpack the feature matrix and the label vector
# in a single assignment.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Last expression of the cell: shows which fields the dataset object exposes.
digits.keys()

dict_keys(['data', 'target', 'target_names', 'images', 'DESCR'])

1.2 Splitting data into train and test sets¶

# Sanity-check the raw arrays before splitting: both shapes first, then both
# container types, one value per output line.
print(
    X_digits.shape,
    y_digits.shape,
    type(X_digits),
    type(y_digits),
    sep='\n',
)

# Splitting the data: hold out 33% of the samples for testing; the fixed
# random_state keeps the partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_digits,
    y_digits,
    test_size=0.33,
    random_state=42,
)

(1797, 64)
(1797,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

1.3 Checking the shape of the input data¶

# Report the shape of every split in one write; the tab counts differ so the
# shape tuples line up in a single column.
print('\n'.join([
    'x_train:\t{}'.format(X_train.shape),
    'y_train:\t{}'.format(y_train.shape),
    'x_test:\t\t{}'.format(X_test.shape),
    'y_test:\t\t{}'.format(y_test.shape),
]))

x_train:	(1203, 64)
y_train:	(1203,)
x_test:		(594, 64)
y_test:		(594,)

1.4 Plot first 64 digits¶

# Square canvas with the outer margins removed and only a sliver of spacing
# between neighbouring panels.
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1,
    hspace=0.05, wspace=0.05,
)

# Lay the first 64 samples out on an 8x8 grid of tick-less panels; each
# sample is itself an 8x8-pixel image.
for i, (image, label) in enumerate(zip(digits.images[:64], digits.target[:64])):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')

    # Annotate the panel with its target value at pixel coordinates (0, 7).
    ax.text(0, 7, str(label))