1.1 Load the digit dataset

In [2]:
# Load the digits dataset, then unpack its feature matrix and label vector
# into their own names for the cells that follow.
digits = datasets.load_digits()
X_digits, y_digits = digits.data, digits.target

# Leave the list of available fields as the cell's displayed value.
digits.keys()
Out[2]:
dict_keys(['data', 'target', 'target_names', 'images', 'DESCR'])

1.2 Splitting data into train and test sets

In [3]:
# Report the dimensions of features and labels, then their container types
# (one value per output line).
print(X_digits.shape, y_digits.shape, sep='\n')
print(type(X_digits), type(y_digits), sep='\n')

# Splitting data into train and test: hold out 33% of the samples for
# testing, with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_digits,
    y_digits,
    test_size=0.33,
    random_state=42,
)
(1797, 64)
(1797,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

1.3 Checking the shape of the input data

In [4]:
# Summarise the shape of every partition produced by the split, one per line.
print(
    'x_train:\t{}'.format(X_train.shape),
    'y_train:\t{}'.format(y_train.shape),
    'x_test:\t\t{}'.format(X_test.shape),
    'y_test:\t\t{}'.format(y_test.shape),
    sep='\n',
)
x_train:	(1203, 64)
y_train:	(1203,)
x_test:		(594, 64)
y_test:		(594,)

1.4 Plot the first 64 digits

In [5]:
# Create a 6x6-inch canvas and let the panels fill it edge to edge,
# leaving only a thin gap between neighbouring panels.
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1,
    hspace=0.05, wspace=0.05,
)

# Draw the first 64 samples on an 8x8 grid of tick-free axes
# (subplot slots are numbered from 1, hence start=1).
for slot, (image, label) in enumerate(zip(digits.images[:64], digits.target[:64]), start=1):
    ax = fig.add_subplot(8, 8, slot, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')

    # Annotate the panel with its target value at data coordinates (0, 7).
    ax.text(0, 7, str(label))