1.1 Fix random seed for reproducibility

In [2]:
# Seed NumPy's legacy global RNG so code that draws from np.random repeats across runs.
seed = 7
np.random.seed(seed)

1.2 Generate the moons dataset and split it into training and test sets

In [3]:
# Pass the seed explicitly instead of relying on NumPy's global RNG state:
# re-running this cell on its own (or out of order) then regenerates the same
# points rather than a fresh random draw.
X, y = make_moons(n_samples=6000, noise=0.08, random_state=seed)
print(X.shape)
print(y.shape)
print(type(X))
print(type(y))

# Splitting data into train and test: 33% is held out, and the fixed
# random_state keeps the partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
(6000, 2)
(6000,)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>

1.3 Data visualization

In [4]:
# Overlay the train and test points on a single set of axes, one colour per split.
fig, ax = plt.subplots(figsize=(12, 8))
for points, colour, tag in (
    (X_train, 'b', 'class X_train'),
    (X_test, 'r', 'class X_test'),
):
    # Column 0 goes on the x-axis, column 1 on the y-axis.
    ax.scatter(points[:, 0], points[:, 1], color=colour, label=tag)
ax.set_xlabel('feature1')
ax.set_ylabel('feature2')
ax.legend()
# Equal scaling on both axes so distances are not distorted.
ax.axis('equal')
plt.show()