import numpy as np
import matplotlib.pyplot as plt
# Create a couple of numpy arrays.
# X contains the feature values: six 2-D points, one row per observation.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
# Y contains the target values (class labels), one per row of X.
Y = np.array([1, 1, 1, 2, 2, 2])

# Plot the data, with points colored by target value.
plt.scatter(X[:, 0], X[:, 1], c=Y)

# Feature values for new data.
new_obs = np.array([[-0.8, -1], [1, 0.8], [1, -0.5]])
# Plot the new data on the same figure. No `c=` is passed because these
# points have no target value yet, so they get a color of their own.
plt.scatter(new_obs[:, 0], new_obs[:, 1])
plt.show()
# Import the Gaussian naive Bayes classifier.
from sklearn.naive_bayes import GaussianNB

# Create an instance of the classifier.
clf = GaussianNB()
# Fit the classifier to the data.
clf.fit(X, Y)
# Make predictions on the new feature values based on the fit.
predictions = clf.predict(new_obs)
print("Predicted targets (labels) for new data:", predictions, "\n")

# Extend the feature and target arrays with the new data and predicted targets.
# axis=0 appends new_obs as extra rows; without it np.append flattens its inputs.
new_X = np.append(X, new_obs, axis=0)
new_Y = np.append(Y, predictions)
# Create a new scatterplot with the new data points colored according to
# their predicted classification.
plt.scatter(new_X[:, 0], new_X[:, 1], c=new_Y)
plt.show()
from sklearn import datasets

# Load the iris dataset and fit a Gaussian naive Bayes classifier to all of it.
iris = datasets.load_iris()
gnb = GaussianNB()
gnb.fit(iris.data, iris.target)
# Predict on the same samples the classifier was fitted on, so the mismatch
# count below measures training error rather than performance on unseen data.
y_pred = gnb.predict(iris.data)
print("Iris dataset predictions based on GaussianNB classifier")
# Show each prediction next to its true target.
for pred, targ in zip(y_pred, iris.target):
    print('Predicted:', pred, 'Target:', targ)
# Summary: total number of samples and how many were predicted incorrectly.
print("\nNumber of mislabeled points out of a total %d points : %d"
      % (iris.data.shape[0], (iris.target != y_pred).sum()))
# Split the iris dataset into data and target arrays.
# NOTE: this rebinds X, which held the toy feature array earlier in the script.
X = iris.data
y = iris.target

# Split the iris data into train and test data for the purposes of cross
# validation, using numpy.random.permutation to split the data randomly.
# Run the code several times for different randomly assigned train and
# test samples.
indices = np.random.permutation(len(X))
# Assign all but the last 10 randomly permuted indices for training.
X_train = X[indices[:-10]]
y_train = y[indices[:-10]]
# Assign the remaining 10 indices for testing.
X_test = X[indices[-10:]]
y_test = y[indices[-10:]]
# Create and fit a nearest-neighbor classifier.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# Predict the target for the test data.
pred = knn.predict(X_test)
print('\n\nPredicted values for test data based on KNN classifier:\n',
      pred, '\n')
# The true targets for the test data, printed for comparison with the predictions.
print('True values for test data:\n', y_test, '\n')
# Use the classifier's .score method for measuring the accuracy of predictions.
score = knn.score(X_test, y_test)
print('Accuracy is', score)