#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Tune the number of neighbors (k) for a KNN classifier on the wine dataset
using 5-fold cross-validation, then fit a final model on the training split
and plot its confusion matrices on the held-out test split."""

from numpy import mean
from scipy.stats import sem
from sklearn.datasets import load_wine
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import numpy as np


def evaluate_model(X, y, repeats):
    """Return 5-fold cross-validation accuracy scores for a KNN classifier.

    Parameters
    ----------
    X, y : array-like
        Feature matrix and target vector.
    repeats : int
        Number of neighbors for ``KNeighborsClassifier``.  NOTE: despite the
        historical name (kept for backward compatibility), this is *not* a
        repeat count — it is ``n_neighbors``.

    Returns
    -------
    numpy.ndarray
        Accuracy score for each of the 5 folds.
    """
    cv = KFold(n_splits=5)
    model = KNeighborsClassifier(n_neighbors=repeats)
    # n_jobs=-1 parallelizes fold evaluation across all available cores.
    return cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)


def main():
    """Run the k-selection sweep, then evaluate the final model."""
    wine = load_wine()
    X = wine.data
    y = wine.target

    # Hold out 30% for testing; the sweep uses only the training split so the
    # test data stays untouched until the final confusion matrices.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=0, test_size=0.3)

    # Sweep k and record the mean cross-validated accuracy for each value.
    neighbors = range(3, 20)
    results = []
    for k in neighbors:
        scores = evaluate_model(X_train, y_train, k)
        print('>%d mean=%.4f se=%.3f' % (k, mean(scores), sem(scores)))
        results.append(mean(scores))

    plt.plot(neighbors, results)
    plt.show()

    # Final model with the chosen k, evaluated on the held-out test split.
    classifier = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)
    np.set_printoptions(precision=2)

    titles_options = [("Confusion matrix, without normalization", None),
                      ("Normalized confusion matrix", 'true')]
    for title, normalize in titles_options:
        # plot_confusion_matrix was removed in scikit-learn 1.2;
        # ConfusionMatrixDisplay.from_estimator is its direct replacement.
        disp = ConfusionMatrixDisplay.from_estimator(
            classifier, X_test, y_test, cmap=plt.cm.Blues,
            normalize=normalize)
        disp.ax_.set_title(title)
        print(title)
        print(disp.confusion_matrix)
    plt.show()


if __name__ == "__main__":
    main()