from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import PolynomialFeatures

import ssl

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    # Legacy Python that doesn't verify HTTPS certificates by default
    pass
else:
    # Handle target environment that doesn't support HTTPS verification
    ssl._create_default_https_context = _create_unverified_https_context


# Linear regression problem
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(0)
m = 100
X = np.linspace(0,10,m).reshape(m,1)
y = X + np.random.randn(m,1)
#plt.scatter(X,y)
#plt.show()

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X,y)
score = model.score(X,y)
print(score)
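# A quick look at the fitted parameters (coef_ and intercept_ are
# standard LinearRegression attributes); since y ≈ x here, the slope
# should be close to 1 and the intercept close to 0:
print("slope:", model.coef_, "intercept:", model.intercept_)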
predict_y = model.predict(X)
#print(predict_y)

#plt.scatter(X,y)
#plt.plot(X,predict_y,c="r")
#plt.show()

# A better model
from sklearn.svm import SVR
model = SVR(C=100)
model.fit(X, y.ravel())  # SVR expects a 1-D target
score = model.score(X,y)
print(score)
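# A minimal sketch of how C affects the fit: larger C means less
# regularization and a closer fit to the training data (the values
# below are arbitrary choices):
for C in (0.1, 1, 100):
    print(C, SVR(C=C).fit(X, y.ravel()).score(X, y.ravel()))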

#pip install seaborn
import seaborn as sns
titanic = sns.load_dataset('titanic')

#print(titanic.shape)
#print(titanic.head())
titanic = titanic[['survived','pclass','sex','age']]
titanic.dropna(axis=0,inplace=True)
titanic['sex'] = titanic['sex'].map({'male': 0, 'female': 1})  # encode sex numerically
#print(titanic.head())

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=5)  # n_neighbors: number of neighbors
y = titanic['survived']
X = titanic.drop('survived',axis=1)

model.fit(X,y)
score = model.score(X,y)
print(score)
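# Scoring on the training data is optimistic; a held-out split gives a
# fairer estimate (sketch; test_size=0.2 and random_state=0 are
# arbitrary choices):
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
print(KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train).score(X_test, y_test))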

def survie(model, pclass=1, sex=0, age=50):
    # predict survival (and the class probabilities) for one passenger
    x = np.array([pclass, sex, age]).reshape(1, 3)
    print(model.predict(x))
    print(model.predict_proba(x))

survie(model)
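# The keyword arguments make it easy to test other profiles, e.g. a
# hypothetical 25-year-old woman travelling in third class:
survie(model, pclass=3, sex=1, age=25)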

########

from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)
#plt.scatter(X[:,0],X[:,1],c=y,alpha=0.8)
#plt.show()

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
# Warning: cross-validation scores can be misleading if the classes are imbalanced!

cv = KFold(5,random_state=0,shuffle=True)
print(cross_val_score(KNeighborsClassifier(),X,y,cv=cv))
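# cross_val_score returns one score per fold; the mean and standard
# deviation summarize performance and its stability across folds:
scores = cross_val_score(KNeighborsClassifier(), X, y, cv=cv)
print(scores.mean(), scores.std())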

# Leave-one-out: evaluate on a single held-out sample at a time
from sklearn.model_selection import LeaveOneOut
cv = LeaveOneOut()
print(cross_val_score(KNeighborsClassifier(),X,y,cv=cv))
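# LeaveOneOut yields one fold per sample (150 fits on iris); averaging
# the 0/1 fold scores gives the leave-one-out accuracy:
print(cross_val_score(KNeighborsClassifier(), X, y, cv=cv).mean())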

# Random train/test splits (here 4 splits, each holding out 20%)
from sklearn.model_selection import ShuffleSplit
cv = ShuffleSplit(4,test_size=0.2)
print(cross_val_score(KNeighborsClassifier(),X,y,cv=cv))

# The default choice (spreads each class evenly across the folds)
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold(4)  # number of folds
print(cross_val_score(KNeighborsClassifier(),X,y,cv=cv))

# By group: samples sharing a group label never end up in both train and test
from sklearn.model_selection import GroupKFold
cv = GroupKFold(5)  # number of folds; pass the splitter itself as cv, not get_n_splits()
print(cross_val_score(KNeighborsClassifier(), X, y, cv=cv, groups=X[:, 0]))

##############################
# REGRESSION METRICS: R^2, MAE, MSE

from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error
y = np.array([1,2,2,3,5,2])
y_pred = np.array([1,2,2,5,7,1000])
print("MAE",mean_absolute_error(y,y_pred))
# MSE penalizes large errors more heavily
print("MSE",mean_squared_error(y,y_pred))
print("Mediane Absolute",median_absolute_error(y,y_pred))

# load_boston was removed from scikit-learn in version 1.2; the
# California housing dataset is used here instead
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
X = housing.data
y = housing.target
model = LinearRegression()
model.fit(X,y)
y_pred = model.predict(X)
#plt.scatter(X[:,5],y,label='y')
#plt.scatter(X[:,5],y_pred,alpha=0.8,label='y_pred')
#plt.show()

err_hist = np.abs(y-y_pred)
#plt.hist(err_hist,bins=50)
#plt.show()

score = model.score(X,y)
print("R^2",score)

#############################
# sklearn make_scorer tutorial: creating your own metrics.
np.random.seed(0)
m=100
X = np.linspace(0,4,m).reshape(m,1)
y = 2+X**1.3 * np.random.randn(m,1)
y = y.ravel()
print(X.shape)
print(y.shape)


#plt.scatter(X,y)
#plt.xlabel('x')
#plt.ylabel('y')
#plt.show()

from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X,y)
y_predict = model.predict(X)

#plt.scatter(X,y)
#plt.plot(X,y_predict,c='r',lw=4)
#plt.show()

from sklearn.metrics import mean_absolute_error

print(mean_absolute_error(y,y_predict))

# The client asks for a 20% tolerance
# Graphical representation

#plt.scatter(X,y)
#plt.plot(X,y_predict,c='r',lw=4)
#plt.plot(X,y*0.8,c='g',ls='--')
#plt.plot(X,y*1.2,c='g',ls='--')
#plt.show()

def custom_metric(y, y_pred):
    # fraction of predictions within ±20% of the true value
    return np.sum((y_pred < y*1.2) & (y_pred > y*0.8)) / y.size

print(custom_metric(y,y_predict))

from sklearn.metrics import make_scorer

custom_score = make_scorer(custom_metric, greater_is_better=True)  # greater_is_better: higher scores are better
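# A scorer built with make_scorer is called with (estimator, X, y),
# not with (y_true, y_pred) like the raw metric:
print(custom_score(model, X, y))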

# cross_val_score (imported above) accepts the custom scorer through
# the scoring parameter:
#score = cross_val_score(LinearRegression(), X, y, cv=3, scoring=custom_score)
#print(score)

from sklearn.svm import SVR
model = SVR(kernel='rbf')  # the degree parameter only applies to the 'poly' kernel
params = {'gamma':np.arange(0.1,1,0.05)}

grid = GridSearchCV(model,param_grid=params,cv=3,scoring=custom_score)

grid.fit(X,y)

best_model = grid.best_estimator_

y_pred = best_model.predict(X)

print(custom_metric(y,y_pred))
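# GridSearchCV also exposes the winning hyper-parameters and the
# cross-validated score that selected them:
print(grid.best_params_, grid.best_score_)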

