# NOTE(review): the next three imports appear accidental (typical IDE
# auto-import artefacts) — `label`, `imp` and `L` are never used below, and
# the `imp` module is deprecated (removed in Python 3.12). Candidates for
# deletion once confirmed unused in the rest of the project.
from cProfile import label
import imp
from re import L
# NOTE(review): make_regression, SGDRegressor and PolynomialFeatures are
# also unused in this file — confirm before deleting.
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import PolynomialFeatures
import ssl
# Disable HTTPS certificate verification globally so seaborn.load_dataset
# can download data on machines with broken certificate stores.
# NOTE(review): this is a well-known workaround and is only acceptable for
# local tutorial scripts — never ship it in production code.
_unverified_ctx = getattr(ssl, "_create_unverified_context", None)
if _unverified_ctx is not None:
    # Legacy Pythons without this attribute already skip verification.
    ssl._create_default_https_context = _unverified_ctx
# Linear regression problem: fit y = x + Gaussian noise.
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(0)  # reproducible noise
m = 100            # number of samples
X = np.linspace(0, 10, m).reshape(m, 1)
y = X + np.random.randn(m, 1)
#plt.scatter(X,y)
#plt.show()

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X, y)
score = model.score(X, y)  # R^2 measured on the training data itself
print(score)
predict_y = model.predict(X)
#print(predict_y)
#plt.scatter(X,y)
#plt.plot(X,predict_y,c="r")
#plt.show()
# A more flexible model: support-vector regression.
from sklearn.svm import SVR
model = SVR(C=100)
# FIX: SVR expects a 1-D target; passing y with shape (m, 1) triggers a
# DataConversionWarning and an implicit ravel. Flatten it explicitly.
model.fit(X, y.ravel())
score = model.score(X, y)  # R^2 on the training data
print(score)
#pip install seaborn
import seaborn as sns

# Titanic dataset: predict survival from class, sex and age.
titanic = sns.load_dataset('titanic')
#print(titanic.shape)
#print(titanic.head())
# FIX: take an explicit copy of the column slice, and assign transformation
# results instead of using inplace=True on a slice — the original pattern
# raises SettingWithCopyWarning / chained-assignment FutureWarning in
# modern pandas and can silently fail to modify the frame.
titanic = titanic[['survived', 'pclass', 'sex', 'age']].copy()
titanic = titanic.dropna(axis=0)  # drop rows with missing values (mostly age)
# Encode sex numerically: male -> 0, female -> 1 (dropna already removed NaN,
# so map cannot introduce new missing values here).
titanic['sex'] = titanic['sex'].map({'male': 0, 'female': 1})
#print(titanic.head())

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=5)  # n_neighbors: number of neighbours
y = titanic['survived']
X = titanic.drop('survived', axis=1)
model.fit(X, y)
score = model.score(X, y)  # training accuracy (optimistic: no held-out set)
print(score)
def survie(model, pclass=1, sex=0, age=50):
    """Predict survival for a single passenger.

    Parameters: a fitted classifier `model`, the passenger class,
    sex (0 = male, 1 = female) and age. Prints the predicted class and
    the class probabilities.

    FIX: the original printed the results and discarded them; we now also
    return (prediction, probabilities) so callers can use the values
    (backward compatible — previous callers ignored the None return).
    """
    x = np.array([pclass, sex, age]).reshape(1, 3)
    prediction = model.predict(x)
    probabilities = model.predict_proba(x)
    print(prediction)
    print(probabilities)
    return prediction, probabilities
# Try the helper on the trained titanic model (defaults: 1st class, male, 50).
survie(model)

########
# Iris dataset: 150 samples, 4 features, 3 balanced classes.
from sklearn.datasets import load_iris

iris = load_iris()
X, y = iris.data, iris.target
print(X.shape)
#plt.scatter(X[:,0],X[:,1],c=y,alpha=0.8)
#plt.show()
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier


def _show_cv_scores(cv):
    # Print the per-split scores of a default k-NN classifier under `cv`.
    print(cross_val_score(KNeighborsClassifier(), X, y, cv=cv))


# Careful: a plain KFold takes no notice of class imbalance!
_show_cv_scores(KFold(5, random_state=0, shuffle=True))
# Leave-one-out: each sample is evaluated on its own.
_show_cv_scores(LeaveOneOut())
# Random train/test splits, drawn 4 times with 20% test data.
_show_cv_scores(ShuffleSplit(4, test_size=0.2))
# The usual default: each fold preserves the class proportions.
_show_cv_scores(StratifiedKFold(4))  # number of folds
# Grouped CV: samples sharing a group value never appear in both the train
# and the test fold.
from sklearn.model_selection import GroupKFold
# BUG FIX: the original stored get_n_splits() — an int — in `cv`, so
# cross_val_score silently fell back to a plain, ungrouped 5-fold split.
# Pass the GroupKFold splitter itself, plus the groups, so the grouping
# constraint is actually honoured.
cv = GroupKFold(5)  # number of folds
print(cross_val_score(KNeighborsClassifier(), X, y, cv=cv, groups=X[:, 0]))
##############################
# REGRESSION METRICS: R^2, MAE, MSE.
from sklearn.metrics import *

y = np.array([1, 2, 2, 3, 5, 2])
y_pred = np.array([1, 2, 2, 5, 7, 1000])  # note the huge outlier at the end
print("MAE", mean_absolute_error(y, y_pred))
# MSE squares the residuals, so large errors dominate the score.
print("MSE", mean_squared_error(y, y_pred))
# The median of absolute errors is robust to the outlier above.
print("Mediane Absolute", median_absolute_error(y, y_pred))
# FIX: load_boston was removed in scikit-learn 1.2 (the dataset has known
# ethical problems); fetch_california_housing exposes the same
# .data/.target Bunch interface and keeps this demo runnable.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
X = housing.data
y = housing.target
model = LinearRegression()
model.fit(X, y)
y_pred = model.predict(X)
#plt.scatter(X[:,5],y,label='y')
#plt.scatter(X[:,5],y_pred,alpha=0.8,label='y_pred')
#plt.show()
# Distribution of the absolute prediction errors.
err_hist = np.abs(y - y_pred)
#plt.hist(err_hist,bins=50)
#plt.show()
score = model.score(X, y)  # R^2 on the training data
print("R^2", score)
#############################
# sklearn make_scorer tutorial: building your own metric.
np.random.seed(0)
m = 100
# 100 points on [0, 4]; the noise spread grows with x (heteroscedastic).
X = np.linspace(0, 4, m).reshape(m, 1)
y = (2 + X**1.3 * np.random.randn(m, 1)).ravel()
print(X.shape)
print(y.shape)
#plt.scatter(X,y)
#plt.xlabel('x')
#plt.ylabel('y')
#plt.show()

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X, y)
y_predict = model.predict(X)
#plt.scatter(X,y)
#plt.plot(X,y_predict,c='r',lw=4)
#plt.show()

from sklearn.metrics import mean_absolute_error

print(mean_absolute_error(y, y_predict))

# The client asks for a 20% tolerance band around the true values.
# Graphical view of the band:
#plt.scatter(X,y)
#plt.plot(X,y_predict,c='r',lw=4)
#plt.plot(X,y*0.8,c='g',ls='--')
#plt.plot(X,y*1.2,c='g',ls='--')
#plt.show()
def custom_metric(y, y_pred):
    """Fraction of predictions within the client's +/-20% tolerance band.

    Returns a float in [0, 1]: the share of points where ``y_pred`` lies
    strictly between ``0.8 * y`` and ``1.2 * y``.
    """
    # BUG FIX: the original line read `returnnp.sum(...)` — a missing space
    # after `return` that makes the function a NameError at call time.
    within_band = (y_pred < y * 1.2) & (y_pred > y * 0.8)
    return np.sum(within_band) / y.size
print(custom_metric(y, y_predict))

from sklearn.metrics import make_scorer

# greater_is_better tells sklearn whether larger scores mean better models.
custom_score = make_scorer(custom_metric, greater_is_better=True)

from sklearn.model_selection import cross_val_predict
#score =cross_val_predict(LeaveOneOut,X,y,cv=3,scoring=custom_score)
#print(score)

# Tune an RBF-kernel SVR against the custom score.
# NOTE(review): `degree` is only used by the 'poly' kernel, so it has no
# effect here with kernel='rbf'.
from sklearn.svm import SVR

model = SVR(kernel='rbf', degree=3)
params = {'gamma': np.arange(0.1, 1, 0.05)}
grid = GridSearchCV(model, param_grid=params, cv=3, scoring=custom_score)
grid.fit(X, y)
best_model = grid.best_estimator_
y_pred = best_model.predict(X)
print(custom_metric(y, y_pred))