# Open in Colab
# 23/30 Sklearn: Feature Selection
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
#1. Variance Threshold
# Unsupervised selection: keep only the features whose variance across the
# samples is above a chosen threshold.
from sklearn.feature_selection import VarianceThreshold
from sklearn.datasets import load_iris

# Load the iris dataset; X is the feature matrix and y the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Plot every feature column as its own curve, labelled by feature name.
plt.plot(X)
plt.legend(iris.feature_names)


# Variance of each feature (axis=0 reduces over the samples).
X.var(axis=0)

# Fit the selector with a variance cut-off of 0.2.
# (The stray `VarianceThreshold(threshold=0.2)` line that followed `fit` was a
# pasted cell output: it built and discarded an unused estimator, so it is gone.)
selector = VarianceThreshold(threshold=0.2)
selector.fit(X)
# Boolean mask telling which features the selector keeps.
selector.get_support()

# Names of the retained features.
np.array(iris.feature_names)[selector.get_support()]

# Per-feature variances as computed by the selector during fit.
selector.variances_

#2. SelectKBest
# Univariate selection: score every feature against the target and keep the
# k best-scoring ones.
from sklearn.feature_selection import SelectKBest, chi2, f_classif
# Chi-squared test of each feature against y.
chi2(X, y)

# Keep the two top-scoring features using the ANOVA F-test as score function;
# fit() returns the fitted selector, so it can be chained.
selector = SelectKBest(score_func=f_classif, k=2).fit(X, y)
selector.scores_

# Translate the boolean support mask into feature names.
support_mask = selector.get_support()
np.array(iris.feature_names)[support_mask]


#3. Recursive feature Elimination
# RFECV fits the estimator repeatedly, removes `step` feature(s) per round and
# uses cross-validation to decide how many features to keep.
from sklearn.feature_selection import RFECV
from sklearn.linear_model import SGDClassifier
selector = RFECV(SGDClassifier(random_state=0), step=1, min_features_to_select=2, cv=5)
selector.fit(X, y)
# Feature ranking; rank 1 marks the selected features.
print(selector.ranking_)
# `grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2.
# `cv_results_["mean_test_score"]` holds the mean cross-validated score for
# each number of features tried. Checking `cv_results_` first also avoids the
# deprecation warning on 1.0/1.1; the fallback keeps older releases working.
if hasattr(selector, "cv_results_"):
    print(selector.cv_results_["mean_test_score"])
else:
    print(selector.grid_scores_)


# Names of the features retained after elimination.
np.array(iris.feature_names)[selector.get_support()]


#4. SelectFromModel
# Fit an estimator and keep the features whose importance reaches the
# threshold (here the mean importance).
from sklearn.feature_selection import SelectFromModel
# Start again from the full iris feature matrix and labels.
X, y = iris.data, iris.target
# SGDClassifier is imported in the RFE section of this file.
selector = SelectFromModel(estimator=SGDClassifier(random_state=0), threshold='mean').fit(X, y)
# Coefficients learned by the fitted inner estimator.
selector.estimator_.coef_

# Translate the boolean support mask into feature names.
kept_mask = selector.get_support()
np.array(iris.feature_names)[kept_mask]


# Website notice (not part of the notebook): By continuing to browse this site, you accept the use of cookies and other trackers to compile visit statistics and to save your learning activities on your machine. Learn more.