RandomForest for Iris classification
In [1]:
Copied!
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import datasets
from sklearn import metrics
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn import datasets
from sklearn import metrics
In [2]:
Copied!
iris = datasets.load_iris()
iris = datasets.load_iris()
In [3]:
Copied!
data=pd.DataFrame({
'sepal_length':iris.data[:,0],
'sepal_width':iris.data[:,1],
'petal_length':iris.data[:,2],
'petal_width':iris.data[:,3],
'species':iris.target
})
data.head()
data=pd.DataFrame({
'sepal_length':iris.data[:,0],
'sepal_width':iris.data[:,1],
'petal_length':iris.data[:,2],
'petal_width':iris.data[:,3],
'species':iris.target
})
data.head()
Out[3]:
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
In [4]:
Copied!
X=data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y=data['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
X=data[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
y=data['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
In [5]:
Copied!
from sklearn.ensemble import RandomForestClassifier
clf=RandomForestClassifier(n_estimators=100)
clf.fit(X_train,y_train)
y_pred=clf.predict(X_test)
from sklearn.ensemble import RandomForestClassifier
clf=RandomForestClassifier(n_estimators=100)
clf.fit(X_train,y_train)
y_pred=clf.predict(X_test)
In [6]:
Copied!
from sklearn import metrics
print("Accuracy :",metrics.accuracy_score(y_test, y_pred))
from sklearn import metrics
print("Accuracy :",metrics.accuracy_score(y_test, y_pred))
Accuracy : 0.9555555555555556
In [7]:
Copied!
clf
clf
Out[7]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', max_depth=None, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None, oob_score=False, random_state=None, verbose=0, warm_start=False)
In [8]:
Copied!
feature_imp = pd.Series(clf.feature_importances_,index=iris.feature_names).sort_values(ascending=False)
feature_imp
feature_imp = pd.Series(clf.feature_importances_,index=iris.feature_names).sort_values(ascending=False)
feature_imp
Out[8]:
petal width (cm) 0.501632 petal length (cm) 0.379573 sepal length (cm) 0.096711 sepal width (cm) 0.022084 dtype: float64
In [9]:
Copied!
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
#plt.legend()
plt.show()
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title("Visualizing Important Features")
#plt.legend()
plt.show()
In [10]:
Copied!
X=data[['petal_length', 'petal_width']]
y=data['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.70, random_state=5)
X=data[['petal_length', 'petal_width']]
y=data['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.70, random_state=5)
In [11]:
Copied!
clf_sf=RandomForestClassifier(n_estimators=100)
clf_sf.fit(X_train,y_train)
y_pred=clf_sf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
clf_sf=RandomForestClassifier(n_estimators=100)
clf_sf.fit(X_train,y_train)
y_pred=clf_sf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
Accuracy: 0.9523809523809523
In [ ]:
Copied!