Dsbda 10
Dsbda 10
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Dimensions of the Iris frame as a (rows, columns) tuple.
In [3]: Iris.shape
Out[3]: (150, 6)
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of Iris; note that this includes the Id column, which is an int64 column.
In [4]: Iris.describe()
# Per-column dtypes, to check which columns are numeric and which are text.
In [5]: Iris.dtypes
Out[5]: Id int64
SepalLengthCm float64
SepalWidthCm float64
PetalLengthCm float64
PetalWidthCm float64
Species object
dtype: object
# Count missing values per column (isna is the same check as isnull).
In [6]: Iris.isna().sum()
Out[6]: Id 0
SepalLengthCm 0
SepalWidthCm 0
PetalLengthCm 0
PetalWidthCm 0
Species 0
dtype: int64
# Number of rows per species, to check how balanced the classes are.
In [7]: print(Iris.groupby(by="Species").size())
Species
Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
dtype: int64
# Build a second copy of the dataset from load_iris() (presumably
# sklearn.datasets.load_iris -- its import is not visible in this export).
iris = load_iris()

# Feature matrix as a DataFrame, with the integer class label appended as the
# last column under the name 'target'.
iris_df = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
).assign(target=iris.target)

# One histogram per column of iris_df (the 'target' label column is included).
iris_df.hist()
plt.show()
# Box-and-whisker plot of every column in iris_df on shared axes; points drawn
# beyond the whiskers are the candidate outliers. The 'target' label column is
# part of iris_df, so it is plotted as well.
In [15]: iris_df.boxplot()
plt.show()
# Summary statistics for every column of iris_df, including the integer
# 'target' label column.
In [16]: iris_df.describe()
Out[16]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
In [19]: Q1 = Iris.SepalWidthCm.quantile(0.25)
Q3 = Iris.SepalWidthCm.quantile(0.75)
IQR = Q3-Q1
print(IQR)
0.5
# Select the sepal-width outliers using the 1.5 * IQR rule: a row is an outlier
# when its SepalWidthCm lies below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.
#
# The two conditions must be combined with the element-wise OR operator `|`,
# and each comparison needs its own parentheses because `|` binds tighter than
# `<` / `>`. The previous expression joined them with `/` and closed the first
# parenthesis in the wrong place, so it divided the lower fence by a boolean
# Series and compared SepalWidthCm against that quotient instead of filtering
# on either fence.
In [20]: lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
data = Iris[(Iris.SepalWidthCm < lower_fence) | (Iris.SepalWidthCm > upper_fence)]
# Display the `data` frame produced by the filter in the previous cell.
In [21]: data
Out[21]: Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
In [ ]: