Load Dataset: Import As
Load Dataset: Import As
Load dataset
In [2]:
In [3]:
import pandas as pd
# %matplotlib inline
import matplotlib.pyplot as plt
# Load the advertising dataset into a DataFrame.
# The raw string keeps the Windows backslashes literal instead of treating them as escapes.
# NOTE(review): the exported line was cut off before the closing parenthesis; index_col=0 is
# inferred from the recorded outputs (row index 1..200, four data columns) - confirm.
data = pd.read_csv(
    r"D:\8 semester\Data warehousing and data mining\Labs\LAB6\Advertising.csv",
    index_col=0,
)
In [4]:
Task No. 1
In [5]:
# Dimensions of the frame as a (rows, columns) tuple.
data.shape
Out[5]:
(200, 4)
localhost:8888/notebooks/lab06.ipynb# 1/9
5/7/2019 lab06
In [6]:
# Summary statistics for the columns of the frame.
data.describe()
Out[6]:
In [7]:
# Largest value in each column.
data.max()
Out[7]:
TV 296.4
Radio 49.6
Newspaper 114.0
Sales 27.0
dtype: float64
In [8]:
# Smallest value in each column.
data.min()
Out[8]:
TV 0.7
Radio 0.0
Newspaper 0.3
Sales 1.6
dtype: float64
localhost:8888/notebooks/lab06.ipynb# 2/9
5/7/2019 lab06
In [9]:
# Arithmetic mean of each column.
data.mean()
Out[9]:
TV 147.0425
Radio 23.2640
Newspaper 30.5540
Sales 14.0225
dtype: float64
In [10]:
# Number of non-missing entries in each column.
data.count()
Out[10]:
TV 200
Radio 200
Newspaper 200
Sales 200
dtype: int64
In [11]:
#data.count
In [12]:
# Column labels of the frame, returned as an array.
data.columns.values
Out[12]:
In [13]:
# Print a concise overview: index range, per-column non-null counts and dtypes, memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 200 entries, 1 to 200
Data columns (total 4 columns):
TV 200 non-null float64
Radio 200 non-null float64
Newspaper 200 non-null float64
Sales 200 non-null float64
dtypes: float64(4)
memory usage: 7.8 KB
localhost:8888/notebooks/lab06.ipynb# 3/9
5/7/2019 lab06
In [14]:
# Data type of each column.
data.dtypes
Out[14]:
TV float64
Radio float64
Newspaper float64
Sales float64
dtype: object
In [15]:
# Number of axes (dimensions) of the frame.
data.ndim
Out[15]:
In [16]:
# Total number of elements in the frame (rows x columns).
data.size
Out[16]:
800
In [17]:
# Underlying NumPy array of the frame. Only the first five rows are displayed so the
# cell output stays readable instead of printing the whole array.
data.values[:5]
Out[17]:
localhost:8888/notebooks/lab06.ipynb# 4/9
5/7/2019 lab06
In [18]:
# True when the frame holds no elements, False otherwise.
data.empty
Out[18]:
False
Task No. 3
In [19]:
# Draw one histogram per column, side by side in a single row of four panels.
# Each panel is titled with its column name so the figure can be read on its own.
fig, ax = plt.subplots(1, 4, figsize=(15, 3))
data['Radio'].plot(kind="hist", ax=ax[0], color='blue', alpha=0.6, title='Radio')
data['Sales'].plot(kind="hist", ax=ax[1], color='green', alpha=0.6, title='Sales')
data['Newspaper'].plot(kind="hist", ax=ax[2], color='cyan', alpha=0.6, title='Newspaper')
data['TV'].plot(kind="hist", ax=ax[3], color='red', alpha=0.6, title='TV')
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x20e1e4f78d0>
In [20]:
In [21]:
# Select the Sales column as y.
# NOTE(review): presumably the regression target that y_train is later split from - the
# cells that build X and the train/test split are not visible in this export; confirm.
y = data['Sales']
localhost:8888/notebooks/lab06.ipynb# 5/9
5/7/2019 lab06
In [22]:
In [23]:
In [24]:
In [25]:
# Create the regression estimator with its default settings.
# NOTE(review): the import of LinearRegression is not visible in this export
# (presumably sklearn.linear_model) - confirm it is imported in an earlier cell.
lr = LinearRegression()
In [26]:
# Fit the estimator on the training split.
# NOTE(review): X_train and y_train come from cells whose source is missing here - confirm.
lr.fit(X_train, y_train)
Out[26]:
In [27]:
# Show the intercept term of the fitted model.
print(lr.intercept_)
2.8769666223179318
In [28]:
# Show the fitted coefficients of the model.
print(lr.coef_)
In [29]:
# Predict with the fitted model on the held-out inputs.
# NOTE(review): X_test is defined in a cell whose source is missing here - confirm.
y_pred = lr.predict(X_test)
localhost:8888/notebooks/lab06.ipynb# 6/9
5/7/2019 lab06
In [30]:
1.0668917082595215
Contribution
How Sales relates to the other variables, shown with scatter plots
In [31]:
localhost:8888/notebooks/lab06.ipynb# 7/9
5/7/2019 lab06
In [32]:
# Collect the distinct column dtypes present in the frame and show them as a list.
unique_dtypes = set(data.dtypes.tolist())
list(unique_dtypes)
Out[32]:
[dtype('float64')]
In [33]:
Out[33]:
In [34]:
localhost:8888/notebooks/lab06.ipynb# 8/9