import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
# Load the housing dataset (expected next to this notebook) into a DataFrame.
data = pd.read_csv("Housing.csv")
# Bare expression: shown as the cell output when run in a notebook.
data
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 yes no no no yes 2
1 12250000 8960 4 4 4 yes no no no yes 3
2 12250000 9960 3 2 2 yes no yes no no 2
3 12215000 7500 4 2 2 yes no yes no yes 3
4 11410000 7420 4 1 2 yes yes yes no yes 2
... ... ... ... ... ... ... ... ... ... ... ...
540 1820000 3000 2 1 1 yes no yes no no 2
541 1767150 2400 3 1 1 no no no no no 0
542 1750000 3620 2 1 1 yes no no no no 0
543 1750000 2910 3 1 1 no no no no no 0
544 1750000 3850 3 1 2 yes no no no no 0
545 rows × 13 columns
# Preview the first 5 rows of the frame.
data.head(n=5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 yes no no no yes 2 yes
1 12250000 8960 4 4 4 yes no no no yes 3 no
2 12250000 9960 3 2 2 yes no yes no no 2 yes
3 12215000 7500 4 2 2 yes no yes no yes 3 yes
4 11410000 7420 4 1 2 yes yes yes no yes 2 no
# Preview the first 10 rows of the frame.
data.head(n=10)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 yes no no no yes 2 yes
1 12250000 8960 4 4 4 yes no no no yes 3
2 12250000 9960 3 2 2 yes no yes no no 2 yes
3 12215000 7500 4 2 2 yes no yes no yes 3 yes
4 11410000 7420 4 1 2 yes yes yes no yes 2
5 10850000 7500 3 3 1 yes no yes no yes 2 yes
6 10150000 8580 4 3 4 yes no no no yes 2 yes
7 10150000 16200 5 3 2 yes no no no no 0
8 9870000 8100 4 1 2 yes yes yes no yes 2 yes
9 9800000 5750 3 2 4 yes yes no no yes 1 yes
# Dimensions of the loaded frame as a (rows, columns) tuple.
data.shape
(545, 13)
# Print a summary of the frame: index range, each column's non-null count and
# dtype, and memory usage. The summary is printed, not returned.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 price 545 non-null int64
1 area 545 non-null int64
2 bedrooms 545 non-null int64
3 bathrooms 545 non-null int64
4 stories 545 non-null int64
5 mainroad 545 non-null object
6 guestroom 545 non-null object
7 basement 545 non-null object
8 hotwaterheating 545 non-null object
9 airconditioning 545 non-null object
10 parking 545 non-null int64
11 prefarea 545 non-null object
12 furnishingstatus 545 non-null object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB
# sklearn.preprocessing turns raw feature values into a representation that is
# better suited to the downstream estimators.
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

# One encoder instance is shared by the encoding cells below and refit for
# each column; it replaces the "yes"/"no" strings with integer codes
# ("no" -> 0, "yes" -> 1).
le = LabelEncoder()
data["mainroad"] = le.fit_transform(data["mainroad"])
data
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 no no no yes 2
1 12250000 8960 4 4 4 1 no no no yes 3
2 12250000 9960 3 2 2 1 no yes no no 2
3 12215000 7500 4 2 2 1 no yes no yes 3
4 11410000 7420 4 1 2 1 yes yes no yes 2
... ... ... ... ... ... ... ... ... ... ... ...
540 1820000 3000 2 1 1 1 no yes no no 2
541 1767150 2400 3 1 1 0 no no no no 0
542 1750000 3620 2 1 1 1 no no no no 0
543 1750000 2910 3 1 1 0 no no no no 0
544 1750000 3850 3 1 2 1 no no no no 0
545 rows × 13 columns
# Refit the shared encoder on "guestroom" and store the integer codes in place
# of the "yes"/"no" strings.
data["guestroom"] = le.fit_transform(data["guestroom"])
data.head(n=5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 no no yes 2 yes
1 12250000 8960 4 4 4 1 0 no no yes 3 no
2 12250000 9960 3 2 2 1 0 yes no no 2 yes
3 12215000 7500 4 2 2 1 0 yes no yes 3 yes
4 11410000 7420 4 1 2 1 1 yes no yes 2 no
# Refit the shared encoder on "basement" and store the integer codes in place
# of the "yes"/"no" strings.
data["basement"] = le.fit_transform(data["basement"])
data.head(n=5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 no yes 2 yes
1 12250000 8960 4 4 4 1 0 0 no yes 3 no
2 12250000 9960 3 2 2 1 0 1 no no 2 yes
3 12215000 7500 4 2 2 1 0 1 no yes 3 yes
4 11410000 7420 4 1 2 1 1 1 no yes 2 no
# Refit the shared encoder on "hotwaterheating" and store the integer codes in
# place of the "yes"/"no" strings.
data["hotwaterheating"] = le.fit_transform(data["hotwaterheating"])
data.head(n=5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 0 yes 2 yes
1 12250000 8960 4 4 4 1 0 0 0 yes 3 no
2 12250000 9960 3 2 2 1 0 1 0 no 2 yes
3 12215000 7500 4 2 2 1 0 1 0 yes 3 yes
4 11410000 7420 4 1 2 1 1 1 0 yes 2 no
# "airconditioning" has no encoding step of its own in this script, but the
# MinMaxScaler applied later needs every feature column to be numeric, so it
# is encoded here alongside "prefarea". Re-encoding a column that already
# holds 0/1 codes leaves it unchanged, so this is safe to re-run.
data["airconditioning"] = le.fit_transform(data["airconditioning"])
# "prefarea" is encoded last so the shared encoder ends this cell fitted on
# the same column as before.
data["prefarea"] = le.fit_transform(data["prefarea"])
data.head(5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 0 1 2
1 12250000 8960 4 4 4 1 0 0 0 1 3
2 12250000 9960 3 2 2 1 0 1 0 0 2
3 12215000 7500 4 2 2 1 0 1 0 1 3
4 11410000 7420 4 1 2 1 1 1 0 1 2
# Refit the shared encoder on "furnishingstatus" and store the integer codes.
# NOTE(review): the values of this column are not visible in the previews
# above; if it has more than two categories, the integer codes impose an
# ordering on them -- confirm that is intended.
data["furnishingstatus"] = le.fit_transform(data["furnishingstatus"])
data.head(n=5)
price area bedrooms bathrooms stories mainroad guestroom basement hotwaterheating airconditioning parking prefarea
0 13300000 7420 4 2 3 1 0 0 0 1 2
1 12250000 8960 4 4 4 1 0 0 0 1 3
2 12250000 9960 3 2 2 1 0 1 0 0 2
3 12215000 7500 4 2 2 1 0 1 0 1 3
4 11410000 7420 4 1 2 1 1 1 0 1 2
# Separate the predictors from the target. The target is reshaped to a
# single-column 2-D array because MinMaxScaler only accepts 2-D input.
x = data.drop(columns=["price"])
y = data["price"].values.reshape(-1, 1)

# Split BEFORE scaling so the held-out rows never influence the scaling
# parameters; the fixed seed makes the reported metrics reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Features and target each get their own scaler. Reusing one instance would
# overwrite the feature ranges when it is refit on the target, making it
# impossible to scale new inputs or invert predictions afterwards.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
x_train = x_scaler.fit_transform(x_train)
x_test = x_scaler.transform(x_test)
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)

lr = LinearRegression()
lr.fit(x_train, y_train)
y_predict = lr.predict(x_test)

# MAE and MSE are in scaled-price units (fractions of the training price
# range); use y_scaler.inverse_transform to get back to the original units.
mae = mean_absolute_error(y_test, y_predict)
mse = mean_squared_error(y_test, y_predict)
r2 = r2_score(y_test, y_predict)
print(mae, mse, r2)
0.06995281320799962 0.007960782075320859 0.6594122430015953
Loading [MathJax]/jax/output/CommonHTML/fonts/TeX/fontdata.js