import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Load the car data set from disk and print a quick overview of it:
# its shape, the Python type of the loaded object, and the first ten rows.
data = pd.read_csv('/Users/srunjith/Desktop/cardata.csv')
print("Shape of the Data:", data.shape, sep="\n")
print("\nData Type:", type(data), sep="\n")
print("\nFirst 10 Rows of the Data:", data.head(10), sep="\n")
# Optional summary statistics (disabled): print(data.describe())
# Distribution of the selling price.
# sns.displot is a figure-level function that creates its own figure, so the
# title has to be set AFTER the call; setting it first puts the title on a
# separate, otherwise empty figure and leaves the distribution plot untitled.
sns.displot(data['Selling_Price'])
plt.title("Car Data")
# Re-run the layout so the title added after seaborn's own layout pass fits.
plt.tight_layout()
plt.show()

# Scatter plot of selling price (x-axis) against present price (y-axis).
plt.scatter(data['Selling_Price'], data['Present_Price'])
plt.title("Selling Price vs Present Price")
plt.xlabel("Selling Price")
plt.ylabel("Present Price")
plt.box(False)  # hide the frame around the axes
plt.show()
# Split the data set into the independent variable (feature, X) and the
# dependent variable (target, Y).
# scikit-learn estimators require X to be 2-D with shape
# (n_samples, n_features). Selecting with a list of column names keeps X as a
# single-column DataFrame; a plain data['Selling_Price'] would be a 1-D Series,
# which LinearRegression.fit() rejects with a ValueError.
X = data[['Selling_Price']]
Y = data['Present_Price']
# Hold out 20% of the rows as the test set; random_state pins the shuffle so
# the same split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
# Fit an ordinary least-squares linear regression on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
regressor = LinearRegression().fit(X_train, Y_train)

# Predictions for the training split and for the held-out test split.
y_pred_train = regressor.predict(X_train)
y_pred_test = regressor.predict(X_test)

# Report the test-set predictions first, then the training-set predictions.
print("\nPredicted Values:", y_pred_test, sep="\n")
print("\nPredicted Values for Training Set:", y_pred_train, sep="\n")
# Plot the training set: the actual observations as red points and the
# model's predictions for the same inputs as a blue line.
plt.scatter(X_train, Y_train, color='red', label='Training Data')
plt.plot(X_train, y_pred_train, color='blue', label='Predicted Values')
plt.title('Training Set: Actual vs Predicted Values')
plt.xlabel('Selling_Price')
plt.ylabel('Present_Price')
# Legend entries come from the label= arguments passed to scatter()/plot().
# (A stale, line-wrapped commented-out legend call used to sit here; its
# second line was not commented and made the whole file a SyntaxError.)
plt.legend(loc='best')
plt.box(False)  # hide the frame around the axes
plt.show()
# Plot the test set: the held-out observations as green points, overlaid with
# the regression line drawn from the training-set predictions.
# Both artists carry a label so that plt.legend() has entries to show; without
# labels matplotlib emits a warning and draws no legend entries.
plt.scatter(X_test, Y_test, color='green', label='Test Data')
plt.plot(X_train, y_pred_train, color='blue', label='Regression Line')
plt.title('Test Set: Actual vs Predicted Values')
plt.xlabel('Selling_Price')
plt.ylabel('Present_Price')
plt.legend(loc='best')
plt.box(False)  # hide the frame around the axes
plt.show()
# Report the fitted model parameters: the coefficient array followed by the
# intercept, each on its own line.
print(
    f'\nCoefficients: {regressor.coef_}',
    f'Intercept: {regressor.intercept_}',
    sep='\n',
)