Assignment 1
-Submitted by: Vaibhav Singh
(CSC)
(14B00033)
Task: Use linear regression to predict the salary for a randomly chosen
test sample
Code:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets, linear_model
def get_data(filename):
    """Read the salary CSV and split it into features and targets.

    Args:
        filename: Path (or anything ``pd.read_csv`` accepts) of a CSV that
            has ``YearsExperience`` and ``Salary`` columns.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a list of one-element lists
        ``[years_of_experience]`` and ``y`` is a list of salaries, with
        every value converted to ``float``.
    """
    frame = pd.read_csv(filename)
    experience = frame['YearsExperience']
    salaries = frame['Salary']

    # Convert row by row so both values of a row are parsed together.
    rows = [
        ([float(years)], float(amount))
        for years, amount in zip(experience, salaries)
    ]
    x = [features for features, _ in rows]
    y = [target for _, target in rows]
    return x, y
def print_data(filename):
    """Load the CSV at *filename* and return it as a pandas DataFrame.

    Despite its name this function prints nothing itself; the caller is
    expected to print the returned DataFrame.
    """
    return pd.read_csv(filename)
# Show the raw dataset exactly as pandas loaded it.
print(print_data('Salary_Data.csv'))
def linear_regression_model(x, y, predict_value):
    """Fit a linear regression on (x, y) and predict for ``predict_value``.

    Args:
        x: Training features, passed unchanged to ``LinearRegression.fit``
            (a 2D array-like, one row per sample).
        y: Training targets, passed unchanged to ``LinearRegression.fit``.
        predict_value: Query input(s). A bare scalar or a 1D sequence is
            reshaped to the 2D layout scikit-learn requires; a 2D
            array-like is passed through untouched.

    Returns:
        A dict with the keys ``'intercept'``, ``'coefficient'`` and
        ``'predicted_value'`` (the array returned by ``predict``).

    Raises:
        ValueError: If a scalar/1D ``predict_value`` cannot be reshaped to
            rows of the number of features the model was fitted on.
    """
    # Local import keeps this block self-contained; numpy is already a
    # dependency of both pandas and scikit-learn.
    import numpy as np

    regression = linear_model.LinearRegression()
    regression.fit(x, y)

    # Current scikit-learn rejects scalars and 1D input in predict(), so
    # lift them to shape (n_samples, n_features). The last axis of coef_
    # is the number of features seen during fit.
    if np.ndim(predict_value) < 2:
        n_features = np.shape(regression.coef_)[-1]
        predict_value = np.asarray(predict_value, dtype=float).reshape(
            -1, n_features
        )

    return {
        'intercept': regression.intercept_,
        'coefficient': regression.coef_,
        'predicted_value': regression.predict(predict_value),
    }
# Load the training data: x holds [years_of_experience] rows, y the salaries.
x, y = get_data('Salary_Data.csv')
# Years of experience to predict a salary for.
# NOTE(review): this is a bare scalar, while scikit-learn's predict()
# expects a 2D array-like -- confirm linear_regression_model handles it.
predict_value = 2.03
res = linear_regression_model(x, y, predict_value)
# Report the fitted line's parameters and the prediction for predict_value.
print("Intercept value ", res['intercept'])
print("coefficient ", res['coefficient'])
print("Predicted value: ", res['predicted_value'])
def show_graph(x, y):
    """Plot the samples as a scatter with the fitted regression line on top.

    Args:
        x: Features, used both to fit the model and as the x-coordinates.
        y: Targets, used both to fit the model and as the y-coordinates.

    A fresh ``LinearRegression`` is fitted on the given data; the figure
    is displayed with ``plt.show()`` and nothing is returned.
    """
    model = linear_model.LinearRegression().fit(x, y)
    fitted_line = model.predict(x)

    plt.scatter(x, y, color='blue')
    plt.plot(x, fitted_line, color='black', linewidth=1)

    # An empty tick sequence removes the ticks from the axis.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(())

    plt.show()
# Plot the loaded samples together with their fitted regression line.
show_graph(x, y)
Graph: