3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
Linear Regression
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
dataset = pd.read_csv('Salary_Data.csv')
dataset.head()
Out[1]:
YearsExperience Salary
0 1.1 39343.0
1 1.3 46205.0
2 1.5 37731.0
3 2.0 43525.0
4 2.2 39891.0
In [2]:
X = dataset.iloc[:,:-1].values #independent variable array
y = dataset.iloc[:,1].values #dependent variable vector
In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=1/3,random_state=0)
In [4]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train,y_train) #actually produces the linear eqn for the data
Out[4]:
LinearRegression()
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 1/6
3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
In [5]:
y_pred = regressor.predict(X_test)
y_pred
Out[5]:
array([ 40835.10590871, 123079.39940819, 65134.55626083, 63265.3677722
1,
115602.64545369, 108125.8914992 , 116537.23969801, 64199.9620165
2,
76349.68719258, 100649.1375447 ])
In [6]:
y_test
Out[6]:
array([ 37731., 122391., 57081., 63218., 116969., 109431., 112635.,
55794., 83088., 101302.])
In [7]:
#plot for the TRAIN
plt.scatter(X_train, y_train, color='red') # plotting the observation line
plt.plot(X_train, regressor.predict(X_train), color='blue') # plotting the regression li
plt.title("Salary vs Experience (Training set)") # stating the title of the graph
plt.xlabel("Years of experience") # adding the name of x-axis
plt.ylabel("Salaries") # adding the name of y-axis
plt.show() # specifies end of graph
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 2/6
3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
In [8]:
#plot for the TEST
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue') # plotting the regression li
plt.title("Salary vs Experience (Testing set)")
plt.xlabel("Years of experience")
plt.ylabel("Salaries")
plt.show()
Logistic Regression
In [9]:
import re
In [10]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
%matplotlib inline
digits=load_digits()
In [11]:
print("Image Data Shape",digits.data.shape)
print("Label Data Shape",digits.target.shape)
Image Data Shape (1797, 64)
Label Data Shape (1797,)
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 3/6
3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
In [13]:
import numpy as np
import matplotlib.pyplot as plt
plt.figure(figsize=(20,4))
for index, (image, label) in enumerate(zip(digits.data[0:5], digits.target[0:5])):
plt.subplot(1, 5, index+1)
plt.imshow(np.reshape(image, (8,8)), cmap=plt.cm.gray)
plt.title('Training: %i\n' % label, fontsize=20)
In [14]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(digits.data,digits.target,test_size=0.23,
In [15]:
print(x_train.shape)
(1383, 64)
In [16]:
print(y_train.shape)
(1383,)
In [17]:
print(x_test.shape)
(414, 64)
In [18]:
print(y_test.shape)
(414,)
In [19]:
from sklearn.linear_model import LogisticRegression
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 4/6
3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
In [20]:
logisticRegr=LogisticRegression()
logisticRegr.fit(x_train,y_train)
C:\Users\HP\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.p
y:763: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown i
n:
https://scikit-learn.org/stable/modules/preprocessing.html (https://s
cikit-learn.org/stable/modules/preprocessing.html)
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-re
gression (https://scikit-learn.org/stable/modules/linear_model.html#logis
tic-regression)
n_iter_i = _check_optimize_result(
Out[20]:
LogisticRegression()
In [21]:
print(logisticRegr.predict(x_test[0].reshape(1,-1)))
[4]
In [22]:
logisticRegr.predict(x_test[0:10])
Out[22]:
array([4, 0, 9, 1, 8, 7, 1, 5, 1, 6])
In [23]:
predictions=logisticRegr.predict(x_test)
In [25]:
score=logisticRegr.score(x_test,y_test)
print(score)
0.9516908212560387
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 5/6
3/16/23, 10:13 PM Untitled44 - Jupyter Notebook
In [36]:
index = 0
misclassifiedIndex = []
for predict, actual in zip(predictions, y_test):
if predict != actual:
misclassifiedIndex.append(index)
index += 1
plt.figure(figsize=(20, 3))
for plotIndex, wrong in enumerate(misclassifiedIndex[0:6]):
plt.subplot(1, 6, plotIndex + 1)
plt.imshow(np.reshape(x_test[wrong], (8, 8)), cmap=plt.cm.gray)
plt.title("Predicted: {}, Actual: {}".format(predictions[wrong], y_test[wrong]), fon
In [ ]:
localhost:8888/notebooks/Untitled44.ipynb?kernel_name=python3 6/6