DATA CLEANING AND PREPROCESSING:
import numpy as np
import pandas as pd

# Load the dataset
dataset = pd.read_csv("D:/book1.csv", delimiter=',')
print(dataset)

# Summary of columns, non-null counts and dtypes
dataset.info()

# Check for missing values and drop any rows that contain them
dataset.isna()
dataset_1 = dataset.dropna()
print(dataset_1)

# Split into features (COUNTRY, AGE, SALARY) and target (PURCHASED)
x = dataset.iloc[:, [0, 1, 2]]
y = dataset.iloc[:, [3]]
print(x)
print(y)

# Encode the categorical COUNTRY column as integers
from sklearn.preprocessing import LabelEncoder
lEncoder = LabelEncoder()
x.iloc[:, 0] = lEncoder.fit_transform(x.iloc[:, 0])
print(x)
OUTPUT:
COUNTRY AGE SALARY PURCHASED
0 France 44 72000 no
1 Spain 27 48000 yes
2 Germany 30 64000 no
3 Spain 38 61000 no
4 Germany 40 Nan yes
5 France 38 54000 yes
6 Spain Nan 62000 no
7 France 48 74000 yes
8 Germany 50 83000 no
9 France 37 67000 yes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 COUNTRY 10 non-null object
1 AGE 10 non-null object
2 SALARY 10 non-null object
3 PURCHASED 10 non-null object
dtypes: object(4)
memory usage: 452.0+ bytes
COUNTRY AGE SALARY PURCHASED
0 France 44 72000 no
1 Spain 27 48000 yes
2 Germany 30 64000 no
3 Spain 38 61000 no
4 Germany 40 Nan yes
5 France 38 54000 yes
6 Spain Nan 62000 no
7 France 48 74000 yes
8 Germany 50 83000 no
9 France 37 67000 yes
COUNTRY AGE SALARY
0 France 44 72000
1 Spain 27 48000
2 Germany 30 64000
3 Spain 38 61000
4 Germany 40 Nan
5 France 38 54000
6 Spain Nan 62000
7 France 48 74000
8 Germany 50 83000
9 France 37 67000
PURCHASED
0 no
1 yes
2 no
3 no
4 yes
5 yes
6 no
7 yes
8 no
9 yes
COUNTRY AGE SALARY
0 0 44 72000
1 2 27 48000
2 1 30 64000
3 2 38 61000
4 1 40 Nan
5 0 38 54000
6 2 Nan 62000
7 0 48 74000
8 1 50 83000
9 0 37 67000
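Note in the output above that dropna() leaves the two 'Nan' rows in place: 'Nan' was read from the CSV as a plain string, which is why every column shows as object dtype in info(). The following is a minimal sketch, assuming the same file path and column names, of one way to handle this: convert AGE and SALARY to numeric (turning 'Nan' into real NaN) and then fill the gaps with the column mean.
import pandas as pd
from sklearn.impute import SimpleImputer

dataset = pd.read_csv("D:/book1.csv", delimiter=',')

# Coerce the string 'Nan' entries into real NaN values
dataset[['AGE', 'SALARY']] = dataset[['AGE', 'SALARY']].apply(pd.to_numeric, errors='coerce')

# Replace the missing AGE/SALARY values with the column mean
imputer = SimpleImputer(strategy='mean')
dataset[['AGE', 'SALARY']] = imputer.fit_transform(dataset[['AGE', 'SALARY']])
print(dataset)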
Data from database:
import mysql.connector

# Create the connection object
myconn = mysql.connector.connect(host="localhost", user="root", passwd="", database="SampleDB")

# Create the cursor object
cur = myconn.cursor()

# Execute the query
cur.execute("select * from students")

# Fetch all rows from the cursor object
result = cur.fetchall()

print("Student Details are :")
# Print the result row by row
for x in result:
    print(x)

# Commit the transaction
myconn.commit()

# Close the connection
myconn.close()
Output:
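As a small extension, the fetched rows can be loaded into a pandas DataFrame so the cleaning steps from the first exercise apply to database data as well. This is a minimal sketch, assuming the same students table in SampleDB and the same placeholder credentials; cursor.description is used to recover the column names.
import mysql.connector
import pandas as pd

# Connect exactly as above (credentials are placeholders)
myconn = mysql.connector.connect(host="localhost", user="root", passwd="", database="SampleDB")
cur = myconn.cursor()

# Run the same query and turn the result into a DataFrame;
# cursor.description holds one tuple per column, whose first field is the column name
cur.execute("select * from students")
rows = cur.fetchall()
columns = [desc[0] for desc in cur.description]
students_df = pd.DataFrame(rows, columns=columns)

print(students_df.head())
myconn.close()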
K-means clustering:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Sample two-dimensional data
x = [4, 5, 10, 4, 3, 11, 14, 6, 10, 12]
y = [21, 19, 24, 17, 16, 25, 24, 22, 21, 21]
#plt.scatter(x, y)
data = list(zip(x, y))

# Compute the inertia (within-cluster sum of squares) for k = 1..10
inertias = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i)
    kmeans.fit(data)
    inertias.append(kmeans.inertia_)

# Plot inertia against k to locate the elbow
plt.plot(range(1, 11), inertias, marker='o')
plt.title('Elbow method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()
Output:
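Once the elbow plot suggests a value of k (for this data the curve flattens around k = 2), the final model can be fitted and the cluster assignments visualised. This is a minimal sketch reusing the same x, y and data lists as the program above; k = 2 is an assumption read off the elbow plot.
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

x = [4, 5, 10, 4, 3, 11, 14, 6, 10, 12]
y = [21, 19, 24, 17, 16, 25, 24, 22, 21, 21]
data = list(zip(x, y))

# Fit K-means with the k chosen from the elbow plot (assumed to be 2 here)
kmeans = KMeans(n_clusters=2)
kmeans.fit(data)

# Colour each point by its assigned cluster label
plt.scatter(x, y, c=kmeans.labels_)
plt.title('K-means clusters (k=2)')
plt.show()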
K-nearest neighbours:
# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
import random

# Loading data
data_iris = load_iris()

# To get the list of target names
label_target = data_iris.target_names

print()
print("Sample Data from Iris Dataset")
print("*" * 30)

# Display ten random samples from the iris dataset
for i in range(10):
    rn = random.randint(0, 120)
    print(data_iris.data[rn], "===>", label_target[data_iris.target[rn]])

# Create feature and target arrays
X = data_iris.data
y = data_iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)
print("The Training dataset length: ", len(X_train))
print("The Testing dataset length: ", len(X_test))

try:
    nn = int(input("Enter number of neighbors :"))
    knn = KNeighborsClassifier(n_neighbors=nn)
    knn.fit(X_train, y_train)
    # Display the accuracy on the test set
    print("The Score is :", knn.score(X_test, y_test))
    # Get test data from the user
    test_data = input("Enter Test Data :").split(",")
    for i in range(len(test_data)):
        test_data[i] = float(test_data[i])
    print()
    v = knn.predict([test_data])
    print("Predicted output is :", label_target[v])
except ValueError:
    print("Please supply valid input......")
Output:
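Beyond the single accuracy score, the fitted classifier can be evaluated with a confusion matrix and a per-class report. This is a minimal sketch that rebuilds the same train/test split (test_size=0.3, random_state=1); n_neighbors=3 is only an illustrative choice, not a value fixed by the program above.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import confusion_matrix, classification_report

data_iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data_iris.data, data_iris.target, test_size=0.3, random_state=1)

# n_neighbors=3 is an assumed value for illustration
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=data_iris.target_names))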
Linear regression:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    # mean of x and y vectors
    m_x = np.mean(x)
    m_y = np.mean(y)
    # calculating cross-deviation and deviation about x
    SS_xy = np.sum(y*x) - n*m_y*m_x
    SS_xx = np.sum(x*x) - n*m_x*m_x
    # calculating regression coefficients
    b_1 = SS_xy / SS_xx
    b_0 = m_y - b_1*m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    # plotting the actual points as a scatter plot
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1]*x
    # plotting the regression line
    plt.plot(x, y_pred, color="g")
    # putting labels
    plt.xlabel('x')
    plt.ylabel('y')
    # show the plot
    plt.show()

def main():
    # observations / data
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    # estimating coefficients
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    # plotting the regression line
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
Output:
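The closed-form coefficients computed by estimate_coef can be cross-checked against scikit-learn's LinearRegression on the same ten points. This is a minimal sketch, assuming the same x and y arrays; it is not part of the original program.
import numpy as np
from sklearn.linear_model import LinearRegression

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).reshape(-1, 1)
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# Fit and report the intercept (b_0) and slope (b_1); these should match estimate_coef
model = LinearRegression().fit(x, y)
print("b_0 =", model.intercept_)
print("b_1 =", model.coef_[0])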
SVM program:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Generate two linearly separable blobs
x, y = make_blobs(n_samples=500, centers=2, random_state=0, cluster_std=0.40)
xfit = np.linspace(-1, 3.5)

plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap='spring')

# Draw three candidate separating lines together with their margins
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m*xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4)

plt.xlim(-1, 3.5)
plt.show()
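The lines above are hand-picked separators; the actual maximum-margin line can be obtained by fitting a linear SVM on the same blobs. This is a minimal sketch, assuming a large C to approximate a hard margin, and it highlights the support vectors the classifier selects.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.svm import SVC

# Same data as above
x, y = make_blobs(n_samples=500, centers=2, random_state=0, cluster_std=0.40)

# Fit a linear SVM; a large C approximates a hard margin
model = SVC(kernel='linear', C=1e10)
model.fit(x, y)

# Plot the points and circle the support vectors
plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap='spring')
plt.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1],
            s=200, facecolors='none', edgecolors='k')

# Draw the decision boundary w.x + b = 0 from the fitted coefficients
w = model.coef_[0]
b = model.intercept_[0]
xfit = np.linspace(-1, 3.5)
plt.plot(xfit, -(w[0]*xfit + b) / w[1], '-k')
plt.xlim(-1, 3.5)
plt.show()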