# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Load the dataset from CSV and preview the first rows as a sanity check.
df = pd.read_csv("house_prices.csv")
print("Dataset Preview:\n", df.head())

# Splitting features and target.
# The double brackets keep X as a one-column DataFrame (2-D), which is the
# shape scikit-learn estimators take for their feature matrix.
X = df[['Area (sq ft)']]  # Independent variable
y = df['Price ($)']  # Dependent variable

# Splitting into training and testing sets (80% train, 20% test).
# A fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the Linear Regression model on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions for the held-out test rows.
y_pred = model.predict(X_test)

# Predict price for a new house of 1250 sq ft.
# The model was fitted on a DataFrame, so it recorded the feature name.
# Wrapping the raw array in a DataFrame with the same column avoids the
# "X does not have valid feature names" warning that scikit-learn emits when
# a bare ndarray is passed to predict() after fitting with named features.
new_area = np.array([[1250]])
predicted_price = model.predict(pd.DataFrame(new_area, columns=X.columns))
print("\nPredicted price for a 1250 sq ft house: $", predicted_price[0])

# Plot the actual data points and the fitted regression line over them.
plt.scatter(X, y, color='blue', label="Actual Prices")
plt.plot(X, model.predict(X), color='red', linewidth=2, label="Regression Line")
plt.xlabel("Area (sq ft)")
plt.ylabel("Price ($)")
plt.legend()
plt.title("House Price Prediction using Linear Regression")
plt.show()
# Explanation of Each Line
# Import libraries
# A library used for working with arrays and numerical operations.
import numpy as np
# A library for data manipulation and analysis, especially with tabular data
# (DataFrames).
import pandas as pd
# A plotting library used for creating visualizations (graphs, charts).
import matplotlib.pyplot as plt
# A function from sklearn.model_selection that splits the dataset into
# training and testing sets.
from sklearn.model_selection import train_test_split
# A model from sklearn.linear_model that performs simple or multiple linear
# regression.
from sklearn.linear_model import LinearRegression
# Load the dataset from CSV
df = pd.read_csv("house_prices.csv")
print("Dataset Preview:\n", df.head())

# Splitting features and target
X = df[['Area (sq ft)']]  # Independent variable
y = df['Price ($)']  # Dependent variable

# Splitting into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Explanation for the line above:
# - X_train and y_train: the features and target values for the training data.
# - X_test and y_test: the features and target values for the testing data.
# - test_size=0.2: 20% of the data will be used for testing, and 80% will be
#   used for training.
# - random_state=42: controls the randomness of the split. Using the same
#   random state will result in the same split every time you run the code.

# Train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Predict price for a new house of 1250 sq ft.
# The model was fitted on a DataFrame, so it recorded the feature name.
# Wrapping the raw array in a DataFrame with the same column avoids the
# "X does not have valid feature names" warning that scikit-learn emits when
# a bare ndarray is passed to predict() after fitting with named features.
new_area = np.array([[1250]])
predicted_price = model.predict(pd.DataFrame(new_area, columns=X.columns))
print("\nPredicted price for a 1250 sq ft house: $", predicted_price[0])

# Plot the regression line
# Creates a scatter plot with the actual data points (area vs. price).
plt.scatter(X, y, color='blue', label="Actual Prices")
# Plots the regression line.
plt.plot(X, model.predict(X), color='red', linewidth=2, label="Regression Line")
plt.xlabel("Area (sq ft)")
plt.ylabel("Price ($)")
# Shows a legend to label the scatter points and the regression line.
plt.legend()
plt.title("House Price Prediction using Linear Regression")
plt.show()  # Displays the plot