External
External
External
Create NumPy arrays from Python Data Structures, Intrinsic NumPy objects and Random Functions
import numpy as np
# Intrinsic NumPy constructors.
identity = np.eye(4)                 # 4x4 identity matrix
# `arange` is printed below but was never assigned, which raised NameError.
# The exact range is an illustrative choice: even numbers 0..8.
arange = np.arange(0, 10, 2)
linspace = np.linspace(0, 1, 5)      # 5 evenly spaced points, both ends included
# Random Functions
rand = np.random.rand(3, 3)          # uniform samples on [0, 1), shape (3, 3)
normal = np.random.normal(0, 1, 5)   # 5 draws with mean 0 and std 1
# Output
print("Arange:", arange)
print("Linspace:", linspace)
print("\nRandom Functions:")
# The header above previously introduced no output; show the arrays it refers to.
print("Uniform [0, 1):\n", rand)
print("Normal(0, 1):", normal)
import pandas as pd
import numpy as np
# Build one DataFrame from each kind of source named by the variables below.
# These three names were printed without ever being assigned (NameError);
# the sample values are illustrative placeholders.
df_from_dict = pd.DataFrame({'name': ['Alice', 'Bob', 'Carol'], 'age': [25, 30, 35]})
df_from_list = pd.DataFrame([[1, 'a'], [2, 'b'], [3, 'c']], columns=['id', 'label'])
df_from_numpy = pd.DataFrame(np.arange(6).reshape(3, 2), columns=['x', 'y'])
# Outputs
print(df_from_dict)
print(df_from_list)
print(df_from_numpy)
3,4. Develop a model on residual analysis of simple linear regression.
import numpy as np
import pandas as pd
# Fit a linear regression on the California housing data and inspect residuals.
# NOTE(review): fetch_california_housing, LinearRegression and plt are not
# imported in the visible part of this file, and X_train / X_test / y_train are
# never assigned here -- presumably the scikit-learn / matplotlib imports and a
# train/test split were lost; confirm before running.
california = fetch_california_housing()
y = california.target  # regression target taken from the loaded dataset
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Calculate residuals
# NOTE(review): no residual computation is present after this comment.
# Residual Analysis
# One figure with two side-by-side panels.
plt.figure(figsize=(12, 6))
# Left panel: only the y-axis label is set; no plotting call is visible.
plt.subplot(1, 2, 1)
plt.ylabel('Residuals')
# Right panel: titled and labelled as a histogram; no plotting call is visible.
plt.subplot(1, 2, 2)
plt.title('Histogram of Residuals')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()
# Second, square figure; nothing is drawn on it in the visible code.
plt.figure(figsize=(6, 6))
plt.show()
5. Import any CSV file to Pandas DataFrame and perform the following:
(a) Handle missing data by detecting and dropping/filling missing values.
import numpy as np
# Load a CSV file and demonstrate detecting, dropping and filling missing values.
# NOTE(review): `file_path` is not assigned in the visible lines above this
# statement -- confirm where it is meant to come from.
df = pd.read_csv(file_path)
print(df.head())
# Detect: number of missing values in each column.
print(df.isnull().sum())
# Drop: keep only the rows that have no missing value.
df_dropped = df.dropna()
print(df_dropped.head())
# Fill: work on a copy so `df` itself is left untouched.
# NOTE(review): no fill step is visible, so df_filled is an unmodified copy here.
df_filled = df.copy()
print(df_filled.head())
# NOTE(review): `column` is not assigned anywhere in the visible code.
print(df[[column]].head())
6. Visualize data using Line Plots, Bar Plots, Histograms, Density Plots and Scatter Plots.
# Load seaborn's 'iris' example dataset and draw one figure per plot type.
# NOTE(review): neither `sns` nor `plt` is imported in the visible part of the
# file -- presumably seaborn and matplotlib.pyplot; confirm.
df = sns.load_dataset('iris')
# Line Plot
# sepal_length plotted against the row index.
plt.figure(figsize=(10, 6))
sns.lineplot(x=df.index, y=df['sepal_length'])
plt.show()
# Bar Plot
# NOTE(review): nothing is drawn between figure() and show() in the visible code.
plt.figure(figsize=(10, 6))
plt.show()
# Histogram
# NOTE(review): nothing is drawn between figure() and show() in the visible code.
plt.figure(figsize=(10, 6))
plt.show()
# Density Plot
# Filled kernel density estimate of sepal_length.
plt.figure(figsize=(10, 6))
sns.kdeplot(df['sepal_length'], fill=True)
plt.show()
# Scatter Plot
# NOTE(review): nothing is drawn between figure() and show() in the visible code.
plt.figure(figsize=(10, 6))
plt.show()
import numpy as np
# Sample arrays for each operation below. None of these names was assigned
# before use, so every print raised NameError. The values are illustrative;
# sizes are chosen so the reshape (6 elements) and the 3-way split are valid.
arr = np.array([10, 20, 30, 40, 50])
arr2 = np.array([1, 2, 3, 4, 5, 6])
arr3 = np.array([1, 2, 3, 4, 5, 6])
arr4 = np.array([1, 2, 3])
arr5 = np.array([4, 5, 6])
arr6 = np.array([1, 2, 3, 4, 5, 6])
# Indexing
print(arr[2])                        # third element (zero-based index 2)
# Slicing
print(arr2[1:4])                     # elements at indices 1, 2 and 3
# Reshaping
print(arr3.reshape(2, 3))            # 6 elements viewed as 2 rows x 3 columns
# Joining
print(np.concatenate((arr4, arr5)))  # arr4 followed by arr5
# Splitting
print(np.split(arr6, 3))             # three equal-sized pieces
8. Import any CSV file to Pandas DataFrame and perform the following:
import pandas as pd
# Preview both ends of the frame; each heading is followed by its rows on a new line.
print("First 10 records:", df.head(10), sep="\n")
print("\nLast 10 records:", df.tail(10), sep="\n")
# Structural metadata, one "label value" line per attribute.
for label, value in (
    ("\nShape:", df.shape),
    ("Index:", df.index),
    ("Columns:", df.columns),
):
    print(label, value)
import numpy as np
import pandas as pd
# Fit a simple linear regression on a single feature and plot its residuals.
# Load data
# NOTE(review): no loading statement follows this comment; `df` and `y` are not
# assigned in this section, and LinearRegression / plt are not imported in the
# visible part of the file -- confirm where they come from.
X = df[['age']].values # Use 'age' as the feature (replace with any other numerical column you prefer)
model = LinearRegression()
model.fit(X, y)
# Predictions
# Predicted on the same X the model was fitted on.
y_pred = model.predict(X)
# Residuals
# Observed minus predicted.
residuals = y - y_pred
# Plot residuals
# Residuals on the y-axis against the feature values on the x-axis.
plt.scatter(X, residuals)
plt.xlabel('Age')
plt.ylabel('Residuals')
plt.title('Residual Plot')
plt.show()
10. Computation on NumPy arrays using Universal Functions and Mathematical methods.
import numpy as np
# Create array
# The assignment was missing, so every call below raised NameError. Values are
# illustrative: positive perfect squares, so sqrt is exact and log is defined.
arr = np.array([1, 4, 9, 16, 25])
# Universal functions (applied element-wise)
print(np.sqrt(arr))
print(np.exp(arr))
print(np.log(arr))
# Mathematical methods (reduce the whole array to a single value)
print(np.sum(arr))
print(np.mean(arr))
print(np.median(arr))
print(np.std(arr))
print(np.var(arr))
11. Import any CSV file to Pandas DataFrame and perform the following:
import pandas as pd
import numpy as np
# Read the training data from the hard-coded local path.
df = pd.read_csv(r'C:\Users\peral\Downloads\train.csv')

# Outlier removal with the 1.5 * IQR rule: keep rows whose value lies inside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], both ends inclusive.
column_name = 'age'
Q1, Q3 = (df[column_name].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1
within_fences = df[column_name].between(Q1 - 1.5 * IQR, Q3 + 1.5 * IQR)
filtered_df = df[within_fences]
print(filtered_df)

# String manipulation: lower-case every value of the text column in place.
string_column = 'gender'
df[string_column] = df[string_column].str.lower()
print(df[[string_column]].head())
12. Download the House Pricing dataset from Kaggle and map the values to 23 Aesthetics.
import pandas as pd
# Load the Kaggle house-pricing training data and replace the abbreviated
# quality/condition codes with readable labels.
file_path = 'path_to_your_downloaded_folder/train.csv'
df = pd.read_csv(file_path)

# Column name -> {raw code: readable label}.
# The literal was previously left unclosed, which was a SyntaxError.
aesthetic_mapping = {
    'ExterCond': {'Po': 'Poor', 'Fa': 'Fair', 'TA': 'Average', 'Gd': 'Good', 'Ex': 'Excellent'},
    'ExterQual': {'Po': 'Poor', 'Fa': 'Fair', 'TA': 'Average', 'Gd': 'Good', 'Ex': 'Excellent'},
    'PoolQC': {'NA': 'No Pool', 'Ex': 'Excellent', 'Gd': 'Good', 'TA': 'Average', 'Fa': 'Fair'},
}

# `column` and `mapping` were used without a loop binding them.
for column, mapping in aesthetic_mapping.items():
    codes = df[column]
    if 'NA' in mapping:
        # read_csv parses the literal text "NA" as a missing value by default,
        # so the 'NA' key could never match; restore the code before mapping.
        codes = codes.fillna('NA')
    # Codes without a mapping entry keep their original value.
    df[column] = codes.map(mapping).fillna(codes)