import pandas as pd
import re
from textblob import TextBlob
import matplotlib.pyplot as plt
# 2
# Load dataset
file_path = '/content/Nri_Textual_Survey_Data.csv'  # Replace with your file path
survey_data = pd.read_csv(file_path)
# 3
# ### 1. Text Preprocessing ###
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
# Update preprocessing function to include stop word removal
def preprocess_text_with_stopwords(text):
    """Cleans text by lowercasing and removing URLs, HTML tags,
    non-ASCII characters, digits, punctuation, extra whitespace,
    and stop words."""
    try:
        text = str(text).lower()
        # Strip URLs and HTML tags first, while the punctuation that
        # delimits them is still intact
        text = re.sub(r'http\S+|www\S+|https\S+', '', text)  # Remove URLs
        text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
        text = re.sub(r'[^\x00-\x7F]+', '', text)  # Remove non-ASCII characters
        text = re.sub(r'\n', ' ', text)  # Replace newlines with spaces
        text = re.sub(r'\d+', '', text)  # Remove digits
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
        # Remove stop words
        text = " ".join(word for word in text.split() if word not in ENGLISH_STOP_WORDS)
        return text
    except Exception:
        return text
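# Quick sanity check of the cleaning pipeline (the sample string is
# illustrative, not taken from the survey data):
sample = "Visit <b>https://example.com</b> today!! The labs are GREAT in 2024."
print(preprocess_text_with_stopwords(sample))  # expected output: 'visit today labs great'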
# Apply updated preprocessing
processed_data = survey_data.copy()
for col in processed_data.columns:
    processed_data[col] = processed_data[col].apply(preprocess_text_with_stopwords)
# 4
processed_data.to_csv('preprocessed_data.csv', index=False)  # Save to Colab environment
# Download the file
from google.colab import files
files.download('preprocessed_data.csv')
# 5
### 2. Sentiment Analysis ###
def analyze_sentiment(text):
"""Classifies sentiment as 'happy', 'neutral', or 'unhappy'."""
try:
blob = TextBlob(text)
        polarity = blob.sentiment.polarity  # Polarity ranges from -1 (negative) to 1 (positive)
if polarity > 0:
return 'happy'
elif polarity == 0:
return 'neutral'
else:
return 'unhappy'
except Exception:
return 'neutral'
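# Illustrative polarity check (sample sentences are assumptions, not survey
# responses): TextBlob scores these positive, zero, and negative, which map
# to 'happy', 'neutral', and 'unhappy'.
for s in ["the labs are excellent", "the labs are on the second floor", "the labs are terrible"]:
    print(s, '->', analyze_sentiment(s))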
# Add sentiment columns for each facility
sentiment_data = processed_data.copy()
for col in sentiment_data.columns:
    sentiment_data[col + '_sentiment'] = sentiment_data[col].apply(analyze_sentiment)
# 6
### 3. Sentiment Analysis Summary ###
# Count sentiments for each facility
facility_sentiment_cols = [col for col in sentiment_data.columns if '_sentiment' in col]
# value_counts gives no guaranteed row order, so reindex to a fixed label
# order instead of renaming columns by position
sentiment_summary = (
    sentiment_data[facility_sentiment_cols]
    .apply(pd.Series.value_counts)
    .reindex(['happy', 'neutral', 'unhappy'])
    .fillna(0)
    .astype(int)
    .T
)
# Overall sentiment counts
overall_sentiment_counts = sentiment_summary.sum()
# 7
### Visualization ###
# Overall Sentiment Distribution (Bar and Pie Charts)
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
overall_sentiment_counts.plot(kind='bar', color=['green', 'orange', 'red'], ax=axes[0])
axes[0].set_title('Overall Sentiment Distribution (Bar Chart)')
axes[0].set_xlabel('Sentiment')
axes[0].set_ylabel('Count')
overall_sentiment_counts.plot(kind='pie', autopct='%1.1f%%', colors=['green', 'orange', 'red'], ax=axes[1])
axes[1].set_title('Overall Sentiment Distribution (Pie Chart)')
axes[1].set_ylabel('')
plt.tight_layout()
plt.show()
# 8
# Facility-Wise Sentiment Distribution (Stacked Bar Chart)
sentiment_summary.plot(
kind='bar',
stacked=True,
figsize=(12, 8),
title='Facility-Wise Sentiment Distribution',
color=['green', 'orange', 'red']
)
plt.xlabel('Facilities')
plt.ylabel('Count')
plt.legend(title='Sentiment')
plt.show()
# 9
# Individual Facility Sentiment Pie Charts
rows = (len(sentiment_summary) // 3) + (1 if len(sentiment_summary) % 3 else 0)
fig, axes = plt.subplots(rows, 3, figsize=(18, 5 * rows))
axes = axes.flatten()
for idx, facility in enumerate(sentiment_summary.index):
sentiment_summary.loc[facility].plot(
kind='pie',
ax=axes[idx],
autopct='%1.1f%%',
colors=['green', 'orange', 'red'],
title=f'{facility} Sentiment Distribution'
)
axes[idx].set_ylabel('')
for ax in axes[len(sentiment_summary):]:
ax.axis('off')
plt.tight_layout()
plt.show()
# 10
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import matplotlib.pyplot as plt
import pandas as pd
# Assuming 'processed_data' contains the preprocessed text data
# Dynamically set the column name
column_name = 'Internship' # Replace this with your desired column
# Bag-of-Words Extraction
bow_vectorizer = CountVectorizer(max_features=1000)  # Limit vocabulary size
bow_features = bow_vectorizer.fit_transform(processed_data[column_name])  # Use dynamic column name
bow_term_frequencies = bow_features.sum(axis=0).A1  # Convert sparse matrix to array
# Create BoW DataFrame
bow_term_df = pd.DataFrame({
'Term': bow_vectorizer.get_feature_names_out(),
'Frequency': bow_term_frequencies
}).sort_values(by='Frequency', ascending=False)
# TF-IDF Extraction
tfidf_vectorizer = TfidfVectorizer(max_features=1000)  # Limit vocabulary size
tfidf_features = tfidf_vectorizer.fit_transform(processed_data[column_name])  # Use dynamic column name
tfidf_term_scores = tfidf_features.sum(axis=0).A1  # Convert sparse matrix to array
# Create TF-IDF DataFrame
tfidf_term_df = pd.DataFrame({
'Term': tfidf_vectorizer.get_feature_names_out(),
'TF-IDF Score': tfidf_term_scores
}).sort_values(by='TF-IDF Score', ascending=False)
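# A quick side-by-side look at the top terms (diagnostic only): BoW ranks by
# raw count, while TF-IDF downweights terms that appear in most responses.
print(bow_term_df.head(10).to_string(index=False))
print(tfidf_term_df.head(10).to_string(index=False))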
# Merge BoW and TF-IDF for comparison
comparison_df = pd.merge(
    bow_term_df.rename(columns={"Frequency": "Frequency_BoW"}).head(20),
    tfidf_term_df.rename(columns={"TF-IDF Score": "Frequency_TF-IDF"}).head(20),
on="Term",
how="outer"
).fillna(0)
# Sort by Bag-of-Words Frequency for consistency
comparison_df = comparison_df.sort_values(by="Frequency_BoW", ascending=False)
# Plot the comparison graph
plt.figure(figsize=(12, 8))
# Bar width for side-by-side bars
bar_width = 0.35
index = range(len(comparison_df))
# Bag-of-Words Bar
plt.bar(index, comparison_df["Frequency_BoW"], bar_width, label="Bag-of-Words", color="skyblue")
# TF-IDF Bar
plt.bar([i + bar_width for i in index], comparison_df["Frequency_TF-IDF"], bar_width, label="TF-IDF", color="orange")
# Add labels and title
plt.xlabel("Terms")
plt.ylabel("Frequency/TF-IDF Score")
plt.title(f"Comparison of Bag-of-Words and TF-IDF Representations for
'{column_name}'")
plt.xticks([i + bar_width / 2 for i in index], comparison_df["Term"],
rotation=45, ha="right")
plt.legend()
plt.tight_layout()
plt.show()
# 11
from sklearn.model_selection import train_test_split
# Target variable based on the dynamic column name
target_column_name = column_name + '_sentiment'  # Append '_sentiment' dynamically
y = sentiment_data[target_column_name]  # Use the dynamic sentiment column name
# Split for BoW features
X_train_bow, X_test_bow, y_train, y_test = train_test_split(
bow_features, y, test_size=0.2, random_state=42
)
# Split for TF-IDF features
X_train_tfidf, X_test_tfidf, _, _ = train_test_split(
tfidf_features, y, test_size=0.2, random_state=42
)
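# Note: with imbalanced sentiment labels, a stratified split keeps the class
# proportions consistent across train and test. A minimal sketch (kept under
# new variable names so it does not overwrite the splits above; assumes every
# sentiment class appears at least twice):
X_train_strat, X_test_strat, y_train_strat, y_test_strat = train_test_split(
    bow_features, y, test_size=0.2, random_state=42, stratify=y
)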
# 12
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
# Initialize models (one set per feature representation, so TF-IDF results
# are not produced by BoW-trained models)
log_reg = LogisticRegression(max_iter=1000, random_state=42)
random_forest = RandomForestClassifier(random_state=42)
svm = SVC(kernel='linear', random_state=42)
log_reg_tfidf = LogisticRegression(max_iter=1000, random_state=42)
random_forest_tfidf = RandomForestClassifier(random_state=42)
svm_tfidf = SVC(kernel='linear', random_state=42)
# Train models on BoW features
log_reg.fit(X_train_bow, y_train)
random_forest.fit(X_train_bow, y_train)
svm.fit(X_train_bow, y_train)
# Train models on TF-IDF features
log_reg_tfidf.fit(X_train_tfidf, y_train)
random_forest_tfidf.fit(X_train_tfidf, y_train)
svm_tfidf.fit(X_train_tfidf, y_train)
# 13
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
# Dynamically calculate model accuracies
bow_accuracies = [
accuracy_score(y_test, log_reg.predict(X_test_bow)),
accuracy_score(y_test, random_forest.predict(X_test_bow)),
accuracy_score(y_test, svm.predict(X_test_bow))
]
tfidf_accuracies = [
    accuracy_score(y_test, log_reg_tfidf.predict(X_test_tfidf)),
    accuracy_score(y_test, random_forest_tfidf.predict(X_test_tfidf)),
    accuracy_score(y_test, svm_tfidf.predict(X_test_tfidf))
]
# Plotting model accuracy comparison
model_names = ['Logistic Regression', 'Random Forest', 'SVM']
x = range(len(model_names))
bar_width = 0.35
plt.figure(figsize=(10, 6))
plt.bar(x, bow_accuracies, width=bar_width, label='BoW', color='skyblue')
plt.bar([i + bar_width for i in x], tfidf_accuracies, width=bar_width, label='TF-IDF', color='orange')
# Add labels and title
plt.xticks([i + bar_width / 2 for i in x], model_names)
plt.ylabel('Accuracy')
plt.title('Model Accuracy Comparison (BoW vs. TF-IDF)')
plt.legend()
plt.tight_layout()
plt.show()
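# The same accuracies tabulated for quick reference alongside the chart
# (a small convenience, not part of the original analysis):
accuracy_table = pd.DataFrame(
    {'BoW': bow_accuracies, 'TF-IDF': tfidf_accuracies}, index=model_names
)
print(accuracy_table.round(3))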
# 14
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score
# Dynamically calculate metrics for a specific model (e.g., Logistic Regression)
bow_scores = [
    precision_score(y_test, log_reg.predict(X_test_bow), average='weighted'),
    recall_score(y_test, log_reg.predict(X_test_bow), average='weighted'),
    f1_score(y_test, log_reg.predict(X_test_bow), average='weighted')
]
tfidf_scores = [
    precision_score(y_test, log_reg_tfidf.predict(X_test_tfidf), average='weighted'),
    recall_score(y_test, log_reg_tfidf.predict(X_test_tfidf), average='weighted'),
    f1_score(y_test, log_reg_tfidf.predict(X_test_tfidf), average='weighted')
]
# Plot grouped bar chart
metrics = ['Precision', 'Recall', 'F1-Score']
x = np.arange(len(metrics))
bar_width = 0.35
plt.figure(figsize=(10, 6))
plt.bar(x, bow_scores, width=bar_width, label='BoW', color='skyblue')
plt.bar(x + bar_width, tfidf_scores, width=bar_width, label='TF-IDF', color='orange')
# Add labels and title
plt.xticks(x + bar_width / 2, metrics)
plt.ylabel('Score')
plt.title('Model Performance Metrics (BoW vs. TF-IDF)')
plt.legend()
plt.tight_layout()
plt.show()
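# The cell above reports metrics for Logistic Regression only; this loop
# gathers weighted F1 for all three models (a convenience sketch; the model
# variables come from the training cell above):
for name, m_bow, m_tfidf in [
    ('Logistic Regression', log_reg, log_reg_tfidf),
    ('Random Forest', random_forest, random_forest_tfidf),
    ('SVM', svm, svm_tfidf),
]:
    f1_bow = f1_score(y_test, m_bow.predict(X_test_bow), average='weighted')
    f1_tfidf = f1_score(y_test, m_tfidf.predict(X_test_tfidf), average='weighted')
    print(f"{name}: F1 (BoW) = {f1_bow:.3f}, F1 (TF-IDF) = {f1_tfidf:.3f}")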
# 15
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
# Predictions for BoW features
y_pred_log_reg_bow = log_reg.predict(X_test_bow)
y_pred_rf_bow = random_forest.predict(X_test_bow)
y_pred_svm_bow = svm.predict(X_test_bow)
# Predictions for TF-IDF features (from the TF-IDF-trained models)
y_pred_log_reg_tfidf = log_reg_tfidf.predict(X_test_tfidf)
y_pred_rf_tfidf = random_forest_tfidf.predict(X_test_tfidf)
y_pred_svm_tfidf = svm_tfidf.predict(X_test_tfidf)
# Confusion Matrices (label order fixed explicitly so it matches the
# heatmap tick labels below)
sentiment_labels = ['happy', 'neutral', 'unhappy']
cm_log_reg_bow = confusion_matrix(y_test, y_pred_log_reg_bow, labels=sentiment_labels)
cm_rf_bow = confusion_matrix(y_test, y_pred_rf_bow, labels=sentiment_labels)
cm_svm_bow = confusion_matrix(y_test, y_pred_svm_bow, labels=sentiment_labels)
cm_log_reg_tfidf = confusion_matrix(y_test, y_pred_log_reg_tfidf, labels=sentiment_labels)
cm_rf_tfidf = confusion_matrix(y_test, y_pred_rf_tfidf, labels=sentiment_labels)
cm_svm_tfidf = confusion_matrix(y_test, y_pred_svm_tfidf, labels=sentiment_labels)
# Plotting all confusion matrices
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# Titles for the matrices
titles = [
"Logistic Regression (BoW)", "Random Forest (BoW)", "SVM (BoW)",
"Logistic Regression (TF-IDF)", "Random Forest (TF-IDF)", "SVM (TF-
IDF)"
]
# All confusion matrices
conf_matrices = [
cm_log_reg_bow, cm_rf_bow, cm_svm_bow,
cm_log_reg_tfidf, cm_rf_tfidf, cm_svm_tfidf
]
# Plotting each heatmap
for i, ax in enumerate(axes.flat):
sns.heatmap(
conf_matrices[i], annot=True, fmt='d', cmap='Blues',
xticklabels=['Happy', 'Neutral', 'Unhappy'],
yticklabels=['Happy', 'Neutral', 'Unhappy'], ax=ax
)
ax.set_title(titles[i])
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.tight_layout()
plt.show()
# 16
# Step 1: Use the already defined `processed_data` from your script.
# Step 2: Combine text from all columns to build a unified vocabulary
from sklearn.metrics.pairwise import cosine_similarity
combined_text_all = processed_data.apply(
lambda row: ' '.join(row.astype(str)), axis=1
)
# Fit the TF-IDF vectorizer on the combined text
tfidf_vectorizer_all = TfidfVectorizer()
tfidf_vectorizer_all.fit(combined_text_all)
# Step 3: Transform each column using the unified vocabulary
tfidf_vectors_all = {
    col: tfidf_vectorizer_all.transform(processed_data[col].astype(str))
    for col in processed_data.columns
}
# Step 4: Compute Pairwise Cosine Similarity for All Labels
similarity_matrix_all = np.zeros((len(processed_data.columns), len(processed_data.columns)))
for i, col1 in enumerate(processed_data.columns):
for j, col2 in enumerate(processed_data.columns):
if i == j: # Self-similarity
similarity_matrix_all[i, j] = 1.0
else: # Pairwise similarity
similarity_matrix_all[i, j] = cosine_similarity(
tfidf_vectors_all[col1], tfidf_vectors_all[col2]
).mean()
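# The loop above averages all pairwise response similarities, which is
# quadratic in the number of responses per column pair. A cheaper sketch
# (an alternative, not the method used above): compare each column's mean
# TF-IDF vector (its centroid) instead.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
centroids = np.vstack([
    np.asarray(tfidf_vectors_all[col].mean(axis=0)) for col in processed_data.columns
])
centroid_similarity = cosine_similarity(centroids)  # same shape as similarity_matrix_all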
# Step 5: Visualize the Similarity Matrix for All Labels
plt.figure(figsize=(12, 10))
sns.heatmap(
similarity_matrix_all,
xticklabels=processed_data.columns,
yticklabels=processed_data.columns,
cmap='coolwarm',
annot=True,
fmt=".2f",
annot_kws={"size": 10}, # Customize annotation font size
cbar_kws={"shrink": 0.8, "label": "Similarity Score"} # Color bar
customization
)
plt.title("Text Similarity Between All Labels (Cosine Similarity)",
fontsize=16)
plt.xlabel("Labels", fontsize=12)
plt.ylabel("Labels", fontsize=12)
plt.xticks(rotation=45, ha='right', fontsize=10) # Rotate x-axis
labels for better readability
plt.yticks(fontsize=10)
plt.tight_layout()
plt.show()
# Step 6: Identify the Most and Least Similar Pairs Across All Labels
similarity_df_all = pd.DataFrame(
similarity_matrix_all,
index=processed_data.columns,
columns=processed_data.columns
)
# Melt the matrix for pairwise comparison
similarity_melted_all = similarity_df_all.reset_index().melt(
id_vars='index',
var_name='Label 2',
value_name='Similarity'
).rename(columns={'index': 'Label 1'})
# Remove self-similarity (diagonal values)
similarity_melted_all = similarity_melted_all[
    similarity_melted_all['Label 1'] != similarity_melted_all['Label 2']
]
# Sort for most and least similar pairs
most_similar_all = similarity_melted_all.sort_values(by='Similarity', ascending=False).head(1)
least_similar_all = similarity_melted_all.sort_values(by='Similarity', ascending=True).head(1)
# Output results
print("Most Similar Pair:")
print(most_similar_all)
print("\nLeast Similar Pair:")
print(least_similar_all)
# 17
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset, DataLoader
import torch
from torch.optim import AdamW
from tqdm import tqdm
import matplotlib.pyplot as plt
# Step 1: Load and Preprocess Dataset
file_path = '/content/Nri_Textual_Survey_Data.csv'  # Replace with your dataset path
data = pd.read_csv(file_path)
# Preprocess text
def preprocess_text_dl(text_dl):
"""Clean text."""
text_dl = str(text_dl).lower()
text_dl = re.sub(r'[^\w\s]', '', text_dl) # Remove punctuation
    text_dl = re.sub(r'\s+', ' ', text_dl).strip()  # Remove extra spaces
return text_dl
# Apply preprocessing to text column dynamically
text_column = data.columns[0]  # Dynamically use the first column as text
data[text_column] = data[text_column].apply(preprocess_text_dl)
# Analyze sentiment dynamically
def analyze_sentiment(text_dl):
"""Classify sentiment using polarity."""
from textblob import TextBlob
try:
blob = TextBlob(text_dl)
polarity = blob.sentiment.polarity
if polarity > 0:
return 0 # Happy
elif polarity == 0:
return 1 # Neutral
else:
return 2 # Unhappy
    except Exception:
        return 1  # Default to neutral on failure
data['label'] = data[text_column].apply(analyze_sentiment)
# Step 2: Split Data
train_texts, test_texts, train_labels, test_labels = train_test_split(
data[text_column], data['label'], test_size=0.2, random_state=42
)
# Step 3: Tokenize Using BERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
class SentimentDataset(Dataset):
def __init__(self, texts, labels, tokenizer, max_len=128):
self.texts = texts
self.labels = labels
self.tokenizer = tokenizer
self.max_len = max_len
def __len__(self):
return len(self.texts)
def __getitem__(self, idx):
text = self.texts.iloc[idx]
label = self.labels.iloc[idx]
encoding = self.tokenizer(
text,
truncation=True,
padding='max_length',
max_length=self.max_len,
return_tensors="pt"
)
return {
'input_ids': encoding['input_ids'].squeeze(0),
'attention_mask': encoding['attention_mask'].squeeze(0),
'labels': torch.tensor(label, dtype=torch.long)
}
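# Quick shape check on one encoded example (diagnostic only; with
# max_len=128 both tensors should be length-128 vectors):
_sample_item = SentimentDataset(train_texts, train_labels, tokenizer)[0]
print(_sample_item['input_ids'].shape, _sample_item['attention_mask'].shape)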
train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
test_dataset = SentimentDataset(test_texts, test_labels, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)
# Step 4: Define BERT Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
model.to(device)
# Step 5: Train Model
optimizer = AdamW(model.parameters(), lr=5e-5)
epochs = 1
model.train()
for epoch in range(epochs):
total_loss = 0
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch + 1}"):
optimizer.zero_grad()
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
loss = outputs.loss
total_loss += loss.item()
loss.backward()
optimizer.step()
print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}")
# Step 6: Evaluate Model
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
for batch in tqdm(test_loader, desc="Evaluating"):
input_ids = batch['input_ids'].to(device)
attention_mask = batch['attention_mask'].to(device)
labels = batch['labels'].to(device)
outputs = model(input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=1)
all_preds.extend(preds.cpu().numpy())
all_labels.extend(labels.cpu().numpy())
# Calculate Accuracy
accuracy = accuracy_score(all_labels, all_preds)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds, labels=[0, 1, 2])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Happy", "Neutral", "Unhappy"])
disp.plot(cmap="Blues")
plt.title("Confusion Matrix - Sentiment Analysis")
plt.show()