Natural Language Processing
Lab Assignment
R.BhanuKiran
22BCE9560
L45+L46
1. Implement text pre-processing on the Brown corpus and the Gutenberg corpus and display, separately for each corpus: the list of tokens (with count), the list of sentences (with count), the count of paragraphs, the list and count of unique words, the list and count of rare words, and the list and count of stop-words.
import nltk
from nltk.corpus import brown, gutenberg, stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from collections import Counter
import string

nltk.download('punkt_tab')
nltk.download('brown')
nltk.download('gutenberg')
nltk.download('punkt')
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))
punctuations = set(string.punctuation)

def preprocess_and_analyze_corpus(corpus, corpus_name="Corpus"):
    print(f"\n=== 📘 Analyzing {corpus_name} ===")
    if corpus_name == "Brown":
        raw_text = " ".join(brown.words())
        fileids = brown.fileids()
    elif corpus_name == "Gutenberg":
        raw_text = " ".join(gutenberg.words())
        fileids = gutenberg.fileids()
    else:
        return
    tokens = word_tokenize(raw_text)
    sents = sent_tokenize(raw_text)
    cleaned_tokens = [
        token.lower() for token in tokens
        if token.lower() not in stop_words and token not in punctuations and token.isalpha()
    ]
    token_counts = Counter(cleaned_tokens)
    unique_words = list(token_counts.keys())
    rare_words = [word for word, count in token_counts.items() if count == 1]
    stopword_list = [token for token in tokens if token.lower() in stop_words]
    print(f"Total Tokens: {len(tokens)}")
    print(f"Sample Tokens: {tokens[:10]}")
    print(f"Total Sentences: {len(sents)}")
    print(f"Sample Sentences: {sents[:2]}")
    print(f"Total Paragraphs (FileIDs): {len(fileids)}")
    print(f"FileIDs (Used as Paragraphs): {fileids[:5]}")
    print(f"Unique Words Count: {len(unique_words)}")
    print(f"Unique Words Sample: {unique_words[:10]}")
    print(f"Rare Words Count: {len(rare_words)}")
    print(f"Rare Words Sample: {rare_words[:10]}")
    print(f"Stopwords Count: {len(stopword_list)}")
    print(f"Stopwords Sample: {stopword_list[:10]}")
    print("\n" + "-"*60)

preprocess_and_analyze_corpus(brown, corpus_name="Brown")
preprocess_and_analyze_corpus(gutenberg, corpus_name="Gutenberg")
OUTPUT:
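Note: the listing above reports file IDs as a stand-in for paragraphs. NLTK's corpus readers also expose paragraph structure directly through paras(); the short sketch below is an optional addition (assuming the same corpus downloads as above) that counts actual paragraphs.

# Optional check: count real paragraphs via the corpus readers' paras()
# method instead of treating file IDs as paragraphs.
from nltk.corpus import brown, gutenberg

for name, corpus in [("Brown", brown), ("Gutenberg", gutenberg)]:
    paragraphs = corpus.paras()  # list of paragraphs; each is a list of sentences
    print(f"{name}: {len(paragraphs)} paragraphs, "
          f"first paragraph has {len(paragraphs[0])} sentence(s)")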
2. Perform text summarization, POS tagging and NER modelling on the pre-processed corpus using the following neural networks.
1. Feed-Forward Neural Network
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

data = [
    (["I", "love", "coding"], ["PRON", "VERB", "NOUN"]),
    (["She", "writes", "code"], ["PRON", "VERB", "NOUN"]),
    (["They", "play", "football"], ["PRON", "VERB", "NOUN"]),
]
words = sorted(set(word for sent, _ in data for word in sent))
tags = sorted(set(tag for _, t in data for tag in t))
word2idx = {word: i for i, word in enumerate(words)}
tag2idx = {tag: i for i, tag in enumerate(tags)}
idx2tag = {i: tag for tag, i in tag2idx.items()}

X = []
y = []
for sent, tag_seq in data:
    for word, tag in zip(sent, tag_seq):
        X.append(word2idx[word])
        y.append(tag2idx[tag])
X = torch.tensor(X)
y = torch.tensor(y)

class FFNN_POS(nn.Module):
    def __init__(self, vocab_size, tagset_size, emb_dim=32, hidden_dim=64):
        super(FFNN_POS, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.fc1 = nn.Linear(emb_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, tagset_size)

    def forward(self, x):
        x = self.embedding(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = FFNN_POS(len(word2idx), len(tag2idx))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training
for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

# Testing
model.eval()
with torch.no_grad():
    test_word = "love"
    test_input = torch.tensor([word2idx[test_word]])  # shape: [1]
    pred = model(test_input)
    predicted_tag = idx2tag[torch.argmax(pred).item()]
    print(f"Prediction for '{test_word}': {predicted_tag}")
OUTPUT:
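The question also covers NER modelling; the same single-word feed-forward classifier can be reused for it by swapping the POS tags for entity tags. The sketch below is only an illustration on made-up toy data (the sentences and the PER/LOC/O tags are assumptions, not part of the lab output).

# Hedged sketch: the FFNN_POS idea reused for NER on toy data.
import torch
import torch.nn as nn
import torch.optim as optim

ner_data = [
    (["Alice", "visited", "Paris"], ["PER", "O", "LOC"]),
    (["Bob", "lives", "in", "Delhi"], ["PER", "O", "O", "LOC"]),
]
words = sorted({w for s, _ in ner_data for w in s})
ents = sorted({t for _, ts in ner_data for t in ts})
word2idx = {w: i for i, w in enumerate(words)}
ent2idx = {t: i for i, t in enumerate(ents)}
idx2ent = {i: t for t, i in ent2idx.items()}

X = torch.tensor([word2idx[w] for s, _ in ner_data for w in s])
y = torch.tensor([ent2idx[t] for _, ts in ner_data for t in ts])

class FFNN_NER(nn.Module):
    def __init__(self, vocab_size, num_entities, emb_dim=32, hidden_dim=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.fc1 = nn.Linear(emb_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_entities)

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(self.embedding(x))))

model = FFNN_NER(len(word2idx), len(ent2idx))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
for _ in range(100):
    optimizer.zero_grad()
    loss = criterion(model(X), y)
    loss.backward()
    optimizer.step()

model.eval()
with torch.no_grad():
    pred = model(torch.tensor([word2idx["Paris"]]))
    print("Predicted entity for 'Paris':", idx2ent[torch.argmax(pred).item()])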
2. Recurrent Neural Network (RNN)
import torch
import torch.nn as nn
import torch.optim as optim

data = [
    (["I", "love", "coding"], ["PRON", "VERB", "NOUN"]),
    (["She", "writes", "code"], ["PRON", "VERB", "NOUN"]),
    (["They", "play", "football"], ["PRON", "VERB", "NOUN"]),
]
word_set = set(word for sentence, _ in data for word in sentence)
tag_set = set(tag for _, tags in data for tag in tags)
word2idx = {word: i + 1 for i, word in enumerate(word_set)}
word2idx["<PAD>"] = 0
tag2idx = {tag: i for i, tag in enumerate(tag_set)}
idx2tag = {i: tag for tag, i in tag2idx.items()}

EMBEDDING_DIM = 32
HIDDEN_DIM = 64
EPOCHS = 100

def encode_sentence(sentence, tag_seq, max_len):
    word_ids = [word2idx[word] for word in sentence]
    tag_ids = [tag2idx[tag] for tag in tag_seq]
    # Padding
    while len(word_ids) < max_len:
        word_ids.append(word2idx["<PAD>"])
        tag_ids.append(-1)
    return word_ids, tag_ids

max_len = max(len(s) for s, _ in data)
X, y = zip(*[encode_sentence(s, t, max_len) for s, t in data])
X = torch.tensor(X)
y = torch.tensor(y)

class RNN_POS(nn.Module):
    def __init__(self, vocab_size, tagset_size, emb_dim, hidden_dim):
        super(RNN_POS, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.rnn = nn.RNN(emb_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tagset_size)

    def forward(self, x):
        x = self.embedding(x)
        output, _ = self.rnn(x)
        output = self.fc(output)
        return output

model = RNN_POS(len(word2idx), len(tag2idx), EMBEDDING_DIM, HIDDEN_DIM)
criterion = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    output = model(X)  # (batch, seq_len, tagset_size)
    output = output.view(-1, output.shape[-1])
    y_flat = y.view(-1)
    loss = criterion(output, y_flat)
    loss.backward()
    optimizer.step()

def predict(sentence):
    model.eval()
    tokens = [word2idx.get(word, 0) for word in sentence]
    while len(tokens) < max_len:
        tokens.append(0)
    input_tensor = torch.tensor([tokens])
    with torch.no_grad():
        predictions = model(input_tensor)
    pred_tags = torch.argmax(predictions, dim=2)[0]
    return [idx2tag[idx.item()] for idx in pred_tags[:len(sentence)]]

test_sentence = ["She", "plays", "football"]
print("Sentence:", test_sentence)
print("Predicted POS:", predict(test_sentence))
OUTPUT:
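One design note on the RNN tagger: predict() maps unseen words to index 0, which is also the padding index. A common alternative is a dedicated <UNK> id; the standalone sketch below (an assumed convention, not in the original listing) shows only that vocabulary change.

# Hedged sketch: reserving a separate <UNK> id so unknown words are not
# mapped to the padding index. Indices 0 and 1 are an assumed convention.
toy_vocab = {"I", "love", "coding", "She", "writes", "code", "They", "play", "football"}
word2idx = {"<PAD>": 0, "<UNK>": 1}
for i, word in enumerate(sorted(toy_vocab)):
    word2idx[word] = i + 2

def encode_for_inference(sentence, max_len=3):
    ids = [word2idx.get(w, word2idx["<UNK>"]) for w in sentence]
    return ids + [word2idx["<PAD>"]] * (max_len - len(ids))

print(encode_for_inference(["She", "plays", "football"]))  # "plays" maps to the <UNK> id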
3. Long Short-Term Memory (LSTM)
import torch
import torch.nn as nn
import torch.optim as optim

data = [
    (["I", "love", "coding"], ["PRON", "VERB", "NOUN"]),
    (["She", "writes", "code"], ["PRON", "VERB", "NOUN"]),
    (["They", "play", "football"], ["PRON", "VERB", "NOUN"]),
]
word_set = set(word for sentence, _ in data for word in sentence)
tag_set = set(tag for _, tags in data for tag in tags)
word2idx = {word: i + 1 for i, word in enumerate(word_set)}  # +1 for padding
word2idx["<PAD>"] = 0
tag2idx = {tag: i for i, tag in enumerate(tag_set)}
idx2tag = {i: tag for tag, i in tag2idx.items()}

EMBEDDING_DIM = 32
HIDDEN_DIM = 64
EPOCHS = 100

def encode_sentence(sentence, tag_seq, max_len):
    word_ids = [word2idx[word] for word in sentence]
    tag_ids = [tag2idx[tag] for tag in tag_seq]
    while len(word_ids) < max_len:
        word_ids.append(word2idx["<PAD>"])
        tag_ids.append(-1)  # Ignore index for padding
    return word_ids, tag_ids

max_len = max(len(s) for s, _ in data)
X, y = zip(*[encode_sentence(s, t, max_len) for s, t in data])
X = torch.tensor(X)
y = torch.tensor(y)

class LSTM_POS(nn.Module):
    def __init__(self, vocab_size, tagset_size, emb_dim, hidden_dim):
        super(LSTM_POS, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, tagset_size)

    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out)
        return out

model = LSTM_POS(len(word2idx), len(tag2idx), EMBEDDING_DIM, HIDDEN_DIM)
criterion = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    output = model(X)  # (batch, seq_len, tagset_size)
    output = output.view(-1, output.shape[-1])
    y_flat = y.view(-1)
    loss = criterion(output, y_flat)
    loss.backward()
    optimizer.step()

def predict(sentence):
    model.eval()
    tokens = [word2idx.get(word, 0) for word in sentence]
    while len(tokens) < max_len:
        tokens.append(0)
    input_tensor = torch.tensor([tokens])
    with torch.no_grad():
        predictions = model(input_tensor)
    pred_tags = torch.argmax(predictions, dim=2)[0]
    return [idx2tag[idx.item()] for idx in pred_tags[:len(sentence)]]

test_sentence = ["I", "play", "football"]
print("Sentence:", test_sentence)
print("Predicted POS:", predict(test_sentence))
OUTPUT:
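A common refinement of the LSTM tagger is a bidirectional LSTM, so that each position sees both left and right context. The sketch below is an assumed variant, not part of the original lab code; only the bidirectional flag and the doubled Linear input size differ from LSTM_POS.

# Hedged sketch: a bidirectional variant of LSTM_POS.
import torch.nn as nn

class BiLSTM_POS(nn.Module):
    def __init__(self, vocab_size, tagset_size, emb_dim=32, hidden_dim=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, tagset_size)  # forward + backward hidden states

    def forward(self, x):
        out, _ = self.lstm(self.embedding(x))
        return self.fc(out)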
4. Transformer using an Encoder Architecture
# 2.4
import torch
import torch.nn as nn
import torch.optim as optim
import math

# Sample data
data = [
    (["I", "love", "coding"], ["PRON", "VERB", "NOUN"]),
    (["She", "writes", "code"], ["PRON", "VERB", "NOUN"]),
    (["They", "play", "football"], ["PRON", "VERB", "NOUN"]),
]
word_set = set(w for sent, _ in data for w in sent)
tag_set = set(tag for _, tags in data for tag in tags)
word2idx = {w: i + 1 for i, w in enumerate(word_set)}
word2idx["<PAD>"] = 0
tag2idx = {t: i for i, t in enumerate(tag_set)}
idx2tag = {i: t for t, i in tag2idx.items()}

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=512):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model).float()
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)].to(x.device)

def encode(sentence, tags, max_len):
    word_ids = [word2idx[w] for w in sentence]
    tag_ids = [tag2idx[t] for t in tags]
    while len(word_ids) < max_len:
        word_ids.append(word2idx["<PAD>"])
        tag_ids.append(-1)
    return word_ids, tag_ids

max_len = max(len(s) for s, _ in data)
X, y = zip(*[encode(s, t, max_len) for s, t in data])
X = torch.tensor(X)
y = torch.tensor(y)

class TransformerPOSTagger(nn.Module):
    def __init__(self, vocab_size, tagset_size, emb_dim=64, num_heads=2, num_layers=2, ff_dim=128):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.positional_encoding = PositionalEncoding(emb_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=num_heads, dim_feedforward=ff_dim)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(emb_dim, tagset_size)

    def forward(self, x):
        x = self.embedding(x)  # (batch_size, seq_len, emb_dim)
        x = self.positional_encoding(x)
        x = x.permute(1, 0, 2)  # Transformer expects (seq_len, batch_size, emb_dim)
        x = self.transformer_encoder(x)
        x = x.permute(1, 0, 2)  # Back to (batch_size, seq_len, emb_dim)
        return self.fc(x)

model = TransformerPOSTagger(len(word2idx), len(tag2idx))
criterion = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    outputs = model(X)
    outputs = outputs.view(-1, outputs.shape[-1])
    loss = criterion(outputs, y.view(-1))
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Inference
def predict(sentence):
    model.eval()
    tokens = [word2idx.get(w, 0) for w in sentence]
    while len(tokens) < max_len:
        tokens.append(0)
    input_tensor = torch.tensor([tokens])
    with torch.no_grad():
        out = model(input_tensor)
    pred = torch.argmax(out, dim=-1)[0]
    return [idx2tag[i.item()] for i in pred[:len(sentence)]]

# Test
test_sentence = ["I", "write", "code"]
print("Sentence:", test_sentence)
print("Predicted POS:", predict(test_sentence))
OUTPUT:
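The question also asks for text summarization; the toy encoder above is too small for that, so one common route is a pre-trained encoder-decoder model through the Hugging Face transformers pipeline. The sketch below is an assumption-laden illustration: it presumes the transformers package is installed, that the default summarization model can be downloaded, and it simply picks a short Gutenberg excerpt as input.

# Hedged sketch: summarizing a short passage with a pre-trained model.
from nltk.corpus import gutenberg
from transformers import pipeline

summarizer = pipeline("summarization")
passage = " ".join(gutenberg.words("austen-emma.txt")[:400])  # short excerpt
summary = summarizer(passage, max_length=60, min_length=20, do_sample=False)
print(summary[0]["summary_text"])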
3. Implement text classification (sensitive data vs. normal data) using a linear SVM algorithm, based on the sensitive-data program from week 7/8.
import nltk
import re
from nltk.corpus import brown
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
from tabulate import tabulate

nltk.download('brown')
nltk.download('punkt')

corpus_sentences = [" ".join(sentence) for sentence in brown.sents()[:2000]]

sensitive_keywords = {
    "personal": ["name", "email", "address", "dob", "birth", "phone", "gender"],
    "financial": ["credit", "debit", "card", "account", "balance", "bank", "income", "salary"],
    "social": ["facebook", "twitter", "instagram", "friends", "social", "media", "relationship"]
}
category_score = {
    "personal": 5,
    "financial": 4,
    "social": 3
}

def classify_sensitivity(text):
    text = text.lower()
    max_sensitivity = 0
    matched_words = []
    for category, keywords in sensitive_keywords.items():
        for keyword in keywords:
            if keyword in text:
                matched_words.append((keyword, category_score[category]))
                max_sensitivity = max(max_sensitivity, category_score[category])
    classification = "Sensitive" if max_sensitivity > 0 else "Normal"
    return classification, matched_words

processed_sentences = []
labels = []
matched_keywords = []
for sentence in corpus_sentences:
    label, found = classify_sensitivity(sentence)
    processed_sentences.append(sentence)
    labels.append(label)
    matched_keywords.append(found)

sample_data = []
for i in range(5):
    sample_data.append({
        "Sentence": processed_sentences[i],
        "Label": labels[i],
        "Sensitive Terms": ", ".join([f"{term}({score})" for term, score in matched_keywords[i]]) if matched_keywords[i] else "-"
    })
sample_df = pd.DataFrame(sample_data)
print("\n📌 Sample Sensitivity Analysis:\n")
print(tabulate(sample_df, headers='keys', tablefmt='grid', showindex=True))

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(processed_sentences)
y = [1 if label == "Sensitive" else 0 for label in labels]  # Convert to binary labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
svm_classifier = LinearSVC()
svm_classifier.fit(X_train, y_train)
y_pred = svm_classifier.predict(X_test)

report = classification_report(y_test, y_pred, target_names=["Normal", "Sensitive"], output_dict=True)
report_df = pd.DataFrame(report).transpose()
print("\n📊 Model Evaluation Report:\n")
print(tabulate(report_df, headers='keys', tablefmt='grid', floatfmt=".2f"))
OUTPUT:
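As a quick usage illustration, the fitted TfidfVectorizer and LinearSVC from the listing above can classify new text. The sketch below is meant to run after that listing (it reuses vectorizer and svm_classifier), and the example sentences are made up for illustration rather than taken from the Brown corpus.

# Hedged usage sketch: classifying new sentences with the fitted pipeline above.
new_sentences = [
    "Please update my bank account and credit card details.",
    "The committee met on Friday to discuss the schedule.",
]
new_features = vectorizer.transform(new_sentences)  # reuse the fitted vocabulary
for sentence, pred in zip(new_sentences, svm_classifier.predict(new_features)):
    print(f"{'Sensitive' if pred == 1 else 'Normal'}: {sentence}")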