forked from DhanushNehru/Hacktoberfest2025
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCheckTextData.js
More file actions
70 lines (57 loc) · 1.83 KB
/
CheckTextData.js
File metadata and controls
70 lines (57 loc) · 1.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
//===================================================
// Made by Gurdeep Singh https://github.com/Gurdeep99
//===================================================
/**
 * Compute the bag-of-words cosine similarity between two texts.
 *
 * Each text is lower-cased, stripped of every character that is not an ASCII
 * letter/digit, a Devanagari character (U+0900-U+097F) or whitespace, and then
 * split on whitespace. The two token lists are turned into term-frequency
 * vectors over their combined vocabulary and compared with cosine similarity.
 *
 * @param {string} text1 - First text. `null`/`undefined` is treated as empty text.
 * @param {string} text2 - Second text. `null`/`undefined` is treated as empty text.
 * @returns {{similarity: string, text1Tokens: number, text2Tokens: number, commonWords: string[]}}
 *   `similarity` is the cosine similarity as a percentage with two decimals
 *   (e.g. "35.86%"); `text1Tokens` / `text2Tokens` are the token counts
 *   (duplicates included); `commonWords` lists the distinct tokens present in
 *   both texts, in order of first appearance.
 */
function textSimilarity(text1, text2) {
  // Helper: normalise a text into a list of clean, lower-case tokens.
  function tokenize(text) {
    const raw = text == null ? "" : String(text);
    return raw
      .toLowerCase()
      .replace(/[^a-zA-Z0-9\u0900-\u097F\s]/g, "") // supports English + Hindi
      .split(/\s+/)
      .filter(Boolean);
  }

  // Helper: count occurrences per token. A Map is used instead of a plain
  // object so tokens such as "constructor" cannot collide with properties
  // inherited from Object.prototype.
  function countTerms(tokens) {
    const counts = new Map();
    for (const word of tokens) {
      counts.set(word, (counts.get(word) || 0) + 1);
    }
    return counts;
  }

  function dot(a, b) {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  function magnitude(v) {
    return Math.sqrt(dot(v, v));
  }

  // Step 1: Tokenize texts
  const tokens1 = tokenize(text1);
  const tokens2 = tokenize(text2);

  // Step 2: Build vocabulary (distinct tokens, first-appearance order)
  const vocab = Array.from(new Set([...tokens1, ...tokens2]));

  // Step 3: Term-frequency vectors aligned with the vocabulary
  const counts1 = countTerms(tokens1);
  const counts2 = countTerms(tokens2);
  const tf1 = vocab.map(word => counts1.get(word) || 0);
  const tf2 = vocab.map(word => counts2.get(word) || 0);

  // Step 4: Cosine similarity. If either text has no tokens its vector has
  // zero length; report 0 instead of dividing by zero.
  const norm = magnitude(tf1) * magnitude(tf2);
  const cosine = norm === 0 ? 0 : dot(tf1, tf2) / norm;

  return {
    similarity: (cosine * 100).toFixed(2) + "%",
    text1Tokens: tokens1.length,
    text2Tokens: tokens2.length,
    commonWords: vocab.filter(word => counts1.has(word) && counts2.has(word))
  };
}
// =================
// ✅ Example Usage
// =================
// Compare two sentences that mean roughly the same thing but share few words.
// The score is based purely on word overlap, so "AI" does not match
// "Artificial intelligence" and the similarity stays low.
const result = textSimilarity(
  "Artificial intelligence is changing the world rapidly.",
  "AI is transforming the world in a very fast way."
);
console.log(result);
// =================
// ✅ Example Result
// =================
// {
//   similarity: "35.86%",
//   text1Tokens: 7,
//   text2Tokens: 10,
//   commonWords: ["is", "the", "world"]
// }