Document distance in JavaScript
up vote
0
down vote
favorite
I am looking for the fastest way to find document distance. How can I improve this?
Split each document into words. replaceAll() is faster than replace()
/**
 * Replaces every occurrence of `search` in the string.
 *
 * NOTE(review): this assignment overrides the native ES2021
 * String.prototype.replaceAll; patching built-in prototypes is discouraged.
 * It is kept so that callers passing a RegExp without the `g` flag keep working.
 *
 * @param {string|RegExp} search - A literal substring, or a RegExp. A RegExp
 *   keeps its own flags and is forced to be global.
 * @param {string|Function} replacement - Passed straight to String.prototype.replace.
 * @returns {string} A new string with all matches replaced.
 */
String.prototype.replaceAll = function (search, replacement) {
  let pattern;
  if (search instanceof RegExp) {
    // Keep flags such as `i`/`m`/`u`; only add `g` when it is missing.
    const flags = search.flags.includes('g') ? search.flags : `${search.flags}g`;
    pattern = new RegExp(search.source, flags);
  } else {
    // Escape regex metacharacters so a plain string is matched literally.
    const literal = String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(literal, 'g');
  }
  return this.replace(pattern, replacement);
};
Creating word array here:
/**
 * Splits a document into its words.
 *
 * Only ASCII letters and digits are kept; every other non-whitespace character
 * is deleted (so "don't" becomes "dont"). Any run of whitespace, including
 * newlines and tabs, separates words, and empty tokens are dropped.
 * Case is preserved.
 *
 * @param {string} doc - The raw document text.
 * @returns {string[]} The words in document order; empty for a blank document.
 */
const wordsFromDocument = doc => {
  return doc
    .replace(/[^a-zA-Z0-9\s]/g, ``) // keep whitespace so lines are not fused together
    .split(/\s+/)
    .filter(word => word.length > 0);
};
count word frequencies (document vectors)
/**
 * Tallies how often each word occurs, ignoring case (the document vector).
 *
 * @param {Iterable<string>} words - The words to count.
 * @returns {Map<string, number>} Lower-cased word mapped to its number of
 *   occurrences, in first-seen order.
 */
const wordFrequencies = words => {
  const counts = new Map();
  for (const rawWord of words) {
    const key = rawWord.toLowerCase();
    const seenSoFar = counts.has(key) ? counts.get(key) : 0;
    counts.set(key, seenSoFar + 1);
  }
  return counts;
};
Compute dot product (& divide).
/**
 * Dot product of two word-frequency vectors.
 *
 * Only words present in both maps contribute, so the smaller map is iterated
 * and the larger one is probed.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} Sum over shared words of countOne * countTwo; 0 if nothing is shared.
 */
const dotProduct = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const [smaller, larger] =
    wordFrequencyMapOne.size < wordFrequencyMapTwo.size
      ? [wordFrequencyMapOne, wordFrequencyMapTwo]
      : [wordFrequencyMapTwo, wordFrequencyMapOne];

  let sum = 0;
  for (const [word, count] of smaller) {
    const otherCount = larger.get(word);
    if (otherCount !== undefined) {
      sum += count * otherCount;
    }
  }
  return sum;
};
cosine of the vector angle = dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)); the angle itself is the arccos of this value
/**
 * Cosine of the angle between two word-frequency vectors:
 * dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)).
 *
 * Despite the name, the value returned is the cosine, not the angle; apply
 * Math.acos to the result to obtain the angle in radians.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} The cosine value, or 0 when either vector has zero length.
 */
const vectorAngle = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const numerator = dotProduct(wordFrequencyMapOne, wordFrequencyMapTwo);
  const denominator = Math.sqrt(
    dotProduct(wordFrequencyMapOne, wordFrequencyMapOne) *
      dotProduct(wordFrequencyMapTwo, wordFrequencyMapTwo)
  );
  // A zero-length vector (e.g. an empty map) would give 0 / 0 = NaN; report no similarity instead.
  if (denominator === 0) {
    return 0;
  }
  return numerator / denominator;
};
javascript performance algorithm edit-distance
New contributor
add a comment |
up vote
0
down vote
favorite
I am looking for the fastest way to find document distance. How can I improve this?
Split each document into words. replaceAll() is faster than replace()
/**
 * Replaces every occurrence of `search` in the string.
 *
 * NOTE(review): this assignment overrides the native ES2021
 * String.prototype.replaceAll; patching built-in prototypes is discouraged.
 * It is kept so that callers passing a RegExp without the `g` flag keep working.
 *
 * @param {string|RegExp} search - A literal substring, or a RegExp. A RegExp
 *   keeps its own flags and is forced to be global.
 * @param {string|Function} replacement - Passed straight to String.prototype.replace.
 * @returns {string} A new string with all matches replaced.
 */
String.prototype.replaceAll = function (search, replacement) {
  let pattern;
  if (search instanceof RegExp) {
    // Keep flags such as `i`/`m`/`u`; only add `g` when it is missing.
    const flags = search.flags.includes('g') ? search.flags : `${search.flags}g`;
    pattern = new RegExp(search.source, flags);
  } else {
    // Escape regex metacharacters so a plain string is matched literally.
    const literal = String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(literal, 'g');
  }
  return this.replace(pattern, replacement);
};
Creating word array here:
/**
 * Splits a document into its words.
 *
 * Only ASCII letters and digits are kept; every other non-whitespace character
 * is deleted (so "don't" becomes "dont"). Any run of whitespace, including
 * newlines and tabs, separates words, and empty tokens are dropped.
 * Case is preserved.
 *
 * @param {string} doc - The raw document text.
 * @returns {string[]} The words in document order; empty for a blank document.
 */
const wordsFromDocument = doc => {
  return doc
    .replace(/[^a-zA-Z0-9\s]/g, ``) // keep whitespace so lines are not fused together
    .split(/\s+/)
    .filter(word => word.length > 0);
};
count word frequencies (document vectors)
/**
 * Tallies how often each word occurs, ignoring case (the document vector).
 *
 * @param {Iterable<string>} words - The words to count.
 * @returns {Map<string, number>} Lower-cased word mapped to its number of
 *   occurrences, in first-seen order.
 */
const wordFrequencies = words => {
  const counts = new Map();
  for (const rawWord of words) {
    const key = rawWord.toLowerCase();
    const seenSoFar = counts.has(key) ? counts.get(key) : 0;
    counts.set(key, seenSoFar + 1);
  }
  return counts;
};
Compute dot product (& divide).
/**
 * Dot product of two word-frequency vectors.
 *
 * Only words present in both maps contribute, so the smaller map is iterated
 * and the larger one is probed.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} Sum over shared words of countOne * countTwo; 0 if nothing is shared.
 */
const dotProduct = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const [smaller, larger] =
    wordFrequencyMapOne.size < wordFrequencyMapTwo.size
      ? [wordFrequencyMapOne, wordFrequencyMapTwo]
      : [wordFrequencyMapTwo, wordFrequencyMapOne];

  let sum = 0;
  for (const [word, count] of smaller) {
    const otherCount = larger.get(word);
    if (otherCount !== undefined) {
      sum += count * otherCount;
    }
  }
  return sum;
};
cosine of the vector angle = dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)); the angle itself is the arccos of this value
/**
 * Cosine of the angle between two word-frequency vectors:
 * dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)).
 *
 * Despite the name, the value returned is the cosine, not the angle; apply
 * Math.acos to the result to obtain the angle in radians.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} The cosine value, or 0 when either vector has zero length.
 */
const vectorAngle = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const numerator = dotProduct(wordFrequencyMapOne, wordFrequencyMapTwo);
  const denominator = Math.sqrt(
    dotProduct(wordFrequencyMapOne, wordFrequencyMapOne) *
      dotProduct(wordFrequencyMapTwo, wordFrequencyMapTwo)
  );
  // A zero-length vector (e.g. an empty map) would give 0 / 0 = NaN; report no similarity instead.
  if (denominator === 0) {
    return 0;
  }
  return numerator / denominator;
};
javascript performance algorithm edit-distance
New contributor
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
I am looking for the fastest way to find document distance. How can I improve this?
Split each document into words. replaceAll() is faster than replace()
/**
 * Replaces every occurrence of `search` in the string.
 *
 * NOTE(review): this assignment overrides the native ES2021
 * String.prototype.replaceAll; patching built-in prototypes is discouraged.
 * It is kept so that callers passing a RegExp without the `g` flag keep working.
 *
 * @param {string|RegExp} search - A literal substring, or a RegExp. A RegExp
 *   keeps its own flags and is forced to be global.
 * @param {string|Function} replacement - Passed straight to String.prototype.replace.
 * @returns {string} A new string with all matches replaced.
 */
String.prototype.replaceAll = function (search, replacement) {
  let pattern;
  if (search instanceof RegExp) {
    // Keep flags such as `i`/`m`/`u`; only add `g` when it is missing.
    const flags = search.flags.includes('g') ? search.flags : `${search.flags}g`;
    pattern = new RegExp(search.source, flags);
  } else {
    // Escape regex metacharacters so a plain string is matched literally.
    const literal = String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(literal, 'g');
  }
  return this.replace(pattern, replacement);
};
Creating word array here:
/**
 * Splits a document into its words.
 *
 * Only ASCII letters and digits are kept; every other non-whitespace character
 * is deleted (so "don't" becomes "dont"). Any run of whitespace, including
 * newlines and tabs, separates words, and empty tokens are dropped.
 * Case is preserved.
 *
 * @param {string} doc - The raw document text.
 * @returns {string[]} The words in document order; empty for a blank document.
 */
const wordsFromDocument = doc => {
  return doc
    .replace(/[^a-zA-Z0-9\s]/g, ``) // keep whitespace so lines are not fused together
    .split(/\s+/)
    .filter(word => word.length > 0);
};
count word frequencies (document vectors)
/**
 * Tallies how often each word occurs, ignoring case (the document vector).
 *
 * @param {Iterable<string>} words - The words to count.
 * @returns {Map<string, number>} Lower-cased word mapped to its number of
 *   occurrences, in first-seen order.
 */
const wordFrequencies = words => {
  const counts = new Map();
  for (const rawWord of words) {
    const key = rawWord.toLowerCase();
    const seenSoFar = counts.has(key) ? counts.get(key) : 0;
    counts.set(key, seenSoFar + 1);
  }
  return counts;
};
Compute dot product (& divide).
/**
 * Dot product of two word-frequency vectors.
 *
 * Only words present in both maps contribute, so the smaller map is iterated
 * and the larger one is probed.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} Sum over shared words of countOne * countTwo; 0 if nothing is shared.
 */
const dotProduct = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const [smaller, larger] =
    wordFrequencyMapOne.size < wordFrequencyMapTwo.size
      ? [wordFrequencyMapOne, wordFrequencyMapTwo]
      : [wordFrequencyMapTwo, wordFrequencyMapOne];

  let sum = 0;
  for (const [word, count] of smaller) {
    const otherCount = larger.get(word);
    if (otherCount !== undefined) {
      sum += count * otherCount;
    }
  }
  return sum;
};
cosine of the vector angle = dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)); the angle itself is the arccos of this value
/**
 * Cosine of the angle between two word-frequency vectors:
 * dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)).
 *
 * Despite the name, the value returned is the cosine, not the angle; apply
 * Math.acos to the result to obtain the angle in radians.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} The cosine value, or 0 when either vector has zero length.
 */
const vectorAngle = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const numerator = dotProduct(wordFrequencyMapOne, wordFrequencyMapTwo);
  const denominator = Math.sqrt(
    dotProduct(wordFrequencyMapOne, wordFrequencyMapOne) *
      dotProduct(wordFrequencyMapTwo, wordFrequencyMapTwo)
  );
  // A zero-length vector (e.g. an empty map) would give 0 / 0 = NaN; report no similarity instead.
  if (denominator === 0) {
    return 0;
  }
  return numerator / denominator;
};
javascript performance algorithm edit-distance
New contributor
I am looking for the fastest way to find document distance. How can I improve this?
Split each document into words. replaceAll() is faster than replace()
/**
 * Replaces every occurrence of `search` in the string.
 *
 * NOTE(review): this assignment overrides the native ES2021
 * String.prototype.replaceAll; patching built-in prototypes is discouraged.
 * It is kept so that callers passing a RegExp without the `g` flag keep working.
 *
 * @param {string|RegExp} search - A literal substring, or a RegExp. A RegExp
 *   keeps its own flags and is forced to be global.
 * @param {string|Function} replacement - Passed straight to String.prototype.replace.
 * @returns {string} A new string with all matches replaced.
 */
String.prototype.replaceAll = function (search, replacement) {
  let pattern;
  if (search instanceof RegExp) {
    // Keep flags such as `i`/`m`/`u`; only add `g` when it is missing.
    const flags = search.flags.includes('g') ? search.flags : `${search.flags}g`;
    pattern = new RegExp(search.source, flags);
  } else {
    // Escape regex metacharacters so a plain string is matched literally.
    const literal = String(search).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    pattern = new RegExp(literal, 'g');
  }
  return this.replace(pattern, replacement);
};
Creating word array here:
/**
 * Splits a document into its words.
 *
 * Only ASCII letters and digits are kept; every other non-whitespace character
 * is deleted (so "don't" becomes "dont"). Any run of whitespace, including
 * newlines and tabs, separates words, and empty tokens are dropped.
 * Case is preserved.
 *
 * @param {string} doc - The raw document text.
 * @returns {string[]} The words in document order; empty for a blank document.
 */
const wordsFromDocument = doc => {
  return doc
    .replace(/[^a-zA-Z0-9\s]/g, ``) // keep whitespace so lines are not fused together
    .split(/\s+/)
    .filter(word => word.length > 0);
};
count word frequencies (document vectors)
/**
 * Tallies how often each word occurs, ignoring case (the document vector).
 *
 * @param {Iterable<string>} words - The words to count.
 * @returns {Map<string, number>} Lower-cased word mapped to its number of
 *   occurrences, in first-seen order.
 */
const wordFrequencies = words => {
  const counts = new Map();
  for (const rawWord of words) {
    const key = rawWord.toLowerCase();
    const seenSoFar = counts.has(key) ? counts.get(key) : 0;
    counts.set(key, seenSoFar + 1);
  }
  return counts;
};
Compute dot product (& divide).
/**
 * Dot product of two word-frequency vectors.
 *
 * Only words present in both maps contribute, so the smaller map is iterated
 * and the larger one is probed.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} Sum over shared words of countOne * countTwo; 0 if nothing is shared.
 */
const dotProduct = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const [smaller, larger] =
    wordFrequencyMapOne.size < wordFrequencyMapTwo.size
      ? [wordFrequencyMapOne, wordFrequencyMapTwo]
      : [wordFrequencyMapTwo, wordFrequencyMapOne];

  let sum = 0;
  for (const [word, count] of smaller) {
    const otherCount = larger.get(word);
    if (otherCount !== undefined) {
      sum += count * otherCount;
    }
  }
  return sum;
};
cosine of the vector angle = dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)); the angle itself is the arccos of this value
/**
 * Cosine of the angle between two word-frequency vectors:
 * dot(1, 2) / sqrt(dot(1, 1) * dot(2, 2)).
 *
 * Despite the name, the value returned is the cosine, not the angle; apply
 * Math.acos to the result to obtain the angle in radians.
 *
 * @param {Map<string, number>} wordFrequencyMapOne - Word counts of the first document.
 * @param {Map<string, number>} wordFrequencyMapTwo - Word counts of the second document.
 * @returns {number} The cosine value, or 0 when either vector has zero length.
 */
const vectorAngle = (wordFrequencyMapOne, wordFrequencyMapTwo) => {
  const numerator = dotProduct(wordFrequencyMapOne, wordFrequencyMapTwo);
  const denominator = Math.sqrt(
    dotProduct(wordFrequencyMapOne, wordFrequencyMapOne) *
      dotProduct(wordFrequencyMapTwo, wordFrequencyMapTwo)
  );
  // A zero-length vector (e.g. an empty map) would give 0 / 0 = NaN; report no similarity instead.
  if (denominator === 0) {
    return 0;
  }
  return numerator / denominator;
};
javascript performance algorithm edit-distance
javascript performance algorithm edit-distance
New contributor
New contributor
edited 12 hours ago
200_success
127k15148410
127k15148410
New contributor
asked 16 hours ago
user7331530
11
11
New contributor
New contributor
add a comment |
add a comment |
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
user7331530 is a new contributor. Be nice, and check out our Code of Conduct.
user7331530 is a new contributor. Be nice, and check out our Code of Conduct.
user7331530 is a new contributor. Be nice, and check out our Code of Conduct.
user7331530 is a new contributor. Be nice, and check out our Code of Conduct.
Sign up or log in
// Hands StackExchange.ready a callback that invokes
// StackExchange.helpers.onClickDraftSave for the '#login-link' selector.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
// Hands StackExchange.ready a callback that invokes
// StackExchange.openid.initPostLogin with the '.new-post-login' selector,
// a URL-encoded return address and the 'question_page' tag.
StackExchange.ready(() => {
  StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f207837%2fdocument-distance-in-javascript%23new-answer', 'question_page');
});
Post as a guest
Required, but never shown
Sign up or log in
// Hands StackExchange.ready a callback that invokes
// StackExchange.helpers.onClickDraftSave for the '#login-link' selector.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Hands StackExchange.ready a callback that invokes
// StackExchange.helpers.onClickDraftSave for the '#login-link' selector.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
// Hands StackExchange.ready a callback that invokes
// StackExchange.helpers.onClickDraftSave for the '#login-link' selector.
StackExchange.ready(() => {
  StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown