5

Let's say I have a string and want to extract the uncommon keywords from it for SEO: $text = "This is some text. This is some text. Vending Machines are great.";

I will also define an array of common words that should be excluded from the extracted keyword list, like $commonWords = ['i','a','about','an','and','are','as','at','be','by','com','de','en','for','from','how','in','is','it','la','of','on','or','that','the','this','to','was','what','when','where','who','will','with','und','the','www'];

Expected output: Result=[some,text,machines,vending]

I would really appreciate it if anyone could help us write generic logic or a procedure for extracting keywords from a string.

Niks Jain
  • 1,617
  • 5
  • 27
  • 53
  • 1
    remove punctuation marks ([`.replace()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace)), split the string ([`.split()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split)), filter the array for words not found in the `$commonWords` array ([`.filter()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/filter), [`.indexOf()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/indexOf)) – Andreas Oct 30 '15 at 07:46

4 Answers4

4

This can help (it supports multiple languages):

https://github.com/michaeldelorenzo/keyword-extractor

// Sample sentence to pull keywords from.
var sentence = "President Obama woke up Monday facing a Congressional defeat that many in both parties believed could hobble his presidency.";

// Extract the keywords. `keyword_extractor` is not defined in this snippet; it is
// presumably the object exported by the keyword-extractor package linked above
// and must be loaded before this line runs.
var extraction_result = keyword_extractor.extract(sentence, {
    language: "english",
    remove_digits: true,
    return_changed_case: true,
    remove_duplicates: false
});
Alireza Fattahi
  • 42,517
  • 14
  • 123
  • 173
2

Something like this:

// Words too common to be useful as keywords; they are dropped from the result.
var $commonWords = ['i','a','about','an','and','are','as','at','be','by','com','de','en','for','from','how','in','is','it','la','of','on','or','that','the','this','to','was','what','when','where','who','will','with','und','the','www'];
var $text = "This is some text. This is some text. Vending Machines are great.";

// Convert to lowercase so the comparison against $commonWords is case-insensitive.
$text = $text.toLowerCase();

// Remove unnecessary characters: keep only word characters (letters, digits,
// underscore) and whitespace. Whitespace such as tabs and newlines is preserved
// so that words separated by it are not glued together.
$text = $text.replace(/[^\w\s]/g, '');

// Split on runs of whitespace and drop the empty strings that leading or
// trailing whitespace would otherwise produce.
var result = $text.split(/\s+/).filter(function (word) {
    return word !== '';
});

// Remove $commonWords
result = result.filter(function (word) {
    return $commonWords.indexOf(word) === -1;
});

// Unique words: keep only the first occurrence of each word. Arrays have no
// built-in unique() method, so deduplicate with filter() and indexOf().
result = result.filter(function (word, index, words) {
    return words.indexOf(word) === index;
});

console.log(result);
Tomasz Jakub Rup
  • 10,502
  • 7
  • 48
  • 49
1
// Text to search in.
var string = "This is some text. This is some text. Vending Machines are great.";

// Candidate words to look for; replace these placeholders with your own list.
var substrings = ['your', 'words', 'here'];

// Collect every candidate that occurs somewhere in the text. The list is walked
// from last to first, so matches are stored in reverse order of `substrings`.
// Note: indexOf() does a case-sensitive substring match, not a whole-word match.
var results = [];
for (var i = substrings.length - 1; i >= 0; --i) {
    if (string.indexOf(substrings[i]) !== -1) {
        // string contains substrings[i]
        results.push(substrings[i]);
    }
}
ambe5960
  • 1,870
  • 2
  • 19
  • 47
0
// Gathers every entry of `commonWords` that appears as a substring of `$text`.
// Both `commonWords` and `$text` must already be defined before this runs.
var arrayLength = commonWords.length;
var words = [];   // entries of commonWords found in $text
var i = 0;
while (i < arrayLength) {
    // indexOf() returns -1 when the entry does not occur in the text
    if ($text.indexOf(commonWords[i]) !== -1) {
        words.push(commonWords[i]);
    }
    i++;
}