0

I'm trying to write a JS code that will split a sentence into an array of elements, but without all the special characters (like commas, dots, exclamation and question marks etc.). But when I try to display the list of all the elements, there are empty lines. How do I get rid of that?

Here's my code:

/**
 * Demo: breaks a fixed sentence apart on a list of separator characters and
 * writes a short report (piece count, longest piece, fifth piece, full list)
 * into the page with document.write.
 *
 * Each separator character is matched on its own, so two separators in a row
 * (for example ", ") leave an empty string between them in the result.
 *
 * Takes no arguments and returns nothing.
 */
function Splitter() {
    var sentence = "Here is a sentence, with commas and with other characters, such as dots. And numbers 123 45 6!?";

    // Every entry is a regex fragment; the fragments are OR-ed together.
    var chars = [' ', '\\\+', '-', '\\\(', '\\\)', '\\*', '/', ':', '\\\?', '!', '\\\,', '\\\.'];
    var separatorPattern = new RegExp(chars.join('|'), 'g');
    var parts = sentence.trim().split(separatorPattern);

    // Remember the first piece that has the greatest length.
    var longestIndex = -1;
    var longestWord = 0;
    parts.forEach(function (piece, position) {
        if (piece.length > longestWord) {
            longestWord = piece.length;
            longestIndex = position;
        }
    });

    document.write("<b>Original sentence:</b><br>" + sentence);

    var countLine = "<br><br><b>how many words in sentence:</b> " + parts.length;
    document.write(countLine);

    var longestLine = "<br><br><b>the longest word is:</b> " + parts[longestIndex] + "<br>(number of characters in this word: " + longestWord + ")";
    document.write(longestLine);

    var fifthLine = "<br><br><b>fifth word:</b> " + parts[4];
    document.write(fifthLine);

    // Ordered list with one <li> per piece, written item by item.
    document.write("<br><br><b>words:</b><br><ol>");
    parts.forEach(function (piece) {
        document.write("<li>" + piece + "</li>");
    });
    document.write("</ol>");
}
    
// Run the demo once; Splitter emits its report through document.write.
Splitter();

It counts words and shows the longest word, but when it comes to displaying all the elements, the result shows empty lines (where a comma or an exclamation mark is in the original sentence). The "fifth word" also shows an empty value.

What am I doing wrong here?

barbsan
  • 3,418
  • 11
  • 21
  • 28
Mike
  • 11
  • 3
  • Just add a filter: `split(........).filter(Boolean)`. See documentation on `split` why there can be empty results. – trincot Nov 21 '18 at 08:23

2 Answers

1

You can chain a .filter(Boolean) to the split result to remove those empty string results.

Please note that you can simplify your regular expression. Instead of using the pipe, you can put all offending characters in a regex class, like so:

/[ +\-()*\/:?!,.]+/g

By adding the additional + at the end you also partly resolve the empty string returns, except for a potential one at the start and the end, so you still need the filter.

To completely avoid having to do the filter, you can use match instead of split, but with a negated class ([^). Here you must use the + at the end:

var parts = sentence.match(/[^ +\-()*\/:?!,.]+/g);

Finally, also consider \w+. It is a stricter condition than the one you currently have, as it will only keep alphanumeric characters (and underscores):

var parts = sentence.match(/\w+/g);
trincot
  • 317,000
  • 35
  • 244
  • 286
0

This happens because you use dots and spaces as separators, but in the way .split has been designed, the method will use the separator to split the string and won't include the separator anywhere.

You can use a positive lookahead, (?=...), to check whether a pattern exists without consuming it; this way the separator will be included in the result.

/**
 * Demo: splits a fixed sentence in front of every separator character and
 * writes a short report (piece count, longest piece, fifth piece, full list)
 * into the page with document.write.
 *
 * The separators are wrapped in a lookahead, so nothing is consumed by the
 * split: each separator character stays at the start of the piece that
 * follows the split point.
 *
 * Takes no arguments and returns nothing.
 */
function Splitter() {
    var sentence = "Here is a sentence, with commas and with other characters, such as dots. And numbers 123 45 6!?";

    // Regex fragments for the separators, OR-ed together inside (?=...).
    var chars = [' ', '\\\+', '-', '\\\(', '\\\)', '\\*', '/', ':', '\\\?', '!', '\\\,', '\\\.'];
    var lookaheadSource = '(?=' + chars.join('|') + ')';
    var parts = sentence.trim().split(new RegExp(lookaheadSource, 'g'));

    // Fold over the pieces, keeping the first one with the greatest length.
    var longest = parts.reduce(function (best, piece, position) {
        if (piece.length > best.size) {
            return { at: position, size: piece.length };
        }
        return best;
    }, { at: -1, size: 0 });
    var longestIndex = longest.at;
    var longestWord = longest.size;

    document.write("<b>Original sentence:</b><br>" + sentence);

    document.write("<br><br><b>how many words in sentence:</b> " + parts.length);

    var longestReport = "<br><br><b>the longest word is:</b> " + parts[longestIndex];
    longestReport += "<br>(number of characters in this word: " + longestWord + ")";
    document.write(longestReport);

    document.write("<br><br><b>fifth word:</b> " + parts[4]);

    // Ordered list with one <li> per piece, written item by item.
    document.write("<br><br><b>words:</b><br><ol>");
    parts.forEach(function (piece) {
        document.write("<li>" + piece + "</li>");
    });
    document.write("</ol>");
}
    
// Run the demo once; Splitter emits its report through document.write.
Splitter();

EDIT

Maybe I misunderstood the question. If you just wanted to exclude the blank entries, you can match special characters even when they are next to each other, using the + quantifier:

/**
 * Demo: splits a fixed sentence into words, discarding separator characters,
 * and writes a short report (word count, longest word, fifth word, full
 * list) into the page with document.write.
 *
 * Takes no arguments and returns nothing.
 */
function Splitter() {
    var sentence = "Here is a sentence, with commas and with other characters, such as dots. And numbers 123 45 6!?";

    // All separator characters sit in one character class. Inside a class
    // there is no alternation, so the characters are listed directly (a "|"
    // there would be a literal pipe). The hyphen is escaped so it is not read
    // as a range. The trailing + makes a run of adjacent separators (", " or
    // "!?") act as a single split point.
    var separators = /[ +\-()*\/:?!,.]+/;

    // A separator at the very start or end of the sentence still leaves an
    // empty string at that edge of the split result, so drop empty entries.
    var parts = sentence.split(separators).filter(Boolean);

    // Find the first word with the greatest length.
    var longestIndex = -1;
    var longestWord = 0;

    for(var i=0; i < parts.length; i++){
        if(parts[i].length > longestWord){
            longestWord = parts[i].length;
            longestIndex = i;
        }
    }

    document.write("<b>Original sentence:</b><br>" + sentence);

    document.write("<br><br><b>how many words in sentence:</b> " + parts.length);

    document.write("<br><br><b>the longest word is:</b> " + parts[longestIndex] + "<br>(number of characters in this word: " + longestWord + ")");

    document.write("<br><br><b>fifth word:</b> " + parts[4]);

    // Ordered list with one <li> per word.
    document.write("<br><br><b>words:</b><br><ol>");

    for(var k=0; k<parts.length; k++) {
        document.write("<li>" + parts[k] + "</li>");
    }

    document.write("</ol>");

}
    
// Run the demo once; Splitter emits its report through document.write.
Splitter();
Community
  • 1
  • 1
  • If that is what OP needs, you can vote to close/flag as dupe of https://stackoverflow.com/questions/12001953/javascript-and-regex-split-string-and-keep-the-separator. BTW, there is no point using `g` modifier, it is default with `String#split`. Also, you could show a better approach to regex building: the `chars` are actually single chars OP wants to match, hence a character class would be more natural, and will require much less escaping. – Wiktor Stribiżew Nov 21 '18 at 08:27