1

In an HTML/JavaScript/React/Redux web application, I have a long string (around 300 kB) of natural language. It is a transcript of a recording being played back.

I need

  • to highlight the currently uttered word,
  • to recognize a word that's clicked on,
  • to extract selected ranges
  • and to replace parts of the string (when a correction to the transcript is submitted by the user).

Everything is easy when I wrap each word in its own <span>. However, this makes the number of elements unbearable for the browser and the page gets very slow.

I can think of two ways to approach this:

  • I could wrap each sentence in a <span> and only wrap each word of the currently played-back sentence.

  • I could leave the text without HTML tags, handle clicks via document.caretPositionFromPoint, but I don't know how to highlight a word.

I would welcome more ideas and thoughts on the balance between difficulty and speed.

Sixtease
  • 790
  • 8
  • 18

5 Answers

2

"to recognize a word that's clicked on"

New answer

I realized that the code in my previous answer had to split the huge string of text into a huge array on every click event. After that, a linear search was performed on the array to locate the matching word.

However, this can be improved by precomputing the word array once and using binary search instead of linear search. Each lookup of the clicked word then runs in O(log n) instead of O(n).

See: http://jsfiddle.net/amoshydra/vq8y8h19/

// Build character to text map: one entry per space-separated word, recording
// the word and the [start, end) character offsets later used to build a Range.
var text = content.innerText;

// NOTE(review): offsets start at 1 rather than 0 — presumably the text node the
// ranges are applied to has one leading character (e.g. a newline) that
// innerText does not report; confirm against the actual markup.
var counter = 1;
// Declared explicitly: assigning to an undeclared name creates an accidental
// global and throws a ReferenceError in strict mode / ES modules.
var textMap = text.split(' ').map((word) => {
  var entry = {
    word: word,
    start: counter,
    end: counter + word.length,
  };
  // Advance past the word and the single space that followed it.
  counter += word.length + 1;
  return entry;
});

// On click, look up the word under the caret and move the overlay on top of it.
content.addEventListener('click', function (e) {
  var selection = window.getSelection();
  // Index of the last word whose start offset is <= the caret offset
  // (-1 when the caret lies before the first word).
  var result = binarySearch(textMap, selection.focusOffset, compare_word);
  var entry = textMap[result];
  var textNode = e.target.childNodes[0];

  // Nothing to highlight: no matching word, or the click target has no child node.
  if (!entry || !textNode) {
    return;
  }

  var range = document.createRange();
  range.setStart(textNode, entry.start);
  range.setEnd(textNode, entry.end);
  // Only the first rect is used; the list is empty when the range has no boxes.
  var r = range.getClientRects()[0];
  if (!r) {
    return;
  }
  console.log(r.top, r.left, entry.word);

  // Update overlay
  var scrollOffset = e.offsetY - e.clientY; // To accommodate scrolling
  // textContent so that "<" or "&" in the word is shown literally, not parsed as HTML.
  overlay.textContent = entry.word;
  overlay.style.top = r.top + scrollOffset + 'px';
  overlay.style.left = r.left + 'px';
});

/**
 * Binary search that, on a miss, reports the slot just before the insertion
 * point instead of a "not found" marker.
 *
 * @param {Array} ar - Array ordered consistently with `compare_fn`.
 * @param {*} el - Value to look for.
 * @param {function(*, *): number} compare_fn - Called as `compare_fn(el, item)`;
 *   positive means `el` sorts after `item`, negative means before.
 * @returns {number} Index of a matching item, otherwise the index of the last
 *   item that `el` sorts after (-1 when there is none).
 */
function binarySearch(ar, el, compare_fn) {
  let low = 0;
  let high = ar.length - 1;

  while (low <= high) {
    const mid = (low + high) >> 1;
    const order = compare_fn(el, ar[mid]);

    if (order > 0) {
      low = mid + 1;
      continue;
    }
    if (order < 0) {
      high = mid - 1;
      continue;
    }
    // Neither after nor before: treat as a hit.
    return mid;
  }

  return low - 1;
}

/**
 * Comparator for binarySearch: orders a character offset against a word entry.
 *
 * @param {number} a - Character offset being looked up.
 * @param {{start: number}} b - Word entry from the text map.
 * @returns {number} Positive when `a` is past the word's start, negative when
 *   before it, zero when equal.
 */
function compare_word(a, b) {
  const { start } = b;
  return a - start;
}

Original answer

I took a fork of code from this answer from aaron and implemented this:

Instead of wrapping words of the paragraph in span tags, we can put an overlay on top of the current word,
and resize and reposition that overlay whenever we move to another word.

Snippet

JavaScript

// Update overlay
// textContent (not innerHTML) so that characters such as "<" or "&" in the
// word are shown literally instead of being parsed as markup.
overlayDom.textContent = word;
overlayDom.style.top = r.top + 'px';
overlayDom.style.left = r.left + 'px';

CSS

Give the overlay transparent text containing the word, so that the overlay ends up exactly as wide as the word it covers.

/* Highlight box that the script moves on top of the current word. */
#overlay {
  background-color: yellow;
  opacity: 0.4; /* semi-transparent so the text underneath stays readable */
  display: block;
  position: absolute; /* placed through the top/left values set from JavaScript */
  color: transparent; /* the overlay's own copy of the word only gives it the right width */
}

Full forked JavaScript code below

// Element with id "overlay"; the click handler below moves it onto the clicked word.
var overlayDom = document.getElementById('overlay');

/**
 * Find the space-separated word of a text node that lies under a point.
 *
 * Each word is measured by placing a Range over it and testing the point
 * against the range's client rects.
 *
 * @param {Node|null|undefined} parentElt - Node to search; anything that is
 *   not a text node (including a missing node) yields null.
 * @param {number} x - Horizontal coordinate, in the same space as client rects.
 * @param {number} y - Vertical coordinate, in the same space as client rects.
 * @returns {Array|null} `[word, startOffset, rect]` for the first word with a
 *   rect strictly containing the point, or null when there is none.
 */
function findClickedWord(parentElt, x, y) {
    // Callers pass `childNodes[0]`, which is undefined for childless elements,
    // so a missing node must be rejected before reading its properties.
    if (!parentElt || parentElt.nodeName !== '#text') {
        console.log('didn\'t click on text node');
        return null;
    }

    // Returns the first rect strictly containing (x, y), or false.
    function isClickInRects(rects) {
        for (var i = 0; i < rects.length; ++i) {
            var r = rects[i];
            if (r.left < x && r.right > x && r.top < y && r.bottom > y) {
                return r;
            }
        }
        return false;
    }

    var range = document.createRange();
    var words = parentElt.textContent.split(' ');
    var start = 0;
    for (var i = 0; i < words.length; i++) {
        var word = words[i];
        var end = start + word.length;
        range.setStart(parentElt, start);
        range.setEnd(parentElt, end);
        // not getBoundingClientRect as word could wrap
        var clickedRect = isClickInRects(range.getClientRects());
        if (clickedRect) {
            return [word, start, clickedRect];
        }
        // Skip the single space that separated this word from the next.
        start = end + 1;
    }
    return null;
}
/**
 * Document click handler: reports the clicked word in the #info element and
 * moves the overlay on top of that word.
 *
 * @param {MouseEvent} e - Click event; the first child node of its target is
 *   searched for a word under (clientX, clientY).
 */
function onClick(e) {
    var elt = document.getElementById('info');

    // Get clicked status
    var clicked = findClickedWord(e.target.childNodes[0], e.clientX, e.clientY);

    // Update status bar
    if (!clicked) {
        elt.textContent = 'Nothing Clicked';
        return;
    }

    var word = clicked[0];
    var start = clicked[1];
    var r = clicked[2];
    // textContent throughout: the word comes from page text and must not be
    // parsed as markup if it contains "<" or "&".
    elt.textContent = 'Clicked: ('+r.top+','+r.left+') word:'+word+' at offset '+start;

    // Update overlay
    overlayDom.textContent = word;
    overlayDom.style.top = r.top + 'px';
    overlayDom.style.left = r.left + 'px';
}

document.addEventListener('click', onClick);

See the forked demo: https://jsfiddle.net/amoshydra/pntzdpff/

This implementation uses the createRange API

Community
  • 1
  • 1
Amos Wong
  • 192
  • 1
  • 10
  • Wow! The range.getClientRects method was the link in the chain that I was missing to implement this. I was thinking of highlighting via a positioned overlay but didn't know how to get coordinates of a text node substring. Thank you sir. – Sixtease Mar 27 '17 at 18:04
2

I don't think the number of <span> elements is unbearable once they have been positioned. You might just need to minimize reflow by avoiding layout changes.

Small experiment: ~400 kB of text (100,000 four-character words, one span each) highlighted via background-color

// Create ~400 kB of text: 100,000 four-character words, one <span> per word.
let text = document.getElementById("text");
for (let i = 0; i < 100000; ++i) {
  let word = document.createElement("span");
  word.id = "word_" + i;
  word.textContent = "bla ";
  text.appendChild(word);
}
document.body.appendChild(text);

// Highlight text: every 100 ms un-highlight the previous word and highlight
// the next one. Only background-color changes, so no layout is invalidated.
let i = 0;
let word;
let highlightTimer = setInterval(function() {
  if (word) word.style.backgroundColor = "transparent";
  word = document.getElementById("word_" + i);
  if (!word) {
    // Ran past the last word: stop the timer instead of throwing on every tick.
    clearInterval(highlightTimer);
    return;
  }
  word.style.backgroundColor = "red";
  i++;
}, 100);
<div id="text"></div>

Once the initial layout has finished, this renders smoothly for me in FF/Ubuntu/4+ years old laptop.

Now, if you were to change font-weight instead of background-color, the above would become unbearably slow, because every change alters the layout and triggers a reflow.

Community
  • 1
  • 1
le_m
  • 19,302
  • 9
  • 64
  • 74
2

Here is a simple editor that can easily handle a very large string. I tried to keep the DOM as small as possible for performance.

It can

  • recognize a word that's clicked on
  • highlight the currently clicked word, or drag selection
  • extract selected ranges
  • replace parts of the string (when a correction to the transcript is submitted by the user).

See this jsFiddle

// Container holding the transcript text.
var editor = document.getElementById("editor");

// Single reusable <span> that gets wrapped around the currently selected range.
var highlighter = document.createElement("span");
highlighter.className = "rename";

// Input in which the user types the replacement for the highlighted text.
var replaceBox = document.createElement("input");
replaceBox.className = "replace";
// Take the event as a parameter rather than relying on the deprecated
// implicit global `event`.
replaceBox.onclick = function(event) {
  // Keep clicks inside the box from bubbling to the document-level handler.
  event.stopPropagation();
};
editor.parentElement.appendChild(replaceBox);

// Click inside the editor: highlight the clicked word (or the dragged
// selection) and open the replace box for it.
editor.onclick = function(event) {
  var sel = window.getSelection();
  // Without an anchor node or a range there is nothing to highlight; let the
  // click bubble on to the document-level handler.
  if (!sel || !sel.anchorNode || sel.rangeCount === 0) {
    return;
  }
  // Clicking the already highlighted text just dismisses the highlight.
  if (sel.anchorNode.parentElement === highlighter) {
    clearSelection();
    return;
  }
  var range = sel.getRangeAt(0);
  if (range.collapsed) {
    var node = sel.anchorNode;
    // Word expansion reads the node's text, which only text nodes have.
    if (node.nodeType !== Node.TEXT_NODE) {
      return;
    }
    var value = node.nodeValue;
    // Grow a plain click to the surrounding space-delimited word.
    var wordStart = value.lastIndexOf(" ", range.startOffset) + 1;
    range.setStart(node, wordStart);
    // endOffset is read after setStart on purpose: moving the start past the
    // end drags the end along with it.
    var wordEnd = value.indexOf(" ", range.endOffset);
    if (wordEnd === -1) {
      wordEnd = value.length;
    }
    range.setEnd(node, wordEnd);
  }
  clearSelection();
  // surroundContents throws when the range only partially covers a non-text
  // node, e.g. a selection that began outside the editor.
  if (!editor.contains(range.commonAncestorContainer)) {
    return;
  }
  range.surroundContents(highlighter);
  showReplaceBox();
  // Stop here so the document-level handler does not clear the new highlight.
  event.stopPropagation();
};

// A click that bubbles all the way to the document (the editor and the replace
// box stop propagation for clicks they handle) closes the current highlight.
document.onclick = function handleDocumentClick() {
  clearSelection();
};

/**
 * Close the current highlight: hide the replace box, put the box's current
 * value into the document as a text node right after the highlighter, and
 * detach the highlighter. The editor's text nodes are normalized in any case.
 */
function clearSelection() {
  var host = highlighter.parentNode;
  if (host) {
    replaceBox.style.display = "none";
    var replacement = document.createTextNode(replaceBox.value);
    host.insertBefore(replacement, highlighter.nextSibling);
    host.removeChild(highlighter);
  }
  // Comment this line out in case of any performance issue after an edit.
  editor.normalize();
}

/**
 * Show the replace box directly below the highlighter, pre-filled with the
 * highlighted text, focused, and with its whole content selected.
 * Does nothing while the highlighter is not attached to a parent.
 */
function showReplaceBox() {
  if (!highlighter.parentNode) {
    return;
  }

  var boxStyle = replaceBox.style;
  boxStyle.display = "block";
  // Top edge of the box sits at the bottom edge of the highlighter.
  boxStyle.top = (highlighter.offsetTop + highlighter.offsetHeight) + "px";
  boxStyle.left = highlighter.offsetLeft + "px";

  replaceBox.value = highlighter.textContent;
  replaceBox.focus();
  // Select everything so typing immediately overwrites the old text.
  replaceBox.selectionStart = 0;
  replaceBox.selectionEnd = replaceBox.value.length;
}
/* Class of the highlighter span wrapped around the selected text. */
.rename {
  background: yellow;
}

/* Class of the replacement input; hidden until the script sets display to "block". */
.replace {
  position: absolute; /* placed through the top/left values set from JavaScript */
  display: none;
}
<div id="editor">
Your very large text goes here...
</div>
Sen Jacob
  • 3,384
  • 3
  • 35
  • 61
0

I would first find the clicked word via some annoying logic (Try looking here ) Then you can highlight the word simply by wrapping the exact word with a styled span as you suggested above :)

Community
  • 1
  • 1
Gal Ben Arieh
  • 413
  • 4
  • 11
0

Well, I'm not really sure how you could recognise words. You may need a 3rd party software. To highlight a word, you can use CSS and span as you said.

CSS

/* Grey background for every <span> on the page (bare element selector). */
span {
background-color: #B6B6B4;
}

To add the 'span' tags, you could use a find and replace thing. Like this one.

Find: all spaces

Replace: <span>

Sank6
  • 491
  • 9
  • 28