1

Pesky Spaces

As has been noted here and here on SO, Google Apps Script has a propensity to add in random new lines to tables and other elements (although the character in question seems to be \10, as noted in the first link.) The hack noted in the first link (and implemented in the code below) works fine if you don't have any Text elements with rich formatting, but I am having trouble extending the hack and would appreciate any direction.

my use case

In my case, I am making an Nx2 table in which the left column contains English text and the right column Hebrew. The Hebrew needs to be added dynamically with insertParagraph() in order to allow for the proper left-to-right settings, but doing so adds the aforementioned random break character. Using the hack and then applying the proper attributes to the modified text for some reason does nothing to the first row while adding a newline back to the second column, while changing the l-t-r direction as intended.

code (edited)

I am realizing that my MRE isn't really an MRE, so here's something a bit closer to the code I am working with. insertRichTextFromHTML converts an HTML string to Google Docs rich-text format. The data set I am working with comes from Sefaria, a repository of Jewish texts. Note that the code integrates @Tanaike's suggestions above; for some reason it still doesn't work as expected:

function insertReference() {
  let reference = "Shemot 12:2-4";
  let url = 'http://www.sefaria.org/api/texts/'+reference+'?commentary=0&context=0';
  let response = UrlFetchApp.fetch(url);
  let json = response.getContentText();
  let data = JSON.parse(json);
  let doc = DocumentApp.getActiveDocument().getBody();
  let index = DocumentApp.getActiveDocument().getBody().getNumChildren()-1;

    let cells = [
      ["", ""],
      ["", ""]
    ];
    let tableStyle = {};
        tableStyle[DocumentApp.Attribute.BOLD] = false;
    let table = doc.insertTable(index, cells)

    table.setAttributes(tableStyle);

    let engTitle = table.getCell(0, 0)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(true)
      .editAsText();
    insertRichTextFromHTML(engTitle, data.ref);

    let hebTitle = table.getCell(0, 1)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(false)
      .editAsText();
    insertRichTextFromHTML(hebTitle, data.heRef);

    let engText = table.getCell(1, 0)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(true)
      .editAsText();
    insertRichTextFromHTML(engText, data.text);

    let hebText = table.getCell(1, 1)
      .setText("")
      .insertParagraph(0, "")
      .setLeftToRight(false)
      .editAsText();
    insertRichTextFromHTML(hebText, data.he);
}
function insertRichTextFromHTML(element, htmlString) {
  let buf = [];
  let index = 0, italicsFnCount = 0, textLength = element.editAsText().getText().length;
  let bolded = false, italicized = false, inFootnote = false;
  if (Array.isArray(htmlString)) {
    htmlString = htmlString.join("");
  }
  let iterableString = htmlString.split(/(<\/?[a-zA-Z]+[a-zA-Z'"0-9= \-/]*>)/g);

  let inserterFn = (textModification) => {
    //grab all words in the buffer and join
    let snippet = buf.join("");

    //index of snippet needs to be zero-indexed. This is how we keep track of which words/phrases/sentences to bold/italicize
    let snippetLength = snippet.length;
    let snippetIndex = snippetLength - 1;

    if (snippet != "") {
      element.insertText(textLength, snippet);

      //set rich text settings
      element.setBold(textLength, textLength+snippetIndex, bolded); 
      element.setItalic(textLength, textLength+snippetIndex, italicized);

      textLength += snippetLength;
    }

    switch(textModification) {
      case "bold":
        bolded = !bolded;
        break;
      case "italic":
        italicized = !italicized;
        break;
      case "linebreak":
        element.insertText(textLength, "\n");
        textLength += 1;
        break;
    }
  }

  for (let i = 0; i < iterableString.length; i++) {
    let word = iterableString[i];

    /* example format of footnotes in the text: -----‘Do not let me see your faces<sup class=\"footnote-marker\">*</sup><i class=\"footnote\"><b>Do not let me see your faces </b>See note at v. 3.</i> unless----*/
    if (inFootnote) {
      if ( word == "<i class=\"footnote\">" || word == "<i>") {
        italicsFnCount++;
      } else if ( word == "</i>") {
        italicsFnCount--;
        if (italicsFnCount == 0) {
          inFootnote = false;
          continue;
        }
      }

    }

    else if (word[0] == "<") {
      //we have a tag; grab the name of the tag
      let tagName = /<\/?([a-zA-Z]+)([a-zA-Z'"0-9= \-/])*>/.exec(word)[1];

      switch (tagName) {
        case "b":
          inserterFn("bold");
          buf = [];
          index = 0;
          break;
        case "strong":
          inserterFn("bold");
          buf = [];
          index = 0;
          break;
        case "i":
          inserterFn("italic");
          buf = [];
          index = 0;
          break;
        case "br":
          inserterFn("linebreak");
          buf = [];
          index = 0;
          break;
        case "sup":
          inFootnote = true;
          // yes, this fires even at </sup>, but that is of no consequence for the logic
          italicsFnCount = 0;
          break;
        default:
          break;
      }
      continue;
    }

    if (!inFootnote) {
      buf[index++] = word;
    }
  }

  // add in the last words, if the text snippet does not end with a tag
  let snippet = buf.join("");
  if ( snippet != "" ) {
    element.insertText(textLength, snippet);
    let snippetIndex = snippet.length - 1;
    element.setBold(textLength, textLength+snippetIndex, false); 
    element.setItalic(textLength, textLength+snippetIndex, false);
  }
}

Current output: still bad Desired output: best

shman613
  • 196
  • 1
  • 12
  • I have to apologize for my poor English skill. Unfortunately, I cannot understand your question. In order to correctly understand your question, can you provide the sample input and output situations you expect? First, I would like to correctly understand your question. – Tanaike Jul 09 '23 at 23:17
  • @Tanaike edited to include screenshots. – shman613 Jul 09 '23 at 23:28
  • Thank you for replying. From your reply, I proposed a modified script as an answer. Please confirm it. If I misunderstood your question, I apologize. – Tanaike Jul 09 '23 at 23:42
  • Thank you for replying. About `No need to apologize - thanks for all you've sent so far! Let me edit and just put in the implementation at this point...`, when I saw your updated question, unfortunately, your question is different from your initial question. In this case, I think that my answer was not useful for your situation. Because my answer is for your initial question. By this, I have to delete my answer. I think that this is due to my poor skill. I deeply apologize for this. – Tanaike Jul 10 '23 at 08:13
  • definitely not on you! I just thought I had a minimal reproducible example and I guess I didn't...thanks for all of your help in any event! – shman613 Jul 10 '23 at 13:22
  • Thank you for replying. Although, unfortunately, I cannot know your actual script, if it supposes that your updated script is your actual script, how about the posted modified script? But, I cannot know your actual script. So, when you have been using another script again, this script might not be able to be used. Please be careful about this. And, if my answer was not useful, I apologize again. – Tanaike Jul 11 '23 at 02:08

1 Answers1

1

From your updated question, although I'm not so sure if I understand it correctly, how about modifying insertReference() as follows:

### From:
        let hebText = table.getCell(1, 1)
          .setText("")
          .insertParagraph(0, "")
          .setLeftToRight(false)
          .editAsText();
        insertRichTextFromHTML(hebText, data.he);
    }

### To:
      let hebText = table.getCell(1, 1)
        .setText("")
        .insertParagraph(0, "")
        .setLeftToRight(false)
        .editAsText();
      insertRichTextFromHTML(hebText, data.he);

      // I added the below script.
      for (let r = 0; r < table.getNumRows(); r++) {
        const row = table.getRow(r);
        for (let c = 0; c < row.getNumCells(); c++) {
          const cell = row.getCell(c);
          const n = cell.getNumChildren();
          cell.getChild(n - 1).removeFromParent();
        }
      }
    }

or

      let hebText = table.getCell(1, 1)
        .setText("")
        .insertParagraph(0, "")
        .setLeftToRight(false)
        .editAsText();
      insertRichTextFromHTML(hebText, data.he);

      // I added the below script.
      for (let r = 0; r < table.getNumRows(); r++) {
        const row = table.getRow(r);
        for (let c = 0; c < row.getNumCells(); c++) {
          const cell = row.getCell(c);
          const n = cell.getNumChildren();
          for (let t = 0; t < n; t++) {
            const child = cell.getChild(t);
            if (child.asParagraph().getText().trim() == "") {
              child.removeFromParent();
            }
          }
        }
      }
    }
  • I guessed that in your added script, the 1st insertParagraph(0, "") might be the reason for your current issue. So, I added the script for removing it. But, I do not know your actual script. So, when you use another script again, you might not be able to use the above script. Please be careful about this.
Tanaike
  • 181,128
  • 11
  • 97
  • 165
  • Perhaps my MRE wasn't really an MRE - I have `let hebTitle = table.getCell(0, 1).setText("").insertParagraph(0, "").setLeftToRight(false).editAsText(); insertRichTextFromHTML(hebTitle, data.heRef);` where the insertRichTextFromHTML parses an HTML string with bolds and italics and, piece by piece, calls `hebTitle.insertText(currentIndex, processedTextN)` for, say, the first N 100 pieces. This still yields the extra space... – shman613 Jul 10 '23 at 00:33
  • @shman613 Thank you for replying. About `Perhaps my MRE wasn't really an MRE - I have let hebTitle = table.getCell(0, 1).setText("").insertParagraph(0, "").setLeftToRight(false).editAsText(); insertRichTextFromHTML(hebTitle, data.heRef); where the insertRichTextFromHTML parses an HTML string with bolds and italics and, piece by piece, calls hebTitle.insertText(currentIndex, processedTextN) for, say, the first N 100 pieces. This still yields the extra space...`, what is `insertRichTextFromHTML(hebTitle, data.heRef);`? – Tanaike Jul 10 '23 at 02:09
  • @shman613 Unfortunately, I cannot find it in your question. I deeply apologize for this. For example, your showing script is different from your actual situation? – Tanaike Jul 10 '23 at 02:10
  • @shman613 When I asked `can you provide the sample input and output situations you expect?`, you added `I would like the output to look like this:` and `Instead it comes out as this:`. Unfortunately, when my proposed script is run, it seems that your expected result is obtained. Unfortunately, I cannot understand your reply and your situation. I really apologize for this. – Tanaike Jul 10 '23 at 02:12
  • No need to apologize - thanks for all you've sent so far! Let me edit and just put in the implementation at this point... – shman613 Jul 10 '23 at 06:22
  • @shman613 Thank you for replying. About `No need to apologize - thanks for all you've sent so far! Let me edit and just put in the implementation at this point...`, when I saw your updated question, unfortunately, your question is different from your initial question. In this case, I think that my answer was not useful for your situation. Because my answer is for your initial question. By this, I have to delete my answer. I think that this is due to my poor skill. I deeply apologize for this. – Tanaike Jul 10 '23 at 08:13
  • Fantastic! that works!! And it is a testament to your humility that someone as prominent as yourself in the Google Apps Script world would claim that they have "poor skill." Many thanks – shman613 Jul 11 '23 at 08:03
  • @shman613 Thank you for replying. I think that I want to study more and have a lot of things for studying. Also, I could study from your question. Thank you, too. And, I'm glad your issue was resolved. – Tanaike Jul 11 '23 at 11:15