0

I am writing a set of scripts in Google App Script, for a Google Sheet. I created a user interface to provide a web page experience for the Google Sheet, acting as a discussion forum. I store discussions in a recursive pattern, in the spread sheet, like this:

  ID    |    Parent_ID  |   Title      |      Note                | Forum_ID   | Is_Active  

1       |   0           |  Some Title  |       Some Discussion    | 100        | True

2       |   0           |  Some Title  |       Some Discussion    | 100        | True

3       |   2           |  Some Title  |       Some Discussion    | 100        | True

4       |   3           |  Some Title  |       Some Discussion    | 100        | True

5       |   2           |  Some Title  |       Some Discussion    | 100        | True

So, the nesting can be of an indeterminate number of levels. When the page loads, it call a script, passing in an ID, which will always be the root ID of upper node (where the Parent_ID is always 0). I then return a node, with all nested children. I then need to use the ID of each record in the nested group, to perform a couple of calculations. The code I have works, but can be very slow. To iterate over 20 records can take upwards of 15 seconds. Can anyone provide feedback on how to speed this up and make the code more efficient? Here is my code:

function GetMessageBoardChildren(message_id) {
    console.time('Gettingcomments') //this block of code can take around 2-3 seconds to run through (where the number of records is about 50)
    var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
    var sheet = ss.getSheetByName(MESSAGE_BOARD);
    var rows = sheet.getDataRange().getValues();               
    var newArray = [];
    var CommentsArray = [];   
    for (var i = 1; i < rows.length; i++) {
        var row = rows[i];     
        if (row[5] == 1) {        
             CommentsArray.push({
                post_id: row[0].toString(),
                parent_id: row[1].toString(),
                forum_id: row[2].toString(),
                title: htmlEscape(row[3].toString()),
                message: htmlEscape(row[4].toString()),
                is_active: row[5].toString(),
                date_created: ConvertUnixTimeStampToDateTime(row[6].toString()),
                created_by_id: row[8].toString()      
            })
        }    
    }   
    console.timeEnd('Gettingcomments') 
   
    console.time('BuildgingTree')// this is fast.  couple of miliseconds
    tree = nest( CommentsArray );  
    for(var j = 0; j<tree.length; j++){ 
      if(tree[j].post_id == message_id){
         newArray.push(tree[j])
      }
    }
   console.timeEnd('BuildgingTree')
   
   console.time('recursing') //this can take 11-15 seconds to complete
   var t = recursePosts(newArray);
   console.timeEnd('recursing')
   return t;  

}

function recursePosts(posts){      
  for(var i=0;i<posts.length;i++){
    if(posts[i].children){
      recursePosts(posts[i].children)
     } 
     console.time('GettingVotes')
     var voteCount = GetVotesByCommentId(posts[i].post_id); 
     posts[i].number_up_posts = voteCount.upVote,
     posts[i].number_down_posts = voteCount.downVote
     console.timeEnd('GettingVotes')
     console.time('GettingUsername')
     posts[i].created_by = GetUserNameByUserId( posts[i].created_by_id);
     console.timeEnd('GettingUsername')
     posts[i].created_by_id="";     
  }
  return posts;
}

const nest = (items, id = '0', link = 'parent_id') =>items
    .filter(item => (item[link] === id )  )
    .map(item => ({ ...item,children: nest(items, item.post_id) }));

function ConvertUnixTimeStampToDateTime(unix_timestamp) {
    var a = new Date(unix_timestamp * 1000);
    var months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    var year = a.getFullYear();
    var month = months[a.getMonth()];
    var date = a.getDate();
    var hour = a.getHours();
    var min = a.getMinutes();
    var sec = a.getSeconds();
    var time = a.getMonth() + "/" + date + "/" + year + " " + hour + ":" + min + ":" + sec;   
    return time;

}
function htmlEscape(str){
//prevent injection
    return str.replace(/<[^>]+>/g, "")
}
function GetUserNameByUserId(ID){
   var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
   var sheet = ss.getSheetByName(USERS);
   var rows = sheet.getDataRange().getValues();
   var userName = "";
   
   for (var i = 1; i < rows.length; i++) {
     var row = rows[i];
     if(row[0] === ID){
       userName = row[3];
       break;
     }
   }
   if(userName == ""){
     userName = "Admin"
   }
   return userName;    
}

function GetVotesByCommentId(comment_id) {
    var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
    var sheet = ss.getSheetByName(COMMENT_VOTES);
    var rows = sheet.getDataRange().getValues();
    var countUp = 0;
    var countDown = 0;

    for (var i = 0; i < rows.length; i++) {
      var row = rows[i];
        if (row[1] === comment_id && row[2] === 1) {
            countUp++;
        }else if (row[1] === comment_id && row[2] === -1){
          countDown++;
        }        
    }
    return {upVote:countUp, downVote:countDown};
}
jason
  • 3,821
  • 10
  • 63
  • 120
  • 1
    Provide logs along with [mcve]( many functions are missing) – TheMaster Dec 03 '20 at 17:07
  • @TheMaster Sure, what Logs would you like? I put the log outputs (for the times) in the code's comments. I am not tracking any other logs, but I can add more logging? I can also add the other functions, but thought it would overly complicate the example. But, I will add them – jason Dec 03 '20 at 17:11
  • I'd suggest moving the variable declarations into their own time block to see if the spreadsheet operations are taking most of the time or whether they are quick. Without that delineation, you don't even know if it's your code that's being slow. Also, JS is a loosely typed language, so doing a `toString` is likely a useless operation. There are times when it's needed, but likely not here. – computercarguy Dec 03 '20 at 17:11
  • Now, I am thinking the issue may be the server environment. Everything was acting real slow. Even the Stackdriver logs, in the Apps Script Dashboard, would take minutes to run. But, now everything seems fine. Still, if my code can be better optimized, it might help when our network starts acting up. So, any feedback would b e greatly appreciated! – jason Dec 03 '20 at 17:48
  • I assume that all of the post id's in column 1 are unique and that all messages with the same parent are in the same thread. So if you look for all of the messages in a given thread why load the entire message board to get it. Why not just load only the messages that have the same parent id which should not require are recursion. Or perhaps I just don't understand what you are doing. – Cooper Dec 03 '20 at 18:23
  • @Cooper yes, all ID's are unique. The Title and Notes would be unique (sorry for the bad example). Yes, you can use the ID and Parent_ID to build a tree of the thread. Each node can have an indeterminate number of children. So, a child note can have N number of children, recursively. A list of child notes two levels deep would no longer have the Parent_ID of the root. Though, I suppose I could have a field for Root_ID, that would alleviate that issue. But, its not very normalized. But, considering I am using a spread sheet, maybe there would be some value there? – jason Dec 03 '20 at 18:43

1 Answers1

3

The function recursePosts is reloading and reinitializing multiple variables like ss, sheet and rows multiple times in the inner functions. You should make those variables constant in the parent scope and call those methods only once

const config = {
  ss: null,
  mb_sheet: null,
  users_sheet: null,
  comments_sheet: null,
  mb_rows: null,
  users_rows: null,
  comments_rows: null,
};

function initAll_() {
  config.ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
  [config.mb_sheet, config.users_sheet, config.comments_sheet] = [
    MESSAGE_BOARD,
    USERS,
    COMMENT_VOTES,
  ].map(name => config.ss.getSheetByName(name));
  [config.mb_rows, config.users_rows, config.comments_rows] = [
    config.mb_sheet,
    config.users_sheet,
    config.comments_sheet,
  ].map(sheet => sheet.getDataRange().getValues());
}


function GetMessageBoardChildren(message_id) {
  /*Initialize everything once and only once*/initAll_();
  console.time('Gettingcomments'); 
  //Removed var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
  // var sheet = ss.getSheetByName(MESSAGE_BOARD);
  var rows = /*Modified*/ config.mb_rows;
  /*stuff*/
}

function GetUserNameByUserId(ID) {
  // var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
  // var sheet = ss.getSheetByName(USERS);
  var rows = config.users_rows
  /*stuff*/
}

function GetVotesByCommentId(comment_id) {
  // var ss = SpreadsheetApp.openById(SPREAD_SHEET_ID);
  // var sheet = ss.getSheetByName(COMMENT_VOTES);
  var rows = config.comments_rows;
  /*stuff*/
}

If you want modular loading of variables, you can use the lazy loading technique described here

/**
 * A sheet configuration object containing class sheet and
 *   it's full datarange values
 * @typedef {Object} SheetConfig
 * @property {GoogleAppsScript.Spreadsheet.Sheet} sheet
 * @property {Object[][]} values
 */

/**
 * Global configuration object
 * @type {{
 * ss:GoogleAppsScript.Spreadsheet.Spreadsheet,
 * [x:string]:SheetConfig|GoogleAppsScript.Spreadsheet.Spreadsheet}}
 */
const config = {
  get ss() {
    delete this.ss;
    return (this.ss = SpreadsheetApp.getActive());
  },
};
(function addSheetGettersToConfig_() {
  /*Add 3 {SheetConfig}  to config object*/
  [MESSAGE_BOARD,USERS,COMMENT_VOTES,].forEach(name =>
    Object.defineProperty(config, name, {
      enumerable: true,
      configurable: true,
      get: function() {
        delete this[name];
        return (this[name] = {
          sheet: this.ss.getSheetByName(name),
          get values() {
            delete this.values;
            return (this.values = this.sheet.getDataRange().getValues());
          },
        });
      },
    })
  );
})();

console.log('config before');
console.log(config);

function test_lazyLoading() {
  console.log(config[MESSAGE_BOARD].values);
  console.log('config after');
  console.log(config);
}
TheMaster
  • 45,448
  • 6
  • 62
  • 85
  • I like that! Thank you! – jason Dec 03 '20 at 21:56
  • Can I ask, how will this scale when more and more data is added to the sheets? It seems like every time I call initAll_() it gets all rows, from all sheets, that are mapped in the function. When I console.log(config), after calling initAll_() I can see the data from all sheets. – jason Dec 04 '20 at 01:13
  • @jason You should call ``initAll_`` **only once**. All data is then loaded in the global ``config`` constant and is accessible across all functions. Notice how I called ``initAll_()`` only once in `GetMessageBoardChildren` and **not** in every other function. Try timing now and see how long it takes. – TheMaster Dec 04 '20 at 04:44
  • Thanks. This is written for Google App Scripts, run on the server. Some functions are called when the page loads, others called in async calls from the front end. The GAS does have a doGet function that gets called when the page loads, but calling initAll_ there initializes the variables, but because the back end doesnt have sessions or anything, I can't store those variables beyond the context of the initial call. I would need to call initAll_ every time a function is called. Would it make more sense to structure initAll_ to only initialize the sheets/rows needed for a specific context? – jason Dec 04 '20 at 14:39
  • Though, I do like the idea of keeping all the code clean, like you have it and only initializing when needed. I am just not sure how as this code would be run on the server side of the web page? – jason Dec 04 '20 at 14:40
  • @jason You would use "lazy loading". I edited my answer – TheMaster Dec 04 '20 at 20:26