1

I am trying to parse a large JSON object that contains mostly useless information. I want to collect the values of all '_text' keys and put them in an array. Example:

const myObj = {
"_declaration": {
    "_attributes": {
        "version": "1.0",
        "encoding": "UTF-8",
        "standalone": "yes"
    }
},
"w:document": {
    "_attributes": {
        "xmlns:wpc": "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas",
        "xmlns:cx": "http://schemas.microsoft.com/office/drawing/2014/chartex",
        "xmlns:cx1": "http://schemas.microsoft.com/office/drawing/2015/9/8/chartex",
        "xmlns:cx2": "http://schemas.microsoft.com/office/drawing/2015/10/21/chartex",
        "xmlns:cx3": "http://schemas.microsoft.com/office/drawing/2016/5/9/chartex",
        "xmlns:cx4": "http://schemas.microsoft.com/office/drawing/2016/5/10/chartex",
        "xmlns:cx5": "http://schemas.microsoft.com/office/drawing/2016/5/11/chartex",
        "xmlns:cx6": "http://schemas.microsoft.com/office/drawing/2016/5/12/chartex",
        "xmlns:cx7": "http://schemas.microsoft.com/office/drawing/2016/5/13/chartex",
        "xmlns:cx8": "http://schemas.microsoft.com/office/drawing/2016/5/14/chartex",
        "xmlns:mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
        "xmlns:aink": "http://schemas.microsoft.com/office/drawing/2016/ink",
        "xmlns:am3d": "http://schemas.microsoft.com/office/drawing/2017/model3d",
        "xmlns:o": "urn:schemas-microsoft-com:office:office",
        "xmlns:r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
        "xmlns:m": "http://schemas.openxmlformats.org/officeDocument/2006/math",
        "xmlns:v": "urn:schemas-microsoft-com:vml",
        "xmlns:wp14": "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing",
        "xmlns:wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
        "xmlns:w10": "urn:schemas-microsoft-com:office:word",
        "xmlns:w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
        "xmlns:w14": "http://schemas.microsoft.com/office/word/2010/wordml",
        "xmlns:w15": "http://schemas.microsoft.com/office/word/2012/wordml",
        "xmlns:w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
        "xmlns:w16se": "http://schemas.microsoft.com/office/word/2015/wordml/symex",
        "xmlns:wpg": "http://schemas.microsoft.com/office/word/2010/wordprocessingGroup",
        "xmlns:wpi": "http://schemas.microsoft.com/office/word/2010/wordprocessingInk",
        "xmlns:wne": "http://schemas.microsoft.com/office/word/2006/wordml",
        "xmlns:wps": "http://schemas.microsoft.com/office/word/2010/wordprocessingShape",
        "mc:Ignorable": "w14 w15 w16se w16cid wp14"
    },
    "w:body": {
        "w:p": [
            {
                "_attributes": {
                    "w14:paraId": "53160B82",
                    "w14:textId": "77777777",
                    "w:rsidR": "00484DC0",
                    "w:rsidRDefault": "00484DC0",
                    "w:rsidP": "00264A78"
                },
                "w:pPr": {
                    "w:spacing": {
                        "_attributes": {
                            "w:after": "0",
                            "w:line": "240",
                            "w:lineRule": "auto"
                        }
                    },
                    "w:rPr": {
                        "w:sz": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        },
                        "w:szCs": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        }
                    }
                }
            },
            {
                "_attributes": {
                    "w14:paraId": "0FC460F5",
                    "w14:textId": "77777777",
                    "w:rsidR": "00880E84",
                    "w:rsidRDefault": "00880E84",
                    "w:rsidP": "00264A78"
                },
                "w:pPr": {
                    "w:spacing": {
                        "_attributes": {
                            "w:after": "0",
                            "w:line": "240",
                            "w:lineRule": "auto"
                        }
                    },
                    "w:rPr": {
                        "w:sz": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        },
                        "w:szCs": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        }
                    }
                },
                "w:r": {
                    "w:rPr": {
                        "w:sz": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        },
                        "w:szCs": {
                            "_attributes": {
                                "w:val": "36"
                            }
                        }
                    },
                    "w:t": {
                        "_text": "Teaching Guide/Lesson Template"
                    }
                }
            }
            ...More of the object, this is the JSON format that I 
            then parse using JSON.parse in my code to read
const myArray = /*An array*/
//myArray should be ["Some text", "that I want", "in an array"]

The order does matter. I tried this with a recursive function, but the object is too large and I'm exceeding the maximum call stack size. Is there any function or library out there that I can use for this functionality?

1 Answer

2

Make a recursive function with Object.keys like so:

/**
 * Collects the value of every "_text" property found anywhere inside a nested
 * structure of objects and arrays.
 *
 * Values are returned in depth-first order, visiting each container's own
 * keys in `Object.keys` order, so the result follows the order in which the
 * "_text" entries appear in the source data.
 *
 * The traversal uses an explicit stack instead of recursion, so the nesting
 * depth of the input is not limited by the JavaScript call stack.
 *
 * @param {*} obj - Root value to search. `null` and non-object values yield
 *   an empty array. The structure is assumed to be acyclic (as produced by
 *   `JSON.parse`); a cyclic structure would never terminate.
 * @returns {Array} Flat array of every "_text" value. A "_text" value that is
 *   itself an array is flattened into the result.
 */
function getText(obj) {
  const text = [];

  // `typeof null` is "object", so null must be excluded explicitly before a
  // value is handed to Object.keys (which throws on null).
  const isContainer = (value) => value !== null && typeof value === "object";

  // Appends `value` to the result, unpacking nested arrays in order without
  // recursion.
  const appendFlattened = (value) => {
    const queue = [value];
    while (queue.length > 0) {
      const current = queue.pop();
      if (Array.isArray(current)) {
        // Push in reverse so items pop off in their original order.
        for (let i = current.length - 1; i >= 0; i--) {
          queue.push(current[i]);
        }
      } else {
        text.push(current);
      }
    }
  };

  if (!isContainer(obj)) {
    return text;
  }

  // Each frame remembers how far we have advanced through one container's
  // keys, which keeps "_text" values and nested results interleaved in key
  // order.
  const pending = [{ node: obj, keys: Object.keys(obj), next: 0 }];

  while (pending.length > 0) {
    const frame = pending[pending.length - 1];
    if (frame.next >= frame.keys.length) {
      pending.pop();
      continue;
    }

    const key = frame.keys[frame.next];
    frame.next += 1;
    const value = frame.node[key];

    if (key === "_text") {
      appendFlattened(value);
    }
    // A "_text" value that is an object/array is still descended into.
    if (isContainer(value)) {
      pending.push({ node: value, keys: Object.keys(value), next: 0 });
    }
  }

  return text;
}

// Sample input: three "_text" leaves sitting at different nesting depths.
const myObj = {
  object1: { deeperLevel: { _text: "Some text" } },
  object2: { _text: "that I want" },
  object3: { deeperLevel: { EvenDeeper: { _text: "in an array" } } },
};

// Gather every "_text" value, in the order the keys appear above.
const myArr = getText(myObj);

// Expected to print: [ 'Some text', 'that I want', 'in an array' ]
console.log(myArr);

Flattening algorithm from this answer.

Jack Bashford
  • 43,180
  • 11
  • 50
  • 79
  • The OP said *I tried this with a recursive function but the object is too large and I'm exceeding the maximum call stack*. Why are you suggesting a recursive solution? – Ele Feb 26 '19 at 23:21
  • 1
    The OP may have had a bug in the recursive function - returning a call with no filtering (`function recursive(item) { return recursive(item) }`). – Jack Bashford Feb 26 '19 at 23:22
  • I tried your recursive function, and it didn't error out, but it also returned an empty array. I'll edit my code to reflect a more precise example, with only one _text attribute. – David Westwood Feb 26 '19 at 23:39
  • Erm...it runs perfectly above with the object you provided. – Jack Bashford Feb 26 '19 at 23:40
  • I just edited with a snippet of the object I'm dealing with, in JSON format. hopefully that helps. – David Westwood Feb 26 '19 at 23:43
  • The recursive function worked, (I was calling the function when I was previously trying to go deeper into the object - and was calling it with an object that didn't have the property _text) – David Westwood Feb 26 '19 at 23:45