For a project of mine I have to analyze a bunch of simple html formatted strings and further process them.
Let's say I have a string like:
Hey <font face="Times New Roman" size="14">this text look <i>cool<i> and <b>fancy <i>very fancy<i></b> however </font> I'm ok <u>with</u> it.
What I would like to obtain with my recursive function is an array with various objects as child structured as follows:
var myArr = myParse(myString);
myArr[0] = {
"text": "Hey",
}
myArr[0] = {
"text": "this text look ",
"face": "Times New Roman",
"size": 14
}
myArr[1] = {
"text": "cool",
"face": "Times New Roman",
"size": 14,
"italic": true
}
myArr[2] = {
"text": " and ",
"face": "Times New Roman",
"size": 14
}
myArr[3] = {
"text": "fancy ",
"face": "Times New Roman",
"size": 14,
"bold": true
}
myArr[4] = {
"text": "fancy ",
"face": "Times New Roman",
"size": 14,
"bold": true
}
myArr[5] = {
"text": "very fancy",
"face": "Times New Roman",
"size": 14,
"italic": true,
"bold": true
}
myArr[6] = {
"text": " however ",
"face": "Times New Roman",
"size": 14
}
myArr[6] = {
"text": " I'm ok "
}
myArr[7] = {
"text": "with",
"underline": true
}
myArr[8] = {
"text": " it."
}
I tried to structure it as recursive function but I'm unsure how to proceed to make it completely functional..
function myParse(str, arr) {
if(!arr) arr = [];
var regex = /<font(.+?)>(.+?)<\/font>|<i>(.+?)<\/i>|<b>(.+?)<\/b>|<u>(.+?)<\/u>/g;
var match = regex.exec(str);
while (match != null) {
for (var i = 0; i < match.length; i++) {
// this way i can identify with matches:
// match[1] - font specifics
// match[2] - font tag content
// match[3] - italic tag content
// match[4] - bold tag content
// match[5] - underline tag content
var temp_object = {};
var temp_object.text = matched_text;
// process here a second regex to obtain font name and size.. like:
var regex = /face="(.+?)"|size="(.+?)"/g;
...
var temp_object.italic = match[3] ? true : false;
var temp_object.bold = match[4] ? true : false;
var temp_object.underline = match[5] ? true : false;
// at some point i'm pretty sure that i have to..
// put a marker let's say a
var marker = Math.floor(Math.random() * 5000).toString();
str.replace(matched_text, marker)
// then recurse the parse(str, arr)
}
match = regex.exec(str);
}
}