I am attempting to build a parser for BBCode in JavaScript that will allow me to transpile a string with BBCode in it to a string with HTML. I have in my head how it is all supposed to work and I even have two of the parser steps built.
Right now the entire process of the parser can be described as
- Get input
- Break input into tokens (tokenize)
- Add information about the tokens (lex)
- Build the AST from the tokens (parse)
- Clean up the AST based on grammar rules (clean)
- Evaluate AST and transform to HTML (evaluate)
- Return the HTML string
I have a general idea of how to do all of this in my head except for step four.
When I reached step four, I ran into a problem building the AST: I do not know how to build the tree recursively. I have recursively built two-dimensional arrays in the past, but a variable-depth tree is beyond my current abilities.
In my head I think that the tree should look something like this:
// Hello, [b]World![/b]
{
"text": "Hello, ",
"tag": {
"type": "b",
"text": "World!"
}
}
But when I try to generate this, I have trouble recursively building the tree downward.
A more complex example would be as follows:
// [c=red]Hello Tom, [/c][b][c=green]how are you?[/c][/b]
{
"tag": {
"type": "c",
"parameters": "red",
"text": "Hello Tom, ",
"tag": {
"type": "b",
"tag": {
"type": "c",
"parameters": "green",
"text": "how are you?"
}
}
}
}
The main issue I run into is keeping track of my position while building downward without accidentally overwriting the entire tree.
Currently the code I am using is:
// Namespace object that carries the parser's lookup tables; the parser
// methods are attached to it by the assignments that follow.
var bbcode = {
    // Single-character delimiters, keyed by the token type name that
    // _getType reports for them.
    _tokens: {
        TO_DEL: '[',
        TC_DEL: ']',
        TE_DEL: '/',
        EQ_DEL: '='
    },
    // Tag names that _isTag accepts (the lexer labels them BB_TAG).
    _tags: ['c', 'b']
};
/**
 * Runs the pipeline stages that exist so far (tokenize -> lex -> parse) on a
 * BBCode string.
 *
 * @param {string} bbcode - Raw BBCode input. The parameter shadows the outer
 *     `bbcode` object, so the stages are reached through `this`.
 * @returns {Object} The tree built by `_parse`. Previously nothing was
 *     returned, so callers could not use the result at all.
 */
bbcode.parse = function(bbcode) {
    var tokens = this._lex(this._tokenize(bbcode));
    var ast = this._parse(tokens);
    // Debug dump of the tree, kept so existing console output is unchanged.
    console.log(JSON.stringify(ast, null, 4));
    return ast;
};
/**
 * Reports whether `token` is strictly equal to one of the delimiter values
 * stored in `this._tokens`.
 *
 * @param {*} token - Candidate value, normally a single character.
 * @returns {boolean} true when a delimiter matches, false otherwise.
 */
bbcode._isToken = function(token) {
    var delimiters = this._tokens;
    var found = false;
    for (var name in delimiters) {
        if (delimiters[name] === token) {
            found = true;
            break;
        }
    }
    return found;
};
/**
 * Reports whether `token` appears in the `this._tags` list.
 *
 * @param {*} token - Candidate tag name.
 * @returns {boolean} true when the list contains `token`.
 */
bbcode._isTag = function(token) {
    // indexOf yields -1 only when the value is absent.
    return this._tags.indexOf(token) !== -1;
};
/**
 * Looks up the type name (the key in `this._tokens`) whose delimiter value is
 * strictly equal to `token`.
 *
 * @param {*} token - Delimiter to look up.
 * @returns {string|undefined} The first matching key, or undefined when no
 *     delimiter matches.
 */
bbcode._getType = function(token) {
    var delimiters = this._tokens;
    for (var typeName in delimiters) {
        if (delimiters[typeName] === token) {
            return typeName;
        }
    }
    return undefined;
};
/**
 * Returns the first element (the type slot of a lexed [type, value] pair) of
 * the entry that follows index `curr`.
 *
 * @param {Array} tokens - List of entries, as produced by `_lex`.
 * @param {number} curr - Index of the current entry.
 * @returns {*} The following entry's first element, or undefined when `curr`
 *     is the last index. The unguarded lookup used to throw a TypeError there.
 */
bbcode._next = function(tokens, curr) {
    var following = tokens[curr + 1];
    return following ? following[0] : undefined;
};
/**
 * Returns the first element (the type slot of a lexed [type, value] pair) of
 * the entry that precedes index `curr`.
 *
 * @param {Array} tokens - List of entries, as produced by `_lex`.
 * @param {number} curr - Index of the current entry.
 * @returns {*} The preceding entry's first element, or undefined when `curr`
 *     is 0. The unguarded lookup used to throw a TypeError there.
 */
bbcode._previous = function(tokens, curr) {
    var preceding = tokens[curr - 1];
    return preceding ? preceding[0] : undefined;
};
/**
 * Splits a BBCode string into a flat list of raw tokens: every delimiter
 * character recognised by `_isToken` becomes its own entry, and each run of
 * other characters between delimiters becomes one text entry.
 *
 * @param {string} bbcode - Raw input (shadows the outer `bbcode` object).
 * @returns {string[]} Tokens in source order.
 */
bbcode._tokenize = function(bbcode) {
    var tree = [];
    var temp = '';
    for (var i = 0; i < bbcode.length; i++) {
        var ch = bbcode[i];
        if (this._isToken(ch)) {
            // A delimiter ends the text run collected so far.
            if (temp.length > 0) {
                tree.push(temp);
                temp = '';
            }
            tree.push(ch);
        } else {
            temp += ch;
        }
    }
    // Flush text that follows the last delimiter; without this, trailing text
    // (or an input with no delimiters at all) was silently dropped.
    if (temp.length > 0) {
        tree.push(temp);
    }
    return tree;
};
/**
 * Pairs every raw token with a type label.
 *
 * Delimiters get the key reported by `_getType`, values accepted by `_isTag`
 * get 'BB_TAG', and everything else gets 'BB_STRING'.
 *
 * @param {Array} tokens - Raw tokens, as produced by `_tokenize`.
 * @returns {Array} One [type, value] pair per input token, in order.
 */
bbcode._lex = function(tokens) {
    var lexed = [];
    for (var idx = 0; idx < tokens.length; idx += 1) {
        var raw = tokens[idx];
        var kind;
        if (this._isToken(raw)) {
            kind = this._getType(raw);
        } else if (this._isTag(raw)) {
            kind = 'BB_TAG';
        } else {
            kind = 'BB_STRING';
        }
        lexed.push([kind, raw]);
    }
    return lexed;
};
/*****************************************************************************/
/* I need help with the block below */
/*****************************************************************************/
/**
 * Builds a (currently flat) tree object from lexed tokens.
 *
 * What the loop does today:
 *  - BB_STRING values are concatenated into `tree.text`.
 *  - A TO_DEL directly followed by a BB_TAG sets `tree.tag` to a fresh empty
 *    object, replacing any earlier one.
 *  - Any other TO_DEL is kept as literal text.
 *  - All other token types are ignored.
 *
 * TODO: nesting is not implemented. Supporting it would need some record of
 * the currently open node (for example a stack), so that new text and tags
 * attach to that node and not to the root.
 *
 * @param {Array} tokens - [type, value] pairs, as produced by `_lex`.
 * @returns {Object} Object with optional `text` and `tag` properties.
 */
bbcode._parse = function(tokens) {
    var tree = {};

    // Appends literal text to the root node, creating the field on first use.
    function appendText(value) {
        if (tree['text']) {
            tree['text'] += value;
        } else {
            tree['text'] = value;
        }
    }

    for (var i = 0; i < tokens.length; i++) {
        var type = tokens[i][0];
        var value = tokens[i][1];
        if (type === 'BB_STRING') {
            appendText(value);
        } else if (type === 'TO_DEL') {
            // Only look ahead when a following token exists; a trailing '['
            // used to crash inside _next and is now kept as literal text.
            var hasNext = i + 1 < tokens.length;
            if (hasNext && this._next(tokens, i) === 'BB_TAG') {
                tree['tag'] = {};
            } else {
                appendText(value);
            }
        }
    }
    return tree;
};
/*****************************************************************************/