I have been working on a utility to parse text files in the format used by Paradox Interactive in their grand strategy games to be used with a visual-based modding tool I am also developing. I have a mostly-implemented, crude, early version of the parser written out and it is mostly working as intended. This is my second attempt at writing a text parser (the first, which ended up working just fine, parsed a subset of XML).
I speed-wrote my parser on the 9th and have spent all weekend trying to debug it, but all my efforts have failed. I have tracked the issue down to the 3rd line of nextChar()
. It was throwing an ArrayIndexOutOfBounds error with a crazy small number (in the -2 millions). After I added a bounds check the program just... continues. It reads all the information as needed, it just doesn't ever exit the parse loop.
The format is basically this:
car = {
model_year = 1966
model_name = "Chevy"
components = {
"engine", "frame", "muffler"
}
}
though I have yet to add support for nested lists like I plan, so my test string is:
car = {
model_year = 1966
model_name = "Chevy"
}
For both my understanding and anybody who would see my code, I tried to generously comment my code where I thought it might be necessary, though if any clarification is needed I would be happy to provide it.
My code:
/**
* Parses text files in the format used by Paradox Interactive in their computer games EUIV, CK2, and Stellaris.
*
* @author DJMethaneMan
* @date 12/9/2016
*/
public class Parser
{
private int pos, line, len, depth;
public String text;
private char[] script; //TODO: Initialize in the parse method
public Parser()
{
pos = 0;
line = 1;
len = 0;
depth = 0;
text = "car = {\n" +
" model_year = 1966 \n" +
" model_name = \"Chevy\"\n" +
"}\u0003";
//text = "Hello World";
//Car c = new Car();
//parse(text, c);
}
public static void main()
{
Car c = new Car();
Parser p = new Parser();
p.parse(p.text, c);
System.out.println("The model name is " + c.model_name);
System.out.println("The model year is " + c.model_year);
}
//TODO: Work
public void parse(String text, Parseable parsed)
{
char[] script = text.toCharArray();
this.script = script;
boolean next_char = false;
PARSE_LOOP:while(true)
{
char c;
if(next_char)
{
c = nextChar();
}
else
{
c = script[0];
next_char = true;
}
switch(c)
{
case 'A':
case 'a':
case 'B':
case 'b':
case 'C':
case 'c':
case 'D':
case 'd':
case 'E':
case 'e':
case 'F':
case 'f':
case 'G':
case 'g':
case 'H':
case 'h':
case 'I':
case 'i':
case 'J':
case 'j':
case 'K':
case 'k':
case 'L':
case 'l':
case 'M':
case 'm':
case 'N':
case 'n':
case 'O':
case 'o':
case 'P':
case 'p':
case 'Q':
case 'q':
case 'R':
case 'r':
case 'S':
case 's':
case 'T':
case 't':
case 'U':
case 'u':
case 'V':
case 'v':
case 'W':
case 'w':
case 'X':
case 'x':
case 'Y':
case 'y':
case 'Z':
case 'z':
case '_'://TODO: HERE
if(depth > 0) //
{
parsed.parseRead(buildWordToken(true), this);//Let the class decide how to handle this information. Best solution since I do not know how to implement automatic deserialization.
}
continueUntilChar('=', false); //A value must be assigned because it is basically a key value pair with {} or a string or number as the value
skipWhitespace();//Skip any trailing whitespace straight to the next token.
break;
case '{':
depth++;
break;
case '}':
depth--;
break;
case '\n':
line++;
break;
case ' ':
case '\t':
skipWhitespace();
break;
case '\u0003': //End of Text Character... Not sure if it will work in a file...
break PARSE_LOOP;
}
}
}
//Returns a string from the next valid token
public String parseString()
{
String retval = "";
continueUntilChar('=', false);
continueUntilChar('"', false);
retval = buildWordToken(false);
continueUntilChar('"', false); //Don't rewind because we want to skip over the quotation and not append it.
return retval;
}
//Returns a double from the next valid token
public double parseNumber()
{
double retval = 0;
continueUntilChar('=', false); //False because we don't want to include the = in any parsing...
skipWhitespace(); //In case we encounter whitespace.
try
{
retval = Double.parseDouble(buildNumberToken(false));
}
catch(Exception e)
{
System.out.println("A token at line " + line + " is not a valid number but is being passed as such.");
}
return retval;
}
/**********************************Utility Methods for Parsing****************************************/
protected void continueUntilChar(char target, boolean rewind)
{
while(true)
{
char c = nextChar();
if(c == target)
{
break;
}
}
if(rewind)
{
pos--;
}
}
protected void skipWhitespace()
{
while(true)
{
char c = nextChar();
if(!Character.isWhitespace(c))
{
break;
}
}
pos--;//Rewind because by default parse increments pos by 1 one when fetching nextChar each iteration.
}
protected String buildNumberToken(boolean rewind)
{
StringBuilder token = new StringBuilder();
String retval = "INVALID_NUMBER";
char token_start = script[pos];
System.out.println(token_start + " is a valid char for a word token."); //Print it.
token.append(token_start);
while(true)
{
char c = nextChar();
if(Character.isDigit(c) || (c == '.' && (Character.isDigit(peek(1)) || Character.isDigit(rewind(1))))) //Makes sure things like 1... and ...1234 don't get parsed as numbers.
{
token.append(c);
System.out.println(c + " is a valid char for a word token."); //Print it for debugging
}
else
{
break;
}
}
return retval;
}
protected String buildWordToken(boolean rewind)
{
StringBuilder token = new StringBuilder(); //Used to build the token
char token_start = script[pos]; //The char the parser first found would make this a valid token
token.append(token_start); //Add said char since it is part of the token
System.out.println(token_start + " is a valid char for a word token."); //Print it.
while(true)
{
char c = nextChar();
if(Character.isAlphabetic(c) || Character.isDigit(c) || c == '_')//Make sure it is a valid token for a word
{
System.out.println(c + " is a valid char for a word token."); //Print it for debugging
token.append(c); //Add it to the token since its valid
}
else
{
if(rewind)//If leaving the method will make this skip over a valid token set this to true.
{
//Rewind by 1 because the main loop in parse() will still check pos++ and we want to check the pos of the next char after the end of the token.
pos--;
break; //Leave the loop and return the token.
}
else //Otherwise
{
break; //Just leave the loop and return the token.
}
}
}
return token.toString(); //Get the string value of the token and return it.
}
//Returns the next char in the script by amount but does not increment pos.
protected char peek(int amount)
{
int lookahead = pos + amount; //pos + 1;
char retval = '\u0003'; //End of text character
if(lookahead < script.length)//Make sure lookahead is in bounds.
{
retval = script[lookahead]; //Return the char at the lookahead.
}
return retval; //Return it.
}
//Returns the previous char in the script by amount but does not decrement pos.
//Basically see peek only this is the exact opposite.
protected char rewind(int amount)
{
int lookbehind = pos - amount; //pos + 1;
char retval = '\u0003';
if(lookbehind > 0)
{
retval = script[lookbehind];
}
return retval;
}
//Returns the next character in the script.
protected char nextChar()
{
char retval = '\u0003';
pos++;
if(pos < script.length && !(pos < 0))
{
retval = script[pos]; //It says this is causing an ArrayIndexOutOfBoundsException with the following message. Shows a very large (small?) negative number.
}
return retval;
}
}
//TODO: Extend
interface Parseable
{
public void parseRead(String token, Parser p);
public void parseWrite(ParseWriter writer);
}
//TODO: Work on
class ParseWriter
{
}
class Car implements Parseable
{
public String model_name;
public int model_year;
@Override
public void parseRead(String token, Parser p)
{
if(token.equals("model_year"))
{
model_year = (int)p.parseNumber();
}
else if(token.equals("model_name"))
{
model_name = p.parseString();
}
}
@Override
public void parseWrite(ParseWriter writer)
{
//TODO: Implement along with the ParseWriter
}
}