I am trying to get this piece of Java code to lexically analyze the phrase "(sum + 47) / total" and spit it out as:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is sum
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 26 Next lexeme is )
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
Next token is: -1 Next lexeme is EOF
However, it comes out as this instead:
Next token is: 25 Next lexeme is (
Next token is: 11 Next lexeme is um
Next token is: 21 Next lexeme is +
Next token is: 10 Next lexeme is 47
Next token is: 24 Next lexeme is /
Next token is: 11 Next lexeme is total
I know that I am screwing up something for EOF to not show up, but I cannot figure out why it cut off the "s" in sum and ")" after the 47. Here is my code for reference. Please let me know if there is anything I need to do to this post, as this is my first one.
import java.io.*;
import java.util.*;
public class Main
{
private static final int LETTER=0;
private static final int DIGIT=1;
private static final int UNKNOWN=99;
private static final int EOF=-1;
private static final int INT_LIT=10;
private static final int IDENT=11;
private static final int ASSIGN_OP=20;
private static final int ADD_OP=21;
private static final int SUB_OP=22;
private static final int MULT_OP=23;
private static final int DIV_OP=24;
private static final int LEFT_PAREN=25;
private static final int RIGHT_PAREN=26;
private static int charClass;
private static char lexeme[];
private static char nextChar;
private static int lexLen;
private static int token;
private static int nextToken;
private static File file;
private static FileInputStream fis;
public static int lookup(char ch)
{
switch (ch)
{
case '(':
addChar();
nextToken = LEFT_PAREN;
break;
case ')':
addChar();
nextToken = RIGHT_PAREN;
break;
case '+':
addChar();
nextToken = ADD_OP;
break;
case '-':
addChar();
nextToken = SUB_OP;
break;
case '*':
addChar();
nextToken = MULT_OP;
break;
case '/':
addChar();
nextToken = DIV_OP;
break;
default:
addChar();
nextToken = EOF;
break;
}
return nextToken;
}
public static void addChar()
{
if (lexLen <= 98)
{
lexeme[lexLen++] = nextChar;
lexeme[lexLen] = 0;
}
else
System.out.println("Error -lexeme is too long\n");
}
public static void getChar()
{
try
{
if(fis.available()>0)
{
nextChar=(char)fis.read();
if(Character.isLetter(nextChar))
charClass=LETTER;
else if(Character.isDigit(nextChar))
charClass=DIGIT;
else
charClass=UNKNOWN;
}
else
charClass=EOF;
}
catch(IOException e)
{
e.printStackTrace();
}
}
public static void getNonBlank()
{
while(Character.isSpaceChar(nextChar))
getChar();
}
public static int lex()
{
lexLen = 0;
getNonBlank();
switch (charClass)
{
/* parse identifiers */
case LETTER:
addChar();
getChar();
while (charClass == LETTER || charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = IDENT;
break;
/* parse integer literals and integers */
case DIGIT:
addChar();
getChar();
while(charClass == DIGIT)
{
addChar();
getChar();
}
nextToken = INT_LIT;
break;
/* parentheses and operators */
case UNKNOWN:
lookup(nextChar);
getChar();
break;
/* EOF */
case EOF:
nextToken = EOF;
break;
} /* end of switch */
System.out.print("Next token is :"+nextToken+" Next lexeme is :");
for(int i=0;i<lexLen;i++)
System.out.print(lexeme[i]);
System.out.println();
return nextToken;
}
public static void main(String args[])
{
lexLen=0;
lexeme=new char[100];
for(int i=0;i<100;i++)
lexeme[i]='0';
file = new File("input1.txt");
if (!file.exists())
{
System.out.println( "input1.txt does not exist.");
return;
}
if (!(file.isFile() && file.canRead()))
{
System.out.println(file.getName() + " cannot be read.");
return;
}
try
{
fis = new FileInputStream(file);
char current;
while (fis.available() > 0)
{
getChar();
// System.out.println(nextChar+" "+charClass);
lex();
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
}