I have this assignment for my university https://cs1331.gitlab.io/fall2018/hw2/hw2-source-model.html. I wrote the code but when I run the program I get this message at the console :
Exception in thread "main" java.lang.StringIndexOutOfBoundsException: begin 0, end -1, length 2
at java.base/java.lang.String.checkBoundsBeginEnd(String.java:3107)
at java.base/java.lang.String.substring(String.java:1873)
at homework1.SourceModel.main(SourceModel.java:127)
Here is my code for this assignment with comments :
package homework1;
import java.util.Scanner;
import java.io.File;
import java.io.FileNotFoundException;
public class SourceModel {
//initialize variables so they can be accessed everywhere
private String modelName;
private int[][] characterCount;
private double[] rowCount;
private double[][] probability;
/**
*
* @param name takes the name of the corpus
* @param fileName takes the filesName of corpus
*/
public SourceModel(String name, String fileName) {
modelName = name;
characterCount = new int[26][26];
rowCount = new double[26];
probability = new double[26][26];
System.out.println("Training " + name + "model...");
try {
Scanner scan = new Scanner(new File(fileName));
String temp = "";
//append all of the text
while (scan.hasNext()) {
temp += scan.next();
}
//only keeps the letters and makes them lowercase
temp = temp.replaceAll("[^A-Za-z]+", "").toLowerCase();
System.out.println(temp);
//iterates trough each letter then puts the letters
//sequence to the respective row and column
for (int i = 0; i < (temp.length() - 1); i++) {
char firstLetter = temp.charAt(i);
char secondLetter = temp.charAt(i + 1);
//index based on ASCII values
characterCount[(int) firstLetter - 97][(int) secondLetter - 97]++;
rowCount[(int) firstLetter - 97]++;
}
//calculates the probability by dividing the count
//by the total counts in each row
for (int i = 0; i < probability.length; i++) {
for (int j = 0; j < probability[i].length; j++) {
if (rowCount[i] == 0) {
rowCount[i] = 0.01;
}
probability[i][j] = (((double) characterCount[i][j]) / rowCount[i]);
if (probability[i][j] == 0) {
probability[i][j] = 0.01;
}
}
}
System.out.println("done");
}
catch (FileNotFoundException e) {
e.printStackTrace();
}
}
/**
*
* @return a string which contains the name
*/
public String getName() {
return modelName;
}
/**
* @return a string with the matrix
*/
public String toString() {
String matrix = "";
matrix += "";
for (int i = 97; i < 123; i++) {
matrix += " ";
matrix += (char) i;
}
matrix += ("\n");
for (int i = 0; i < probability.length; i++) {
matrix += ((char) (i + 97) + " ");
for (int j = 0; j < probability[i].length; j++) {
matrix += String.format("%.2f", probability[i][j]);
matrix += ("");
}
matrix += "\n";
}
return matrix;
}
/**
*
* @param test a set of letters to test
* @return the probability for the word
*/
public double probability(String test) {
test = test.replaceAll("[^A-Za-z]+", "").toLowerCase();
double stringProbability = 1.0;
for (int i = 0; i < test.length() - 1; i++) {
int firstIndex = (int) (test.charAt(i)) - 97;
int secondIndex = (int) (test.charAt(i + 1)) - 97;
stringProbability *= probability[firstIndex][secondIndex];
}
return stringProbability;
}
/**
*
* @param args the command line arguments
*/
public static void main(String[] args) {
SourceModel[] models = new SourceModel[args.length - 1];
for (int i = 0; i < args.length - 1; i++) {
models[i] = new SourceModel(args[i].substring(0, args[i].indexOf(".")), args[i]);
}
System.out.println("Analyzing: " + args[args.length - 1]);
double[] normalizedProbability = new double[args.length - 1];
double sumProbability = 0;
for (int i = 0; i < args.length - 1; i++) {
sumProbability += models[i].probability(args[args.length - 1]);
}
//normalize the probability in respect to the values given
for (int i = 0; i < normalizedProbability.length; i++) {
normalizedProbability[i] = models[i].probability(args[args.length - 1]) / sumProbability;
}
int highestIndex = 0;
for (int i = 0; i < args.length - 1; i++) {
System.out.print("Probability that test string is");
System.out.printf("%9s: ", models[i].getName());
System.out.printf("%.2f", normalizedProbability[i]);
System.out.println("");
if (normalizedProbability[i] > normalizedProbability[highestIndex]) {
highestIndex = i;
}
}
System.out.println("Test string is most likely " + models[highestIndex].getName() + ".");
}
}