123

What I am trying to do is read a .java file, and pick out all of the identifiers and store them in a list. My problem is with the .split() method. If you run this code the way it is, you will get ArrayOutOfBounds, but if you change the delimiter from "." to anything else, the code works. But I need to lines parsed by "." so is there another way I could accomplish this?

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;


public class MyHash {
    private static String[] reserved = new String[100];
    private static List list = new LinkedList();
    private static List list2 = new LinkedList();

    public static void main (String args[]){
        Hashtable hashtable  = new Hashtable(997);
        makeReserved();
        readFile();
        String line;
        ListIterator itr = list.listIterator();
        int listIndex = 0;
        while (listIndex < list.size()) {

            if (itr.hasNext()){
                line = itr.next().toString();
                //PROBLEM IS HERE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                String[] words = line.split(".");  //CHANGE THIS AND IT WILL WORK
                System.out.println(words[0]);      //TESTING TO SEE IF IT WORKED
            }
            listIndex++;
        }
    }

    public static void readFile() {
        String text;
        String[] words;
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader("MyHash.java")); //NAME OF INPUT FILE


        } catch (FileNotFoundException ex) {
            Logger.getLogger(MyHash.class.getName()).log(Level.SEVERE, null, ex);
        }
        try {
            while ((text = in.readLine()) != null){
                text = text.trim();
                words = text.split("\\s+");
                for (int i = 0; i < words.length; i++){
                    list.add(words[i]);
                }
                for (int j = 0; j < reserved.length; j++){
                    if (list.contains(reserved[j])){
                        list.remove(reserved[j]);
                    }
                }


            }

        } catch (IOException ex) {
            Logger.getLogger(MyHash.class.getName()).log(Level.SEVERE, null, ex);
        }
        try {
            in.close();
        } catch (IOException ex) {
            Logger.getLogger(MyHash.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    public static int keyIt (int x) {
        int key = x % 997;
        return key;
    }

    public static int horner (String word){
        int length = word.length();
        char[] letters = new char[length];

        for (int i = 0; i < length; i++){
            letters[i]=word.charAt(i);
        }

        char[] alphabet = new char[26];
        String abc = "abcdefghijklmnopqrstuvwxyz";

        for (int i = 0; i < 26; i++){
            alphabet[i]=abc.charAt(i);
        }

        int[] numbers = new int[length];
        int place = 0;
        for (int i = 0; i < length; i++){
            for (int j = 0; j < 26; j++){
                if (alphabet[j]==letters[i]){
                    numbers[place]=j+1;
                    place++;

                }
            }
        }

        int hornered = numbers[0] * 32;

        for (int i = 1; i < numbers.length; i++){

            hornered += numbers[i];
            if (i == numbers.length -1){
                return hornered;
            }
            hornered = hornered % 997;
            hornered *= 32;
        }
        return hornered;
    }

    public static String[] makeReserved (){
        reserved[0] = "abstract";
        reserved[1] = "assert";
        reserved[2] = "boolean";
        reserved[3] = "break";
        reserved[4] = "byte";
        reserved[5] = "case";
        reserved[6] = "catch";
        reserved[7] = "char";
        reserved[8] = "class";
        reserved[9] = "const";
        reserved[10] = "continue";
        reserved[11] = "default";
        reserved[12] = "do";
        reserved[13] = "double";
        reserved[14] = "else";
        reserved[15] = "enum";
        reserved[16] = "extends";
        reserved[17] = "false";
        reserved[18] = "final";
        reserved[19] = "finally";
        reserved[20] = "float";
        reserved[21] = "for";
        reserved[22] = "goto";
        reserved[23] = "if";
        reserved[24] = "implements";
        reserved[25] = "import";
        reserved[26] = "instanceof";
        reserved[27] = "int";
        reserved[28] = "interface";
        reserved[29] = "long";
        reserved[30] = "native";
        reserved[31] = "new";
        reserved[32] = "null";
        reserved[33] = "package";
        reserved[34] = "private";
        reserved[35] = "protected";
        reserved[36] = "public";
        reserved[37] = "return";
        reserved[38] = "short";
        reserved[39] = "static";
        reserved[40] = "strictfp";
        reserved[41] = "super";
        reserved[42] = "switch";
        reserved[43] = "synchronize";
        reserved[44] = "this";
        reserved[45] = "throw";
        reserved[46] = "throws";
        reserved[47] = "trasient";
        reserved[48] = "true";
        reserved[49] = "try";
        reserved[50] = "void";
        reserved[51] = "volatile";
        reserved[52] = "while";
        reserved[53] = "=";
        reserved[54] = "==";
        reserved[55] = "!=";
        reserved[56] = "+";
        reserved[57] = "-";
        reserved[58] = "*";
        reserved[59] = "/";
        reserved[60] = "{";
        reserved[61] = "}";

        return reserved;
    }
}
Mark
  • 28,783
  • 8
  • 63
  • 92
Matt
  • 1,353
  • 3
  • 11
  • 9

8 Answers8

233

String.split takes a regex, and '.' has a special meaning for regexes.

You (probably) want something like:

String[] words = line.split("\\.");

Some folks seem to be having trouble getting this to work, so here is some runnable code you can use to verify correct behaviour.

import java.util.Arrays;

public class TestSplit {
  public static void main(String[] args) {
    String line = "aa.bb.cc.dd";
    String[] words = line.split("\\.");
    System.out.println(Arrays.toString(words));
    // Output is "[aa, bb, cc, dd]"
  }
}
clstrfsck
  • 14,715
  • 4
  • 44
  • 59
  • 6
    this did not work for me...line.split(Pattern.quote(".")); did – AutoMEta Jun 12 '14 at 11:15
  • 2
    @AutoMEta you must have mistyped something, or misinterpreted the results that you are getting. The regex `"\\."` is correct for matching a full-stop. The `Pattern.quote(".")` answer from @prunge to this question is also a good general solution for any splits that may include unwanted regex meta characters. – clstrfsck Jun 13 '14 at 00:28
  • @AutoMeta You are right. This method no longer works and always gives empty array. Your method worked for me. – Ishaan Jun 18 '16 at 10:12
57

When splitting with a string literal delimiter, the safest way is to use the Pattern.quote() method:

String[] words = line.split(Pattern.quote("."));

As described by other answers, splitting with "\\." is correct, but quote() will do this escaping for you.

prunge
  • 22,460
  • 3
  • 73
  • 80
  • 1
    Splitting with "\\." does not work any more. Your's and @AutoMeta's answer is correct. It should be the preferred way. – Ishaan Jun 18 '16 at 10:13
  • 5
    Splitting with `"\\."` still works for me with the current version. – Andrew Nov 09 '16 at 23:25
8

The argument to split is a regular expression. The period is a regular expression metacharacter that matches anything, thus every character in line is considered to be a split character, and is thrown away, and all of the empty strings between them are thrown away (because they're empty strings). The result is that you have nothing left.

If you escape the period (by adding an escaped backslash before it), then you can match literal periods. (line.split("\\."))

Ken Bloom
  • 57,498
  • 14
  • 111
  • 168
4

Have you tried escaping the dot? like this:

String[] words = line.split("\\.");

MexicanHacker
  • 2,685
  • 3
  • 19
  • 20
3

This is definitely not the best way to do this but, I got it done by doing something like following.

String imageName = "my_image.png";
String replace = imageName.replace('.','~');
String[] split = replace.split("~");

System.out.println("Image name : " + split[0]);
System.out.println("Image extension : " + split[1]);

Output,

Image name : my_image
Image extension : png
CLOUGH
  • 691
  • 11
  • 16
  • 1
    You know that this isn't a good way and this question already has some proper ways, so what is the point in sharing this "unclean" one? (Just asking) – Tom Aug 03 '16 at 21:18
  • 1
    I never knew why split is not working for dot. So, I tried different approach. I thought what I did was cool, after seeing this answers. Because I got it done without knowing the issue. I add this answer only to show that we can do somethings with what we have, even without the understanding about the whole concept. That's all. – CLOUGH Aug 03 '16 at 21:48
2

The argument to split is a regular expression. "." matches anything so your delimiter to split on is anything.

Rob Goodwin
  • 2,676
  • 2
  • 27
  • 47
1

If performance is an issue, you should consider using StringTokenizer instead of split. StringTokenizer is much much faster than split, even though it is a "legacy" class (but not deprecated).

Lars Andren
  • 8,601
  • 7
  • 41
  • 56
0

You might be interested in the StringTokenizer class. However, the java docs advise that you use the .split method as StringTokenizer is a legacy class.

Matt Mills
  • 8,692
  • 6
  • 40
  • 64
Nitrodist
  • 1,585
  • 5
  • 24
  • 34