You should use a Map to hold the count of occurrences, like so:
/**
 * Prints how often each whitespace-separated word occurs in {@code input.txt}.
 *
 * <p>The file is read line by line, every line is split on runs of whitespace,
 * and the resulting words are grouped and counted into a map that is printed
 * to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if {@code input.txt} cannot be opened
 */
public static void main(String[] args) throws IOException {
    Pattern whitespace = Pattern.compile("\\s+");
    // try-with-resources: Files.lines keeps the file open until the stream is closed.
    try (Stream<String> lines = Files.lines(Paths.get("input.txt"))) {
        Map<String, Long> result = lines
                .flatMap(whitespace::splitAsStream)
                // A line that starts with whitespace produces an empty leading
                // token; drop it so "" is never counted as a word.
                .filter(word -> !word.isEmpty())
                .collect(Collectors.groupingBy(Function.identity(),
                        Collectors.counting()));
        System.out.println(result);
    }
}
Or load the data and parse it in multiple stages:
/**
 * Loads {@code numbers.txt} and analyses it in three stages: load the words of
 * every line, count how often each word occurs overall, and count for every
 * pair of words the number of lines that contain both.
 *
 * <p>Results for the sample words {@code "04"} and {@code "07"} are printed to
 * standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if {@code numbers.txt} cannot be opened
 */
public static void main(String[] args) throws IOException {
    // 1. load the data array: one row per line, one element per word
    String[][] data;
    try (Stream<String> lines = Files.lines(Paths.get("numbers.txt"))) {
        data = lines
                .map(line -> Arrays.stream(line.split("\\s+"))
                        // split() yields "" for a blank line and an empty leading
                        // token for an indented line; neither is a word.
                        .filter(word -> !word.isEmpty())
                        .toArray(String[]::new))
                .toArray(String[][]::new);
    }
    System.out.format("Total lines = %d%n", data.length);

    // 2. count the occurrences of each word
    Map<String, Long> countDistinct = Arrays.stream(data)
            .flatMap(Arrays::stream)
            .collect(Collectors.groupingBy(Function.identity(),
                    Collectors.counting()));
    System.out.println("Count of 04 = " + countDistinct.getOrDefault("04", 0L));

    // 3. calculate correlations: correlations.get(a).get(b) is the number of
    //    lines containing both a and b (so get(a).get(a) is the number of
    //    lines containing a).
    Map<String, Map<String, Long>> correlations = Arrays.stream(data)
            .flatMap((String[] row) -> {
                // De-duplicate so a word repeated on one line counts that line once.
                Set<String> words = new HashSet<>(Arrays.asList(row));
                return words.stream()
                        .map(word -> new AbstractMap.SimpleEntry<>(word, words));
            })
            .collect(Collectors.toMap(
                    kv -> kv.getKey(),
                    // Each line contributes 1 for every word that shares it.
                    kv -> kv.getValue().stream()
                            .collect(Collectors.toMap(Function.identity(), v -> 1L)),
                    // Same key seen on another line: add the per-line counts together.
                    (map1, map2) -> {
                        map2.forEach((word, n) -> map1.merge(word, n, Long::sum));
                        return map1;
                    }));

    // Typed empty map instead of the raw Collections.EMPTY_MAP constant.
    Map<String, Long> with04 = correlations.getOrDefault("04", Collections.emptyMap());
    System.out.format("Lines with 04 = %d%n", with04.getOrDefault("04", 0L));
    System.out.format("Lines with both 04 and 07 = %d%n", with04.getOrDefault("07", 0L));
}
EDIT:
Here is a (perhaps) easier-to-read version that uses plain loops instead of the Stream/functional approach:
/**
 * Loop-based analysis of {@code numbers.txt}: counts the lines, the total
 * occurrences of every word, and for every pair of words the number of lines
 * that contain both.
 *
 * <p>Results for the sample words {@code "04"} and {@code "07"} are printed to
 * standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if {@code numbers.txt} cannot be opened
 */
public static void main(String[] args) throws IOException {
    long lineCount = 0;
    Map<String, Long> wordCount = new HashMap<>();
    // correlations.get(a).get(b) = number of lines containing both a and b
    Map<String, Map<String, Long>> correlations = new HashMap<>();

    try (Stream<String> stream = Files.lines(Paths.get("numbers.txt"))) {
        // A Stream is not Iterable; adapt it so it can drive a for-each loop.
        Iterable<String> lines = stream::iterator;
        for (String line : lines) {
            lineCount++;

            // Must be a fresh set for every line: reusing one set across lines
            // would carry earlier lines' words into this line's correlations.
            Set<String> lineWords = new HashSet<>();
            for (String word : line.split("\\s+")) {
                // split() yields "" for a blank line and an empty leading token
                // for an indented line; neither is a word.
                if (word.isEmpty()) {
                    continue;
                }
                lineWords.add(word);
                wordCount.merge(word, 1L, Long::sum);
            }

            // Every distinct word on this line co-occurs once with every
            // distinct word on this line (including itself).
            for (String wordA : lineWords) {
                Map<String, Long> relate =
                        correlations.computeIfAbsent(wordA, key -> new HashMap<>());
                for (String wordB : lineWords) {
                    relate.merge(wordB, 1L, Long::sum);
                }
            }
        }
    }

    System.out.format("Total lines = %d%n", lineCount);
    System.out.println("Count of 04 = " + wordCount.getOrDefault("04", 0L));

    // Typed empty map instead of the raw Collections.EMPTY_MAP constant.
    Map<String, Long> with04 = correlations.getOrDefault("04", Collections.emptyMap());
    System.out.format("Lines with 04 = %d%n", with04.getOrDefault("04", 0L));
    System.out.format("Lines with both 04 and 07 = %d%n", with04.getOrDefault("07", 0L));
}
Output:
Total lines = 4
Count of 04 = 4
Lines with 04 = 4
Lines with both 04 and 07 = 3