0

In my code, I'm creating a Stream consisting of the lines of a file. I then use limit(1) on the stream to process only one of its elements. But actually, all of the elements get processed, as shown by the println() statements in the identity() method.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Path;
import java.util.stream.Stream;

/**
 * Copies lines from the input file(s) returned by {@link #getProcessablePaths()} into a single
 * output file, logging every line that is read and every line that is written.
 */
public class Main {

    public static void main(String[] args) {
        preprocessDataset();
    }

    /** Number of lines successfully written so far by {@link #writeLines(Stream)}. */
    private static int writtenLines = 0;

    /**
     * Builds the line stream over all processable files, truncates it to one line and writes
     * the result to the output file.
     */
    public static void preprocessDataset() {
        Stream<Path> nqPaths = getProcessablePaths();
        // NOTE: on JDK 8/9, flatMap keeps pushing the remaining elements of the mapped stream
        // even after limit(1) is satisfied (JDK-8075939, fixed in JDK 10). identity() may
        // therefore run for every line there, although only the first line is ever written.
        Stream<String> lines = nqPaths.flatMap(Main::toLines).limit(1);
        writeLines(lines);
    }

    /** Returns the files whose lines should be copied. */
    private static Stream<Path> getProcessablePaths() {
        return Stream.of(new File("/Users/sz/train/nq-train-00.jsonl").toPath());
    }

    /**
     * Opens {@code path} and returns a lazy stream of its lines, each passed through
     * {@link #identity(String)}.
     *
     * <p>The underlying reader is released when the returned stream is closed;
     * {@code Stream.flatMap} closes each mapped stream once it is done with it.
     *
     * @param path file to read
     * @return the lines of the file, or an empty stream if the file cannot be opened
     */
    private static Stream<String> toLines(Path path) {
        try {
            BufferedReader bufferedReader =
                    new BufferedReader(new FileReader(path.toAbsolutePath().toString()));
            return bufferedReader.lines()
                    .map(Main::identity)
                    .onClose(() -> closeReader(bufferedReader));
        } catch (IOException e) {
            e.printStackTrace();
            return Stream.empty();
        }
    }

    /** Closes {@code reader}, reporting (not propagating) any failure. */
    private static void closeReader(BufferedReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Logs {@code line} and returns it unchanged; makes stream evaluation visible. */
    private static String identity(String line) {
        System.out.println("processing line " + line);
        return line;
    }

    /**
     * Writes every element of {@code lines} to the output file, one per line.
     *
     * <p>A failure to write an individual line is reported and the remaining lines are still
     * attempted; only successfully written lines are counted and logged.
     *
     * @param lines the lines to write; consumed by this call
     */
    public static void writeLines(Stream<String> lines) {
        String filePath = "/Users/sz/test/data.json";

        // try-with-resources: the writer is flushed and closed even if the stream throws.
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(filePath))) {
            lines.forEach(line -> {
                try {
                    bufferedWriter.write(line);
                    bufferedWriter.newLine();
                    System.out.println(String.format("wrote %d lines.", ++writtenLines));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

The output:

processing line
wrote 1 lines.
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line
processing line

If I remove the limit(1) call, it works as expected, with one line being processed for each written line:

processing line
wrote 1 lines.
processing line
wrote 2 lines.
processing line
wrote 3 lines.
processing line
wrote 4 lines.
processing line
wrote 5 lines.
processing line
wrote 6 lines.
processing line

Why is this happening? Why is Java executing the identity() method for every line in the file even when I set limit(1)?

Sahand
  • 7,980
  • 23
  • 69
  • 137

0 Answers