0

My question is this: how can reading a huge (many millions of lines) file keep a thread alive, even after it finished doing the file stuff

The problem is that I have a Thread started from the javafx application thread, which then (the new thread) performs some read/write actions on a text file, won't exit naturally when faced with HUGE files to parse through, 17-million lines large, specifically.

I'm assuming this is due to the thread holding on to some resource that I'm missing, however, since I'm using the try-with-resource model, I'm not sure how that's even possible really.

Here's the javafx controller class (using fxml) that sparks off the thread:

MainController.java:

    /**
     * This method handles what happens when the calculate button is clicked.
     * The main thing this does is disable/enable a few Nodes, as well as sparks
     * off the background thread.
     *
     * @param event
     */
    @FXML
    private void convert_button_action(ActionEvent event) {
        closing_label.setVisible(true);
        convert_button.setDisable(true);
        input_text = input_NCLocation_field.getText();
        output_text = output_Location_Field.getText();
        indicator_node.setVisible(true);

        if (!toggleSwitch.isSelected()) {
            (new Thread(new FileWriter(input_text, output_text, indicator_node))).start();
        } else {
            DateWriter temp = new DateWriter(input_text, output_text, indicator_node, yr_mn_dy.isSelected());
            (new Thread(temp)).start();
        }

    }

nothing too fancy in there, simply some making things visible/not visible and the starting of the appropriate thread based on the input of the user. next is the entire Thread class since it's not too huge. All it does really is either turn a line that looks like: yearmonthday into year,month,day or it separates the year month and day columns into separate files if the user clicked the check box that asked for it. Simply a handy tool for the use case.

Please note the println statement at the end of the run() method. I see this println every single time, but after it happens, nothing happens. The program doesn't exit, the thread doesn't stop, nothing.

package File_Conversion;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import javafx.application.Platform;
import javafx.scene.control.ProgressIndicator;

/**
 * This class is the background 'worker' thread that does all of the heavy duty
 * file IO for splitting up the NC file. It periodically sends reports back to
 * the main application thread to update the progress indicator.
 *
 * @author William
 */
public class DateWriter implements Runnable {

    private final ProgressIndicator myIndicator;
    private static File ncFile;
    private final String outputLocationFile;
    private float zmax, zmin, xmax, xmin, ymax, ymin;
    private ArrayList<Float> xList, yList, zList;
    private final DecimalFormat numberFormat = new DecimalFormat("#.000000");
    private final DecimalFormat numberFormatMinMax = new DecimalFormat("#.00000");
    private final boolean yr_mon_day;

    /**
     * This is the main constructor, it needs a valid NC file to continue.
     *
     * @param inputNCFile
     * @param outputLocation
     * @param myIndicator
     * @param yr_mon_day
     */
    public DateWriter(String inputNCFile, String outputLocation, ProgressIndicator myIndicator, boolean yr_mon_day) {
        this.yr_mon_day = yr_mon_day;
        this.myIndicator = myIndicator;
        ncFile = new File(inputNCFile);
        outputLocationFile = outputLocation;

    }

    /**
     * The primary run() method, starts the thread.
     */
    @Override
    public void run() {
        convertDate();
        Platform.runLater(new Runnable() {

            @Override
            public void run() {
                File_Conversion.stage_returner().close();
            }

        });
        System.out.println("I'm at the end of the run...??");
    }

    public boolean convertDate() {

        BufferedReader br = null;
        java.io.FileWriter yearWriter = null, MonthWriter = null, DayWriter = null
                            ,fWriter = null;
        BufferedWriter yearBuf = null, monthBuf = null, dayBuf = null, writer = null;


        try {
                br = new BufferedReader(new FileReader(ncFile));
                if (yr_mon_day) {
                yearWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_year.csv", false);
                yearBuf = new BufferedWriter(yearWriter);
                MonthWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_month.csv", false);
                monthBuf = new BufferedWriter(MonthWriter);
                DayWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_day.csv", false);

                dayBuf = new BufferedWriter(DayWriter);
                String input;
                String temp;
                String temp2;
                String temp3;
                while ((input = br.readLine()) != null) {
                    temp = input.substring(0, 4);

                    temp2 = input.substring(4, 6);

                    temp3 = input.substring(6);
                    Platform.runLater(new Runnable() {
                        @Override
                        public void run() {
                            myIndicator.setProgress(-1);
                        }
                    });
                    yearBuf.write(temp + "\n");
                    monthBuf.write(temp2 + "\n");
                    dayBuf.write(temp3 + "\n");
                }

            } else {
                fWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName() + "_modified.csv", false);
                writer = new BufferedWriter(fWriter);
                String input;
                String temp;
                while ((input = br.readLine()) != null) {
                    temp = input.substring(0, 4) + "," + input.substring(4, 6) + "," + input.substring(6);
                    Platform.runLater(new Runnable() {
                        @Override
                        public void run() {
                            myIndicator.setProgress(-1);
                        }
                    });
                    writer.write(temp + "\n");
                }

            }
        } catch (IOException e) {
            e.printStackTrace(System.out);
        }finally{
            try{
                if (br!=null) br.close();
                if (yearBuf !=null) yearBuf.close();
                if (monthBuf != null)monthBuf.close();
                if (dayBuf != null)dayBuf.close();
                if (yearWriter != null)yearWriter.close();
                if (MonthWriter != null)MonthWriter.close();
                if (DayWriter != null)DayWriter.close();
                if (fWriter != null) fWriter.close();
                if (writer != null) writer.close();

            }catch(IOException e){
                e.printStackTrace(System.out);
            }
        }

        return true;
    }

}

again, nothing fancy, some buffered streams and writers, and that's it! It's worth noting that this works perfectly for files that are small/not gigantic. It was only when faced with a multi-million line file am I seeing this behaviour.

Any help you can give would be much appreciated, thanks!

Edit 1

Just to help clarify, the reason part of the if/else is in try-with-resource madness, and the other is in the more traditional fashion, is simply to exemplify the fact that it has been tried both ways, the identical symptoms come out the thread running through either of the logical blocks, so I'm fairly certain the way I'm closing the resources has nothing to do with it.

WillBD
  • 1,919
  • 1
  • 18
  • 26

2 Answers2

1

Edit: Not that I can quickly read the code. Try this. I missed something earlier. join() simply waits for it to finish work. We need to call stop() or an equivalent later. stop() is deprecated, which is why I still recommend a thread pool. Executors.newCachedThreadPool() should do the trick.

Thread t=new Thread();
t.join();
t.stop();

Old solution (maybe useful): The easiest way to ensure your thread dies is to use Executors and ExecutorService

ExecutorService executorService = Executors.newFixedThreadPool(10);

executorService.execute(myRunnable);//execute right now
executorService.submit(myRunnable);//execute when <10 threads active
Future<MyType> future = executorService.submit(myCallable);//Runnable and Callable are efficient
MyType result = future.get();

executorService.submit(myThread);//more expensive to create threads and you are using a thread pool anyways

executorService.shutdown();//don't forget to do this when you are done executing or the program may hang

Use Runnable to simple execute work in a thread. Use Callable when you need a result back.

Another way is to call myThread.join();

Third method: SwingUtils.invokeLater(myRunnable);//easiest yet

Edit: solution to clean up try-catch: Java try/catch/finally best practices while acquiring/closing resources .. overkill solution, but simple Java io ugly try-finally block

Community
  • 1
  • 1
ldmtwo
  • 419
  • 5
  • 14
  • I tried it with Thread.join, no affect. After the 'join' from the main application thread was finished, the program stalled just the same. I'll try the `ExecutorService`, although it seems like a pretty overhead-heavy way to hande it. – WillBD Jun 16 '14 at 23:17
  • Your program may have something keeping the file open. First thing I recommend is to remove the 8 layers of TRY and make it a simple try{...}catch(Exception ex){ex.printStacktrace();} This will handle all of those. You are just going to give yourself a headache later. – ldmtwo Jun 16 '14 at 23:27
  • It originally was a simple try/catch/finally, with all of the stream closing in the finally, identical symptoms and outcomes. Although I do agree something in the reading of the file is holding it open, it ONLY happens on huge files, regular sized files (< 1 million lines) exit just fine. – WillBD Jun 16 '14 at 23:32
  • The large use of nesting kind of makes it harder to spot the problem. You are not closing the BufferedWriter br. – ldmtwo Jun 16 '14 at 23:51
  • I mean BufferedReader br – ldmtwo Jun 16 '14 at 23:52
  • BufferedReader br is part of a try-with-resource convention http://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html which means it is indeed closed at the end of the try block, that being said, to enhance clarity for people unfamiliar with the convention, I will edit it into a more traditional structure. – WillBD Jun 16 '14 at 23:54
  • edited original and removed the try-with-resource idiom. – WillBD Jun 17 '14 at 00:06
  • You could try adding a debug statement to the finally block to make sure that it is actually hitting that part of the code and not hanging while reading the large files. – chooks Jun 17 '14 at 13:51
  • @chooks it's hitting the println at the end of the run() method which is after the completion of the file handling method, which means it must be able to hit the finally block, but a good suggestion! – WillBD Jun 17 '14 at 15:18
  • Updated again. I always use thread pools when a living main thread needs to be managed or monitored. This should work though: t.join(); t.stop(); – ldmtwo Jun 17 '14 at 15:50
  • actually turns out it has nothing to do with the thread itself, it has to do with java holding onto too much memory and not being able to flush the streams properly, as such, it wouldn't allow any 'stop' operations on the Thread. The solution was to 'flush' the streams every single iteration. Huge slowdown, but it makes sure the thread releases properly. answer forthcoming, but I do appreciate all of your effort, thanks! – WillBD Jun 17 '14 at 15:55
  • 1
    Also, +1, because I do very much appreciate the effort, I'd never seen the executor service method of going about things, and I'll certainly employ that in the future, so thanks! – WillBD Jun 17 '14 at 16:05
0

So this turned out to be a solution I had a hint of a thought about yesterday, but never acted on.

Essentially the root of the problem I was encountering (I think) was due to overrunning my write buffer, which was causing undefined behaviour.

Now, I don't know if this is because of a bad java implementation of the java bufferedwriter, or what is exactly going on, but the solution turned out to be relatively simple: flush the streams every single iterations now, I know what you're thinking, Gah! that's so often! the slowdown must be immense! yes indeed, the slowdown WAS immense, it made a 17-million line file parse go from taking around 14 seconds, to around 4 minutes.

It's possible I could slightly up the iterations on the flushes to improve performance, but with a flush every 10 iterations, it still broke.

I'm sure this is a result of how the internals of read/write operations and memory management are handled in java, and I don't have time to delve into it. If anyone wants to take the time and give a good explanation of this behaviour, I'll be more than happy to switch my accepted answer to their post, since it is more complete.

The code of the fixed (and working now) DateWriter class for future inspection:

package File_Conversion;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import javafx.application.Platform;
import javafx.scene.control.ProgressIndicator;

/**
 * This class is the background 'worker' thread that does all of the heavy duty
 * file IO for splitting up the NC file. It periodically sends reports back to
 * the main application thread to update the progress indicator.
 *
 * @author William
 */
public class DateWriter implements Runnable {

    private final ProgressIndicator myIndicator;
    private static File ncFile;
    private final String outputLocationFile;
    private float zmax, zmin, xmax, xmin, ymax, ymin;
    private ArrayList<Float> xList, yList, zList;
    private final DecimalFormat numberFormat = new DecimalFormat("#.000000");
    private final DecimalFormat numberFormatMinMax = new DecimalFormat("#.00000");
    private final boolean yr_mon_day;

    /**
     * This is the main constructor, it needs a valid NC file to continue.
     *
     * @param inputNCFile
     * @param outputLocation
     * @param myIndicator
     * @param yr_mon_day
     */
    public DateWriter(String inputNCFile, String outputLocation, ProgressIndicator myIndicator, boolean yr_mon_day) {
        this.yr_mon_day = yr_mon_day;
        this.myIndicator = myIndicator;
        ncFile = new File(inputNCFile);
        outputLocationFile = outputLocation;
    }

    /**
     * The primary run() method, starts the thread.
     */
    @Override
    public void run() {
        convertDate();
        Platform.runLater(new Runnable() {
            @Override
            public void run() {
                File_Conversion.stage_returner().close();
            }
        });
        System.out.println("At the end of the method.");

    }

    public boolean convertDate() {

        BufferedReader br = null;
        java.io.FileWriter yearWriter = null, monthWriter = null, dayWriter = null, fWriter = null;
        BufferedWriter yearBuf = null, monthBuf = null, dayBuf = null, writer = null;
        try {
            br = new BufferedReader(new FileReader(ncFile));
            if (yr_mon_day) {
                yearWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_year.csv", false);
                yearBuf = new BufferedWriter(yearWriter);
                monthWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_month.csv", false);
                monthBuf = new BufferedWriter(monthWriter);
                dayWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName().substring(0, ncFile.getName().lastIndexOf(".")) + "_modified_day.csv", false);

                dayBuf = new BufferedWriter(dayWriter);
                String input;
                String temp;
                String temp2;
                String temp3;
                while ((input = br.readLine()) != null) {
                    temp = input.substring(0, 4);

                    temp2 = input.substring(4, 6);

                    temp3 = input.substring(6);
                    Platform.runLater(new Runnable() {
                        @Override
                        public void run() {
                            myIndicator.setProgress(-1);
                        }
                    });
                    yearBuf.write(temp + "\n");
                    monthBuf.write(temp2 + "\n");
                    dayBuf.write(temp3 + "\n");
                    yearBuf.flush();
                    monthBuf.flush();
                    dayBuf.flush();
                    temp = null;
                    temp2 = null;
                    temp3 = null;

                }
            } else {
                fWriter = new java.io.FileWriter(outputLocationFile + "\\" + ncFile.getName() + "_modified.csv", false);
                writer = new BufferedWriter(fWriter);
                String input;
                String temp;
                while ((input = br.readLine()) != null) {
                    temp = input.substring(0, 4) + "," + input.substring(4, 6) + "," + input.substring(6);
                    Platform.runLater(new Runnable() {
                        @Override
                        public void run() {
                            myIndicator.setProgress(-1);
                        }
                    });
                    writer.write(temp + "\n");
                    writer.flush();

                }

            }
        } catch (IOException e) {
            e.printStackTrace(System.out);
        } finally {
            try {
                if (br != null) {
                    br.close();
                }
                if (yearBuf != null) {
                    yearBuf.close();
                }
                if (monthBuf != null) {
                    monthBuf.close();
                }
                if (dayBuf != null) {
                    dayBuf.close();
                }
                if (yearWriter != null) {
                    yearWriter.close();
                }
                if (monthWriter != null) {
                    monthWriter.close();
                }
                if (dayWriter != null) {
                    dayWriter.close();
                }
                if (fWriter != null) {
                    fWriter.close();
                }
                if (writer != null) {
                    writer.close();
                }

            } catch (IOException e) {
                e.printStackTrace(System.out);
            }
        }
        return true;
    }

}
WillBD
  • 1,919
  • 1
  • 18
  • 26