0

I am trying to merge several .html files into one .html file using Jsoup. My thought was to get the list of .html files in the dir and store the names in an ArrayList. I would then loop through the ArrayList, passing each filename as a string to the Jsoup.parse() method.

I am able to populate the ArrayList without a problem and my code worked for one file at a time but when I added to the for loops below, the NEW_INFORMATION.html file is created but nothing populates. Any ideas as to what I'm missing?

Here is the current code:

public class mergeFiles {

    public static void main(String[] args) throws IOException {

        File outputFile = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\NEW_INFORMATION.html");
        File dir = new File ("C:\\Users\\1234\\Desktop\\PowerShellOutput\\");
        File [] paths;
        //Only capture files with extension .html
        FilenameFilter fileNameFilter = new FilenameFilter(){
            public boolean accept(File dir, String name) {
                // TODO Auto-generated method stub
                if (name.lastIndexOf('.') > 0) {
                    int lastIndex = name.lastIndexOf('.');
                    String extension = name.substring(lastIndex);
                    if(extension.equals(".html")){
                        return true;
                    }
                }
                return false;
            }
        };      
        paths = dir.listFiles(fileNameFilter);
        List<String> list = new ArrayList<String>();
        for (File x : paths){
            list.add(x.toString());
        }
        System.out.print(list);
        for (String s : list){
            File input = new File(s);
            Document doc = Jsoup.parse(input, "UTF-8"); 
            Elements links = doc.select("table");
            @SuppressWarnings("resource")
            BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new       FileOutputStream(outputFile), "UTF-8"));
            bw.append("<h2>" + s.toString() + "<h2>");
            bw.append(links.toString());
        }
    }
}

I also tried this variant without converting the paths into strings (same result):

for (File x : paths){
        Document doc = Jsoup.parse(x, "UTF-8"); 
        Elements links = doc.select("table");
        @SuppressWarnings("resource")
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
        bw.append("<h2>" + x.toString() + "<h2>");
        bw.append(links.toString());
    }

Complete answer for anyone in the future that may want something like this:

package htmlMerge;

import java.io.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

public class mergeFiles {

public static void main(String[] args) throws IOException {

    try {
        String outFileName = System.getProperty("user.home") + "/Desktop/<Insert The Directory/name.html>";
        File outputFile = new File(outFileName);
        String desktopDir = System.getProperty("user.home") + "/Desktop/<Insert Dir name>";
        File dir = new File(desktopDir);
        File[] paths;
        //create a file filter that will only worry about .html files if your folder contains other extensions
        FilenameFilter fileNameFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                if (name.lastIndexOf('.') > 0) {
                    int lastIndex = name.lastIndexOf('.');
                    String extension = name.substring(lastIndex);
                    if (extension.equals(".html")) {
                        return true;
                    }
                }
                return false;
            }
        };
        paths = dir.listFiles(fileNameFilter);
        //use BufferedWriterd to create the initial .html file with a header
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(outputFile), "UTF-8"));
        bw.write("<h1>REPORT DATA</h1>");
        bw.close();
        /*Use file writer to append the .html file with additional .html files
        In this case, the .html files all contain One 'table', so this
        will append the tables to 'outputFile'.*/
        try {
            String file = outputFile.getAbsolutePath();
            FileWriter fw = new FileWriter(file, true);
            for (File x : paths) {
                Document doc = Jsoup.parse(x, "UTF-8");
                Elements links = doc.select("table");
                //adds the filename of the .html as a Level 2 heading
                fw.write("<h2>" + x.toString() + "</h2>");
                fw.write(links.toString());
            }
            fw.close();
        }catch (IOException ioe) {
            System.err.println(ioe.getMessage());
        } finally {
            bw.close();
        }
    } catch (IOException ioe) {
        System.out.println(ioe.getMessage());
    }
    System.out.println("\nMerge Completed Successfully");
  }
}
Chris
  • 934
  • 1
  • 17
  • 38

1 Answers1

2

You have to close the BufferedWriter in order to see the changes.

Alkis Kalogeris
  • 17,044
  • 15
  • 59
  • 113