0

I am trying to read a .txt file that has a tree structure expressed with tabs, and I want to convert it into a .csv file.

Category
  Subcategory
     Subcategory1
        Subcategory11
            Item1
            Item2     
        Subcategory12
            Item1
        Subcategory13
            Item1
                Item11

I want to create a .csv file with the following structure:

Category, Subcategory,Subcategory1, Subcategory11,Item1
Category, Subcategory,Subcategory1, Subcategory11,Item2 
Category, Subcategory,Subcategory1, Subcategory12,Item1
Category, Subcategory,Subcategory1, Subcategory13,Item1,Item11

What I have done so far is this:

/**
 * Reads {@code keywords.txt} and splits every line into its non-empty, tab-separated tokens.
 *
 * <p>Each input line gets its own token list inside {@code keywordsList}. Empty tokens (which
 * is what leading tabs produce when splitting on {@code "\t"}) are dropped, so the nesting
 * depth of a line cannot be recovered from the collected tokens.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if {@code keywords.txt} cannot be opened
 */
public static void main(String[] args) throws IOException {
    ArrayList<ArrayList<String>> keywordsList = new ArrayList<ArrayList<String>>();

    // try-with-resources closes the scanner (and the underlying file) even if reading fails
    try (Scanner keywords = new Scanner(new File("keywords.txt"))) {
        // hasNextLine() is the guard that matches nextLine()
        while (keywords.hasNextLine()) {
            String line = keywords.nextLine();

            // A fresh list per line: sharing one list across iterations would make every
            // entry of keywordsList point at the same, ever-growing list.
            ArrayList<String> newline = new ArrayList<String>();
            for (String token : line.split("\t")) {
                // split() never yields null, so only the empty check is needed
                if (!token.isEmpty()) {
                    newline.add(token);
                }
            }

            keywordsList.add(newline);
        }
    }
}
flower
  • 121
  • 2
  • 14
  • I have created a basic TreeNode with the above data. It keeps working if you expand the tree or add further children to the parents. The exact amount of indentation does not matter: if the next line is indented further, it is automatically treated as a child node. Hope that helps. – Amit Kumar Lal May 12 '19 at 13:07

3 Answers3

1

This should work (warning: it could fail with unexpected input, i.e. a line with 2 tabs more than the previous):

    // Path from the root entry down to the line currently being processed.
    ArrayList<String> stack = new ArrayList<String>();
    // Finished CSV rows, in input order.
    ArrayList<String> csvLines = new ArrayList<String>();

    // number of leading tabs of the last line processed (-1: nothing processed yet)
    int lastSize = -1;

    // try-with-resources closes the scanner (and the file) even if reading fails
    try (Scanner keywords = new Scanner(new File("keywords.txt"))) {
        // hasNextLine() is the guard that matches nextLine()
        while (keywords.hasNextLine()) {
            String line = keywords.nextLine();

            // Blank lines carry no entry; processing them would reset the stack to [""]
            // and corrupt every following row.
            if (line.trim().isEmpty()) {
                continue;
            }

            // Count the leading tabs of the current line; the length check avoids
            // an IndexOutOfBoundsException in charAt().
            int tabs = 0;
            while (tabs < line.length() && line.charAt(tabs) == '\t') {
                tabs++;
            }

            line = line.substring(tabs); // delete the starting tabs

            // The current line is not deeper than the previous one, so the previous
            // line was a leaf and its full path can be saved as a CSV row.
            if (tabs <= lastSize) {
                csvLines.add(String.join(", ", stack));
            }

            // Cut the stack back to the ancestors of the current line.
            while (stack.size() > tabs) {
                stack.remove(stack.size() - 1);
            }

            // After the cut the stack never holds more than `tabs` entries,
            // so the current line always becomes the new last path element.
            stack.add(line);

            // save the number of tabs of the current line
            lastSize = tabs;
        }
    }

    // The last line processed is always a leaf and has not been saved yet.
    if (lastSize >= 0) {
        csvLines.add(String.join(", ", stack));
    }

    // print out CSV
    for (String string : csvLines) {
        System.out.println(string);
    }
sador
  • 34
  • 4
1

I have created a very basic tree-node structure based on the indentation of the word on each line of the file. Below is the code (hopefully the comments and variable names are self-explanatory). P.S. I have used Files.readAllLines to read the whole content into a single List.

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;

/**
 * Converts an indentation-structured text file into CSV-like rows.
 *
 * <p>Every non-blank line is one node; a line that is indented further than an earlier line
 * is a descendant of the nearest such earlier line. For every leaf, the names on the path from
 * its top-level ancestor down to the leaf are printed on one line, separated by {@code " , "}.
 */
public class Sample {

    /** File that is read when no path is passed on the command line. */
    private static final String DEFAULT_INPUT = "C:\\Users\\Untitled.txt";

    /**
     * Reads the input file and prints one row per leaf to standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. When absent,
     *             {@code C:\Users\Untitled.txt} is used.
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        File file = new File(path);
        List<String> lines = Files.readAllLines(file.toPath(), StandardCharsets.UTF_8);

        for (Node root : buildForest(lines)) {
            createResultString(root, root.name);
        }
    }

    /**
     * Builds the node hierarchy described by the indentation of {@code lines}.
     *
     * <p>Blank lines are ignored. Only the relative order of indentation widths matters,
     * not their exact size. A tab and a space each count as one indentation character.
     *
     * @param lines raw lines of the input file
     * @return the top-level nodes in input order; empty if there are no non-blank lines
     */
    private static List<Node> buildForest(List<String> lines) {
        List<Node> roots = new ArrayList<Node>();
        // Chain of still-open ancestors, innermost last, with the indentation of each.
        List<Node> openNodes = new ArrayList<Node>();
        List<Integer> openIndents = new ArrayList<Integer>();

        for (String line : lines) {
            String name = line.trim();
            if (name.isEmpty()) {
                continue;
            }
            int indent = indentOf(line);

            // Close every open node that is not a strict ancestor of this line. This handles
            // siblings and dedents of any number of levels in the same way.
            while (!openNodes.isEmpty()
                    && openIndents.get(openIndents.size() - 1) >= indent) {
                openNodes.remove(openNodes.size() - 1);
                openIndents.remove(openIndents.size() - 1);
            }

            Node node = new Node(name);
            if (openNodes.isEmpty()) {
                roots.add(node); // nothing encloses this line: it is a top-level node
            } else {
                node.parent = openNodes.get(openNodes.size() - 1);
                node.parent.childrens.add(node);
            }

            // The new node may itself receive children from the following lines.
            openNodes.add(node);
            openIndents.add(indent);
        }
        return roots;
    }

    /**
     * Counts the leading whitespace characters of {@code line}.
     *
     * <p>Trailing whitespace is deliberately not counted, so stray spaces at the end of a
     * line do not change its depth. The {@code <= ' '} test matches what {@link String#trim()}
     * treats as whitespace.
     */
    private static int indentOf(String line) {
        int count = 0;
        while (count < line.length() && line.charAt(count) <= ' ') {
            count++;
        }
        return count;
    }

    /**
     * Prints one row for every leaf below {@code root} (or for {@code root} itself if it has
     * no children).
     *
     * @param root   node whose subtree is printed
     * @param result the already joined path from the top-level node down to {@code root}
     */
    private static void createResultString(Node root, String result) {
        if (root.childrens.isEmpty()) {
            System.out.println(result); //**This is your csv data**
            return;
        }
        for (Node child : root.childrens) {
            // recursive descent, extending the path by the child's name
            createResultString(child, result + " , " + child.name);
        }
    }

    /** Simple tree node holding a name, its children in input order, and its parent. */
    static class Node {
        Node(String word) {
            this.name = word;
        }

        String name;
        List<Node> childrens = new ArrayList<Sample.Node>();
        /** Enclosing node, or {@code null} for a top-level node. */
        Node parent;
    }
}

The output will be:

Category , Subcategory , Subcategory1 , Subcategory11 , Item1
Category , Subcategory , Subcategory1 , Subcategory11 , Item2
Category , Subcategory , Subcategory1 , Subcategory12 , Item1
Category , Subcategory , Subcategory1 , Subcategory13 , Item1 , Item11
Amit Kumar Lal
  • 5,537
  • 3
  • 19
  • 37
0

I know this doesn't answer your question directly, but you are parsing a document and Finite State Machines are a great place to start if you're parsing a document.

hooknc
  • 4,854
  • 5
  • 31
  • 60