4

I'm trying to detect first level "if" conditions in a piece of text. Example text:

if (a == 5) {
    method1();
    method2()
}
if (a == 6) {
    method1();
    if (a < 2) {
        method3();
    }
}
if (a >= 8 && a <= 13) {
    method5(a);
    int[] b = new int[a];
    for(int i = 0; i < a; i++) {
        if (i == 0) {
            b[i] = i * 4;
            continue;
        }
        b[i] = i * 2;
    }
    method4(b);
}
if (a > 16) {
    method6();
}

This is what I got so far:

public class HelloWorld
{
  /**
   * Splits a hard-coded code snippet on every {@code if (...) {} header line and prints
   * each remaining piece prefixed with {@code "Line: "}.
   */
  public static void main(String[] args)
  {
    final String source = "if (a == 5) {\n\tmethod1();\n\tmethod2()\n}\nif (a == 6) {\n\tmethod1();\n\tif (a < 2) {\n\t\tmethod3();\n\t}\n}\nif (a >= 8 && a <= 13) {\n\tmethod5(a);\n\tint[] b = new int[a];\n\tfor(int i = 0; i < a; i++) {\n\t\tif (i == 0) {\n\t\t\tb[i] = i * 4;\n\t\t\tcontinue;\n\t\t}\n\t\tb[i] = i * 2;\n\t}\n\tmethod4(b);\n}\nif (a > 16) {\n\tmethod6();\n}";

    // The header pattern is the split delimiter, so the matched "if (...) {" text is
    // consumed. Nested headers match as well, which is why nested blocks are split too.
    final String[] pieces = source.split("if (.*) \\{");

    for (int i = 0; i < pieces.length; i++) {
      System.out.println("Line: " + pieces[i]);
    }
  }
}

Output:

Line: 
Line: 
    method1();
    method2()
}

Line: 
    method1();

Line: 
        method3();
    }
}

Line: 
    method5(a);
    int[] b = new int[a];
    for(int i = 0; i < a; i++) {

Line: 
            b[i] = i * 4;
            continue;
        }
        b[i] = i * 2;
    }
    method4(b);
}

Line: 
    method6();
}

It also prints nested ifs. I only want the first level ones. And the if will disappear when printing the line. I want the if to show too.

I basically want to group all first level ifs into one string. Can someone help me with this?

Displee
  • 670
  • 8
  • 20

1 Answer

1

Since you have to deal with nested brackets, an appropriate regex would be hard to maintain, as described in the SO question "How to match string within parentheses (nested) in Java?".

My solution is:

  1. Do some preprocessing to replace the nested brackets
  2. Capture the if content using regex
  3. Finally, do some postprocessing to restore the real brackets
package demo;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts the bodies of top-level brace-delimited blocks (for example first-level
 * {@code if} statements) from a piece of source text.
 *
 * <p>The work is done in three steps:
 * <ol>
 *   <li>{@link #preprocessing(String)} masks every nested bracket with a placeholder so that
 *       only top-level brackets remain;</li>
 *   <li>{@link #extract(String)} captures the text between each top-level bracket pair;</li>
 *   <li>{@link #postprocessing(List)} turns the placeholders back into real brackets.</li>
 * </ol>
 *
 * <p>Brackets are counted character by character, so brackets that appear inside string
 * literals or comments of the analysed text are counted as well.
 */
public class Parser {

    private static final char OPENED_BRACKET = '{';
    private static final char CLOSED_BRACKET = '}';
    private static final String OPENED_BRACKET_REPLACOR = "##OPENED_BRACKET_REPLACOR##";
    private static final String CLOSED_BRACKET_REPLACOR = "##CLOSED_BRACKET_REPLACOR##";

    // DOTALL lets '.' match every character, line terminators included. A plain lazy
    // quantifier over '.' is matched iteratively, whereas repeating an alternation group
    // such as (.|\n|\r|\t)*? recurses once per character and overflows the stack on long
    // bodies.
    private static final String REGEX    = "\\{(.*?)\\}";
    private static final Pattern PATTERN = Pattern.compile(REGEX, Pattern.DOTALL);

    /**
     * Replaces every nested bracket with a placeholder, leaving top-level brackets untouched.
     *
     * @param origin the text to scan
     * @return a copy of {@code origin} in which only top-level brackets are still literal
     */
    public String preprocessing(String origin) {
        StringBuilder replaced = new StringBuilder(origin.length());
        int depth = 0;
        for (int index = 0; index < origin.length(); index++) {
            char current = origin.charAt(index);

            if (current == OPENED_BRACKET) {
                // Only brackets opened while another one is already open are nested.
                if (depth++ > 0) {
                    replaced.append(OPENED_BRACKET_REPLACOR);
                    continue;
                }
            } else if (current == CLOSED_BRACKET) {
                // An unmatched closing bracket must not drive the depth below zero,
                // otherwise the nesting of every following block is misjudged.
                if (depth > 0 && --depth > 0) {
                    replaced.append(CLOSED_BRACKET_REPLACOR);
                    continue;
                }
            }

            replaced.append(current);
        }
        return replaced.toString();
    }

    /**
     * Collects the text found between each pair of literal brackets.
     *
     * @param source text in which nested brackets have already been masked by
     *               {@link #preprocessing(String)}
     * @return the block bodies in order of appearance, without the surrounding brackets
     */
    public List<String> extract(String source) {
        final Matcher matcher = PATTERN.matcher(source);
        List<String> list = new ArrayList<>();
        while (matcher.find()) {
            list.add(matcher.group(1));
        }
        return list;
    }

    /**
     * Restores the real brackets in each extracted body.
     *
     * @param source bodies that may still contain bracket placeholders
     * @return a new list with every placeholder replaced by the bracket it stands for
     */
    public List<String> postprocessing(List<String> source) {
        List<String> result = new ArrayList<>(source.size());
        for (String src : source) {
            // String.replace substitutes literally; no regex interpretation is wanted here.
            result.add(src.replace(OPENED_BRACKET_REPLACOR, String.valueOf(OPENED_BRACKET))
                          .replace(CLOSED_BRACKET_REPLACOR, String.valueOf(CLOSED_BRACKET)));
        }
        return result;
    }

    /** Runs the three steps on a small sample and prints each top-level block body. */
    public static void main(String[] args) {
        Parser parser = new Parser();
        String code = "if (a == 6) { method1(); if (a < 2) { method3(); } }if (a == 5) { method1();\n\r" +
                      " method2() }";
        String preprocessed = parser.preprocessing(code);
        List<String> extracted = parser.extract(preprocessed);
        List<String> postprocessed = parser.postprocessing(extracted);
        for (String ifContent : postprocessed) {
            System.out.println("Line: " + ifContent);
        }
    }
}

This will output:

Line: method1(); if (a < 2) { method3(); }
Line: method1();

method2()

Halayem Anis
  • 7,654
  • 2
  • 25
  • 45
  • Nice, a pure Java solution. I like this. There's only one thing left. How can I get the if condition for each if content? – Displee Jun 21 '18 at 14:00
  • @Displee you didn't ask for that :) but you can play with the regex in my solution: something like `private static final String REGEX = "if \\((.*?)\\) \\{((.|\\n|\r|\t)*?)\\}";` (not tested) and you will get the condition in `matcher.group(1)` and the if body in `matcher.group(2)`. Don't forget to upvote or mark as resolved to help other people.... – Halayem Anis Jun 21 '18 at 14:10
  • Well, at the bottom of my question it clearly states "And the if will disappear when printing the line. I want the if to show too.". But I guess you misread that. Anyways, I will try this when I'm home. Thanks a lot! – Displee Jun 21 '18 at 14:14
  • I got it working. Ended up with: `if (.*) \{((.|\n|\r|\t)*?)}`. Thanks again! – Displee Jun 21 '18 at 15:58