1

I have a regexp that should delete comments (both multi-line and single-line):

(\s*\/\/.*)|((\/\*)(.|\n)+?(\*\/))

When I checked it in Sublime and on regex-testing websites, it worked fine for me. But when I used it in my program, it didn't work properly. This is what I mean:

/**
 * Base class from which all RMI-IIOP stubs must inherit.
 */

public abstract class Stub extends ObjectImpl
implements java.io.Serializable {

// This can only be set at object construction time (no sync necessary).
private transient StubDelegate stubDelegate = null;

The regexp only matches the single-line comment, whereas in other text editors it matches all comment types.

Here is my code:

// Opens the file at filePath and scans it one line at a time, printing every
// match of delCommentsPattern.
// NOTE: each matcher below is created over a single line (strLine), so it never
// sees text that spans a line break.
FileInputStream fis = new FileInputStream(filePath);
    BufferedReader br = new BufferedReader(new InputStreamReader(fis));
    String strLine;
    // readLine() returns the next line without its line terminator, or null at end of input.
    while ((strLine = br.readLine()) != null) {
        // The pattern is recompiled for every line that is read.
        Matcher m = Pattern.compile(delCommentsPattern,Pattern.MULTILINE).matcher(strLine);
        while (m.find()) {
            String b = m.group();
            System.out.println(b);
  }
}

What am I doing wrong?

UPD: Adding delCommentsPattern :

 // Java-escaped form of the pattern (\s*\/\/.*)|((\/\*)(.|\n)+?(\*\/)):
 //   1st alternative: optional whitespace, "//", then the rest of the line;
 //   2nd alternative: "/*", a lazily repeated (.|\n) group, then the closing "*/".
 private String delCommentsPattern ="(\\s*\\/\\/.*)|((\\/\\*)(.|\\n)+?(\\*\\/))";

UPD2: I've done what RAVI recommended, but now I get an exception when I use it on a larger file:

    Exception in thread "JavaFX Application Thread" java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
    at javafx.fxml.FXMLLoader$MethodHandler.invoke(FXMLLoader.java:1774)
    at javafx.fxml.FXMLLoader$ControllerMethodEventHandler.handle(FXMLLoader.java:1657)
    at com.sun.javafx.event.CompositeEventHandler.dispatchBubblingEvent(CompositeEventHandler.java:86)
    at com.sun.javafx.event.EventHandlerManager.dispatchBubblingEvent(EventHandlerManager.java:238)
    at com.sun.javafx.event.EventHandlerManager.dispatchBubblingEvent(EventHandlerManager.java:191)
    at com.sun.javafx.event.CompositeEventDispatcher.dispatchBubblingEvent(CompositeEventDispatcher.java:59)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:58)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:56)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:56)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.EventUtil.fireEventImpl(EventUtil.java:74)
    at com.sun.javafx.event.EventUtil.fireEvent(EventUtil.java:49)
    at javafx.event.Event.fireEvent(Event.java:198)
    at javafx.scene.Node.fireEvent(Node.java:8411)
    at javafx.scene.control.Button.fire(Button.java:185)
    at com.sun.javafx.scene.control.behavior.ButtonBehavior.mouseReleased(ButtonBehavior.java:182)
    at com.sun.javafx.scene.control.skin.BehaviorSkinBase$1.handle(BehaviorSkinBase.java:96)
    at com.sun.javafx.scene.control.skin.BehaviorSkinBase$1.handle(BehaviorSkinBase.java:89)
    at com.sun.javafx.event.CompositeEventHandler$NormalEventHandlerRecord.handleBubblingEvent(CompositeEventHandler.java:218)
    at com.sun.javafx.event.CompositeEventHandler.dispatchBubblingEvent(CompositeEventHandler.java:80)
    at com.sun.javafx.event.EventHandlerManager.dispatchBubblingEvent(EventHandlerManager.java:238)
    at com.sun.javafx.event.EventHandlerManager.dispatchBubblingEvent(EventHandlerManager.java:191)
    at com.sun.javafx.event.CompositeEventDispatcher.dispatchBubblingEvent(CompositeEventDispatcher.java:59)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:58)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:56)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.BasicEventDispatcher.dispatchEvent(BasicEventDispatcher.java:56)
    at com.sun.javafx.event.EventDispatchChainImpl.dispatchEvent(EventDispatchChainImpl.java:114)
    at com.sun.javafx.event.EventUtil.fireEventImpl(EventUtil.java:74)
    at com.sun.javafx.event.EventUtil.fireEvent(EventUtil.java:54)
    at javafx.event.Event.fireEvent(Event.java:198)
    at javafx.scene.Scene$MouseHandler.process(Scene.java:3757)
    at javafx.scene.Scene$MouseHandler.access$1500(Scene.java:3485)
    at javafx.scene.Scene.impl_processMouseEvent(Scene.java:1762)
    at javafx.scene.Scene$ScenePeerListener.mouseEvent(Scene.java:2494)
    at com.sun.javafx.tk.quantum.GlassViewEventHandler$MouseEventNotification.run(GlassViewEventHandler.java:380)
    at com.sun.javafx.tk.quantum.GlassViewEventHandler$MouseEventNotification.run(GlassViewEventHandler.java:294)
    at java.security.AccessController.doPrivileged(Native Method)
    at com.sun.javafx.tk.quantum.GlassViewEventHandler.lambda$handleMouseEvent$354(GlassViewEventHandler.java:416)
    at com.sun.javafx.tk.quantum.QuantumToolkit.runWithoutRenderLock(QuantumToolkit.java:389)
    at com.sun.javafx.tk.quantum.GlassViewEventHandler.handleMouseEvent(GlassViewEventHandler.java:415)
    at com.sun.glass.ui.View.handleMouseEvent(View.java:555)
    at com.sun.glass.ui.View.notifyMouse(View.java:937)
    at com.sun.glass.ui.gtk.GtkApplication._runLoop(Native Method)
    at com.sun.glass.ui.gtk.GtkApplication.lambda$null$49(GtkApplication.java:139)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:71)
    at sun.reflect.GeneratedMethodAccessor1.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:275)
    at javafx.fxml.FXMLLoader$MethodHandler.invoke(FXMLLoader.java:1769)
    ... 48 more
Caused by: java.lang.StackOverflowError
    at java.util.regex.Pattern$LazyLoop.match(Pattern.java:4843)
    at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
    at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
    at java.util.regex.Pattern$CharProperty.match(Pattern.java:3777)
    at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
    at java.util.regex.Pattern$GroupHead.match(Pattern.java:4658)
    at java.util.regex.Pattern$LazyLoop.match(Pattern.java:4847)
    at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
    at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
    at java.util.regex.Pattern$CharProperty.match(Pattern.java:3777)
    at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
    at java.util.regex.Pattern$GroupHead.match(Pattern.java:4658)
    at java.util.regex.Pattern$LazyLoop.match(Pattern.java:4847)
    at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
    at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
    at java.util.regex.Pattern$CharProperty.match(Pattern.java:3777)
    at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
    at java.util.regex.Pattern$GroupHead.match(Pattern.java:4658)
    at java.util.regex.Pattern$LazyLoop.match(Pattern.java:4847)
    at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
    at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
    at java.util.regex.Pattern$CharProperty.match(Pattern.java:3777)
    at java.util.regex.Pattern$Branch.match(Pattern.java:4604)
    at java.util.regex.Pattern$GroupHead.match(Pattern.java:4658)
    at java.util.regex.Pattern$LazyLoop.match(Pattern.java:4847)
    at java.util.regex.Pattern$GroupTail.match(Pattern.java:4717)
    at java.util.regex.Pattern$BranchConn.match(Pattern.java:4568)
    at java.util.regex.Pattern$CharProperty.match(Pattern.java:3777)
villat
  • 65
  • 1
  • 8
  • 1
    Hello, and welcome!!! Can you please show the line where you set `delCommentsPattern`? You may [edit] your question to do so. – Mariano Jul 31 '16 at 17:21

4 Answers4

4

Since you read the file line-by-line, your regular expression cannot match text that spans multiple lines. You need to read the whole file into a single string first, and then do the while loop.

But beware that your regular expression has problems with the following code, which to it looks like it starts a comment:

List<File> files = FileUtils.find("src/*.java");   // Note the /*

Instead of the regular expression, you should use a Java tokenizer and filter its output for comments. https://github.com/javaparser/javaparser looks like the appropriate tool for this job. It even has some test cases for parsing comments.

Roland Illig
  • 40,703
  • 10
  • 88
  • 121
1

As others have said, the whole file must be read into a string before running this regex.

Note that in order to parse comments, you have to parse strings at the same
time, otherwise either one can be hidden in the other.

This is the stringed regex.
Since you're stripping comments, this regex preserves formatting so it
doesn't look joined together after removal.

"(?m)((?:(?:^[ \\t]*)?(?:/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*/(?:[ \\t]*\\r?\\n(?=[ \\t]*(?:\\r?\\n|/\\*|//)))?|//(?:[^\\\\]|\\\\(?:\\r?\\n)?)*?(?:\\r?\\n(?=[ \\t]*(?:\\r?\\n|/\\*|//))|(?=\\r?\\n))))+)|(\"[^\"\\\\]*(?:\\\\[\\S\\s][^\"\\\\]*)*\"|'[^'\\\\]*(?:\\\\[\\S\\s][^'\\\\]*)*'|(?:\\r?\\n|[\\S\\s])[^/\"'\\\\\\s]*)"

Replace all with $2.

Explanation (if needed)

   (?m)                             # Multi-line modifier
   (                                # (1 start), Comments 
        (?:
             (?: ^ [ \t]* )?                  # <- To preserve formatting
             (?:
                  /\*                              # Start /* .. */ comment
                  [^*]* \*+
                  (?: [^/*] [^*]* \*+ )*
                  /                                # End /* .. */ comment
                  (?:                              # <- To preserve formatting 
                       [ \t]* \r? \n                                      
                       (?=
                            [ \t]*                  
                            (?: \r? \n | /\* | // )
                       )
                  )?
               |  
                  //                               # Start // comment
                  (?:                              # Possible line-continuation
                       [^\\] 
                    |  \\ 
                       (?: \r? \n )?
                  )*?
                  (?:                              # End // comment
                       \r? \n                               
                       (?=                              # <- To preserve formatting
                            [ \t]*                          
                            (?: \r? \n | /\* | // )
                       )
                    |  (?= \r? \n )
                  )
             )
        )+                               # Grab multiple comment blocks if need be
   )                                # (1 end)

|                                 ## OR

   (                                # (2 start), Non - comments 
        "
        [^"\\]*                          # Double quoted text
        (?: \\ [\S\s] [^"\\]* )*
        "
     |  '
        [^'\\]*                          # Single quoted text
        (?: \\ [\S\s] [^'\\]* )*
        ' 
     |  (?: \r? \n | [\S\s] )            # Linebreak or Any other char
        [^/"'\\\s]*                      # Chars which don't start a comment, string, escape,
                                         # or line continuation (escape + newline)
   )                                # (2 end)
0

It is not working because you are processing line by line.

First, you need to read the complete file into a single string. Then apply the regex to it.

// Read the whole input into one string first, so that the regex can match text
// spanning several lines. readLine() strips the line terminator, so '\n' is
// appended again after every line. (br and delCommentsPattern come from the
// surrounding code.)
String strLine;
StringBuilder sb = new StringBuilder();
while ((strLine = br.readLine()) != null) {
    sb.append(strLine).append('\n');
}

// Run the matcher once over the complete text and print every match.
Matcher m = Pattern.compile(delCommentsPattern, Pattern.MULTILINE).matcher(sb.toString());
while (m.find()) {
    String b = m.group();
    System.out.println(b);
}
RAVI
  • 3,143
  • 4
  • 25
  • 38
-1

If I'm understanding correctly what you are trying to do (i.e. extract the comments) try with this string as a regex in Java:

// Block comments ("/*" up to the nearest "*/", reluctant so that separate comments are
// not merged into one match) or line comments ("//" up to the end of the line).
// Compile with Pattern.DOTALL | Pattern.MULTILINE.
String regex = "(/\\*.*?\\*/)|(//.*?$)";

BTW, you have to make sure you escape the asterisk and other reserved characters. The other thing is that you need to read the whole file into one string if you want a regex to match across lines (i.e. a multi-line comment can never match if you only feed the regex one line at a time).

Be sure to use DOTALL and MULTILINE

Code example:

/**
 * Prints every comment found in {@code test.txt}, one {@code GROUP: <match>} line per match.
 *
 * @param args unused
 * @throws Exception if the file cannot be opened or closed
 */
public static void main(String[] args) throws Exception {
    String text;
    // Close the file as soon as it has been read, even if reading fails.
    try (FileInputStream in = new FileInputStream("test.txt")) {
        text = readStream(in);
    }
    // Block comments: the reluctant ".*?" stops at the nearest "*/", so two separate comments
    // (and the code between them) are never merged into one match; DOTALL lets '.' cross
    // line breaks. Line comments: "//" up to the end of the line; MULTILINE makes '$' match there.
    // Limitation: comment markers inside string literals are still reported as comments.
    Matcher m = Pattern.compile("(/\\*.*?\\*/)|(//.*?$)", Pattern.MULTILINE | Pattern.DOTALL).matcher(text);
    while (m.find()) {
        System.out.println("GROUP: " + m.group());
    }
}

/**
 * Reads all remaining bytes of {@code is} and decodes them as UTF-8 text.
 *
 * <p>The stream is read until end of input but is not closed here; closing it
 * remains the caller's responsibility.
 *
 * @param is the stream to read from
 * @return the decoded content, or an empty string if the stream has no more data
 * @throws RuntimeException (an {@code UncheckedIOException}) if reading fails
 */
public static String readStream(InputStream is) {
    StringBuilder sb = new StringBuilder(512);
    // Read in chunks rather than one char per call.
    char[] chunk = new char[8192];
    try {
        Reader r = new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8);
        int read;
        while ((read = r.read(chunk, 0, chunk.length)) != -1) {
            sb.append(chunk, 0, read);
        }
    } catch (IOException e) {
        // Still a RuntimeException for existing callers, but keeps the I/O cause typed.
        throw new java.io.UncheckedIOException(e);
    }
    return sb.toString();
}
mprivat
  • 21,582
  • 4
  • 54
  • 64