2

I need to write a program that removes comments from a file. Removing standard comments is quite simple. However, I haven't been able to come up with a way to remove a "hyphenated" comment, i.e. a single-line comment that is continued onto the next line by a trailing backslash. For example, the first two lines of the code below form one comment. My program does not understand this and removes only the first line of the comment.

// Function for output to console\
    ns2

#define _CRT_SECURE_NO_WARNINGS 
#include <stdio.h>
#include <stdlib.h>
#include <iso646.h>

/*
 * Question code, kept exactly as posted: copies inp.c to out.c while trying
 * to drop line comments and block comments. The NOTE(review) remarks mark the
 * places where the visible logic misbehaves.
 */
int main() {
    FILE *in = fopen("inp.c", "r");
    FILE *out = fopen("out.c", "w");
    /* NOTE(review): neither fopen() result is checked before the streams are used. */
    /* NOTE(review): fgetc() returns int; keeping the result in a char makes the EOF test unreliable. */
    char ch;
    /* not_eq is the <iso646.h> spelling of != */
    while ((ch = fgetc(in)) not_eq EOF) {
        if (ch == '/') {
            /* flag_1 becomes 1 once a comment has been consumed, so the '/' is not echoed. */
            int flag_1 = 0;
            ch = fgetc(in);
            if (ch == '/') {
                flag_1 = 1;
                /*
                 * NOTE(review): one character is read in the loop condition and a
                 * second one in the body, so only every other character is compared
                 * with '\n' and only the remaining ones with '\\'. There is no EOF test.
                 */
                while ((ch = fgetc(in)) not_eq '\n') {
                    ch = fgetc(in);
                    /*
                     * NOTE(review): a backslash ends the comment here instead of
                     * continuing it, so the following line is copied to the output.
                     */
                    if (ch == '\\') {
                        ch = '\n'; 
                        break;
                    }
                }
                /* The comment is replaced by a single newline written further down. */
                ch = '\n';
            }
            else if (ch == '*') {
                flag_1 = 1;
                /*
                 * NOTE(review): the character read by the loop condition is never
                 * compared with '*', and up to three characters are consumed per
                 * iteration, so a closing star-slash can be stepped over.
                 */
                while ((ch = fgetc(in)) not_eq EOF) {
                    if ((ch = fgetc(in)) == '*' and (ch = fgetc(in)) == '/') {
                        /* A block comment is also replaced by a newline. */
                        ch = '\n';
                        break;
                    }
                }
            }
            /* Not a comment after all: emit the '/' that was held back. */
            if (flag_1 == 0)
                fputc('/', out);
        }
        if (ch == '"') {
            fputc(ch, out);
            /*
             * Copy the string literal up to the next double quote.
             * NOTE(review): an escaped quote ends the literal early and EOF is not tested.
             */
            while ((ch = fgetc(in)) not_eq '"') {
                fputc(ch, out);
            }
        }
        /* Write the current character (or the '\n' that stands in for a comment). */
        fputc(ch, out);
    }
    fclose(in);
    fclose(out);
    return 0;
}

I have been trying to extend and change the program for more than an hour, but my beginner's knowledge is not enough to solve this task. I would be glad of your advice!

chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • This has [been asked](https://stackoverflow.com/questions/36454069/how-to-remove-c-style-comments-from-code), and perhaps may answer your question. ([and here](https://stackoverflow.com/questions/14975737/regular-expression-to-remove-comment)) – ryyker Jan 25 '22 at 15:55
  • 3
    Handling C comments thoroughly is hard. Note that `printf("// not a comment\n");` does not contain a comment. And `printf("/* not the start of a comment\n");` doesn't contain the start of a comment. (For the detail-oriented, you can write `int j = '//';` and that is not the start of a comment; ditto `int k = '/*';`. Multi-character constants are permitted but the result is implementation-defined.) To fix your code, you need to spot a backslash followed by a newline and continue the single-line comment if you encounter that. – Jonathan Leffler Jan 25 '22 at 16:01
  • A `\` at the end of a line in `C` continues that line onto the next one. You would have to incorporate that symbol into one of the regular expressions linked in the 1st comment. – ryyker Jan 25 '22 at 16:04
  • 2
    And, related to my previous comment, you do attempt to handle quoted strings, but `printf("This \" is mishandled /* and this does not start a comment\n");`. Backslashes make life complex. Don't ask about C++ and raw string literals, or punctuation in numbers (`0b0101'1100` is a valid binary literal in C++; it is not valid in C). – Jonathan Leffler Jan 25 '22 at 16:14
  • This `char ch = '"';` isn't the beginning of a string literal (which might contain a non-comment) either, and so on. – Weather Vane Jan 25 '22 at 16:25

2 Answers

2

In order to ignore the escaped newlines, sequences of \ followed by a newline, you could use a function that handles this transparently.

Note also these issues:

  • ch must be defined as an int to handle EOF correctly.
  • the macros defined in <iso646.h> make the code less readable.
  • \ should be handled when parsing strings.
  • character constants should be parsed too: '//' is a valid character constant, not a comment.
// Function for output to console\
    ns2
/\
*\ This is a valid comment too :) 
*\
/

#define _CRT_SECURE_NO_WARNINGS 
#include <stdio.h>

/*
 * Read the next character from `in`, treating a backslash immediately
 * followed by a newline as if neither character were present (line splicing).
 *
 * Returns the character read, or EOF at end of input. A backslash that is not
 * followed by a newline is returned as-is and its lookahead is pushed back.
 */
int mygetc(FILE *in) {
    int cur;

    while ((cur = getc(in)) == '\\') {
        int next = getc(in);

        if (next != '\n') {
            /* Ordinary backslash: un-read the lookahead (if any) and report it. */
            if (next != EOF)
                ungetc(next, in);
            return '\\';
        }
        /* Backslash-newline pair swallowed; keep reading. */
    }
    return cur;
}

/*
 * Consume the remainder of a line comment. Input is read through mygetc(),
 * so a backslash-newline inside the comment does not terminate it.
 *
 * Returns the '\n' that ended the comment, or EOF if input ran out first.
 */
int skip_line_comment(FILE *in) {
    for (;;) {
        int c = mygetc(in);

        if (c == '\n' || c == EOF)
            return c;
    }
}

/*
 * Consume the body of a block comment whose opening marker has already been
 * read, up to and including the closing star-slash.
 *
 * Returns ' ' (the character that stands in for the comment) on success, or
 * EOF if the input ended before the comment was closed.
 */
int skip_block_comment(FILE *in) {
    int after_star = 0;   /* non-zero when the previous character was '*' */
    int c;

    while ((c = mygetc(in)) != EOF) {
        if (after_star && c == '/')
            return ' ';
        after_star = (c == '*');
    }
    return EOF;
}

/*
 * Copy inp.c to out.c with all comments removed.
 *
 * A line comment is replaced by its terminating newline and a block comment
 * by a single space. String literals and character constants are copied
 * verbatim so that comment markers inside them are left alone.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot be
 * flushed.
 */
int main() {
    FILE *in = fopen("inp.c", "r");
    if (in == NULL) {
        perror("inp.c");
        return 1;
    }
    FILE *out = fopen("out.c", "w");
    if (out == NULL) {
        perror("out.c");
        fclose(in);
        return 1;
    }

    int ch = mygetc(in);
    while (ch != EOF) {
        if (ch == '/') {
            /* A comment needs a second character: look at what follows the '/'. */
            ch = mygetc(in);
            if (ch == '/') {
                ch = skip_line_comment(in);
            } else if (ch == '*') {
                ch = skip_block_comment(in);
            } else {
                /*
                 * Plain '/': emit it and re-examine the following character
                 * from the top, so that a quote right after it still opens a
                 * literal.
                 */
                fputc('/', out);
                continue;
            }
        } else if (ch == '"' || ch == '\'') {
            int sep = ch;
            fputc(ch, out);
            while ((ch = mygetc(in)) != sep && ch != EOF) {
                fputc(ch, out);
                if (ch == '\\') {
                    /* Copy the escaped character so an escaped quote does not end the literal. */
                    ch = mygetc(in);
                    if (ch == EOF)
                        break;
                    fputc(ch, out);
                }
            }
        }
        if (ch == EOF)
            break;
        fputc(ch, out);
        ch = mygetc(in);
    }

    fclose(in);
    /* fclose() flushes buffered output; a failure here means out.c is incomplete. */
    if (fclose(out) != 0) {
        perror("out.c");
        return 1;
    }
    return 0;
}
chqrlie
  • 131,814
  • 10
  • 121
  • 189
-2

Plan one: I wrote this myself. I think it is a little bloated, but I'll put it here for your reference.

#include <stdio.h>
#define TRUE 1
#define FALSE 0

typedef int BOOL;

/*
 * Plan one: copies test.c to output.c while dropping line comments and block
 * comments, using a hand-written set of state flags. The input is read with
 * fgets() in chunks of at most 49 characters; every flag is declared outside
 * the read loop, so state carries over from one chunk to the next.
 *
 * Returns 0 in every case, including when test.c cannot be opened.
 */
int main (void) {

        FILE *in;
        FILE *out;
        // Buffer for one chunk of input read from the file
        char dataToBeRead[50];

        in  = fopen("test.c", "r");
        out = fopen("output.c", "w");

        // NOTE(review): only 'in' is checked; 'out' is used (and, on this branch, never closed) without a NULL test.
        if (in == NULL) {
                printf("file failed to open.");
        } else {
                printf("The file is now opened.\n");

                BOOL isEscape         = FALSE; // previous character was a backslash outside any comment
                BOOL inDoubleQuotes   = FALSE; // inside a "..." literal
                BOOL inSingleQuotes   = FALSE; // a ' was just seen (see the note where it is handled)
                BOOL firstSlash       = FALSE; // if (inDoubleQuotes == 1 || isEscape == 1) ignore '/'
                BOOL isCommentEnd     = FALSE; // '*' encountered in multi-line comments
                BOOL isCommentLineEnd = FALSE; // '\' encountered in a single-line comment
                BOOL isDoubleSlash    = FALSE; // whether in single-line comment
                BOOL isInComment      = FALSE; // whether in multi-line comments

                while (fgets(dataToBeRead, 50, in) != NULL) {

                        int i = 0;
                        while (dataToBeRead[i] != 0) {
                                char c = dataToBeRead[i];
                                // begin analyses comment

                                if (isDoubleSlash == FALSE && isInComment == FALSE) { // not in a comment

                                        // The previous character was a backslash: copy this one verbatim
                                        if (isEscape == TRUE) {
                                                isEscape = FALSE;
                                                fprintf(out, "%c", c);
                                                i++;
                                                continue;
                                        }
                                        // An escape character is recognized
                                        // NOTE(review): outside quotes the backslash itself is not written,
                                        // so it is lost from the output.
                                        if (c == '\\') {
                                                isEscape = TRUE;
                                                if (inDoubleQuotes == TRUE || inSingleQuotes == TRUE) {
                                                        fprintf(out, "%c", c);
                                                }
                                                i++;
                                                continue;
                                        }

                                        // NOTE(review): the flag is cleared after a single character, so the
                                        // closing ' sets it again and the character after the constant is
                                        // copied without being examined.
                                        if (inSingleQuotes == TRUE) {
                                                inSingleQuotes = FALSE;
                                                fprintf(out, "%c", c);
                                                i++;
                                                continue;
                                        } else if (c == '\'') {
                                                inSingleQuotes = TRUE;
                                                fprintf(out, "%c", c);
                                                i++;
                                                continue;
                                        }

                                        // Double quotation marks are recognized: toggle the string state
                                        if (c == '"') {
                                                inDoubleQuotes = inDoubleQuotes == TRUE ? FALSE : TRUE;
                                                fprintf(out, "%c", c);
                                                i++;
                                                continue;
                                        } else if (inDoubleQuotes == TRUE) { // It's currently in double quotes, so just copy it
                                                // NOTE(review): this printf also echoes the string contents to
                                                // stdout; it looks like leftover debug output.
                                                printf("%c", c);
                                                fprintf(out, "%c", c);
                                                i++;
                                                continue;
                                        }
                                        // Here, it can't be in double quotes, and it can't be an escape character

                                        // NOTE(review): a pending firstSlash is only resolved here, after the
                                        // quote checks above, so a '/' directly followed by a quote is
                                        // written late.
                                        if (firstSlash == TRUE) {
                                                firstSlash = FALSE;
                                                if (c == '/') { // recognize '//'
                                                        isDoubleSlash = TRUE;
                                                        i++;
                                                        continue;
                                                } else if (c == '*') { // recognize '/*'
                                                        isInComment = TRUE;
                                                        i++;
                                                        continue;
                                                } else {
                                                        // Not a comment, Fill in the '/' sign
                                                        fprintf(out, "%c", '/');
                                                }
                                        } else if (c == '/') { // recognize '/'
                                                firstSlash = TRUE;
                                                // It could be a comment, so don't write the '/' sign
                                                i++;
                                                continue;
                                        }

                                        // For non-commented content, write
                                        fprintf(out, "%c", c);
                                } else { // in comment

                                        // single-line comment (10 is the newline character code in ASCII)
                                        if (isDoubleSlash == TRUE) {
                                                if (c == '\\') {
                                                        // When you encounter a backslash, the backslash may be at the end of the line
                                                        // NOTE(review): the flag stays set until the next newline even
                                                        // when other characters follow the backslash.
                                                        isCommentLineEnd = TRUE;
                                                        i++;
                                                        continue;
                                                } else if (isCommentLineEnd == TRUE && c == 10) {
                                                        isCommentLineEnd = FALSE;
                                                        // Newline after a backslash: the comment continues on the next line
                                                        i++;
                                                        continue;
                                                } else if (c == 10) {
                                                        // Plain newline: the comment ends and the newline is kept
                                                        isDoubleSlash = FALSE;
                                                        fprintf(out, "%c", 10);
                                                        i++;
                                                        continue;
                                                }
                                        }


                                        // multi-line comment: nothing is written for it, not even a space
                                        if (isInComment == TRUE) {
                                                // NOTE(review): isCommentEnd is reset by any character after a
                                                // '*', including another '*', so a comment ending in two stars
                                                // and a slash is not closed.
                                                if (isCommentEnd == TRUE) {
                                                        isCommentEnd = FALSE;
                                                        if (c == '/') {
                                                                isInComment = FALSE;
                                                                i++;
                                                                continue;
                                                        }
                                                } else if (c == '*') {
                                                        isCommentEnd = TRUE;
                                                }
                                        }
                                }

                                // end analyses comment

                                i++;
                        }
                }

                fclose(in);
                fclose(out);
                printf("The file is now closed.");
        }

        return 0;
}

Plan two: this is an improved version of the other answer.

#include <stdio.h>

#define TRUE 1
#define FALSE 0

typedef int BOOL;

// Read one character from 'in', silently dropping every backslash-newline
// pair (a '\' as the very last character of a line) so that callers see the
// spliced line. Returns the character read, or EOF at end of input.
int mygetc (FILE *in) {
        int ch = getc(in);

        while (ch == '\\') {
                int peek = getc(in);

                if (peek == '\n') {
                        // spliced line: drop both characters and read on
                        ch = getc(in);
                        continue;
                }
                // an ordinary backslash: un-read the lookahead, if there is one
                if (peek != EOF)
                        ungetc(peek, in);
                break;
        }
        return ch;
}

// Called once a one-line comment has been opened: discards the rest of the
// comment and returns the character that ended it, either '\n' or EOF.
int skip_line_comment (FILE *in) {
        int ch;

        do {
                ch = mygetc(in);
        } while (ch != '\n' && ch != EOF);
        return ch;
}

// Called once a multi-line comment has been opened: discards everything up to
// and including the closing marker. Returns ' ' as the replacement for the
// comment, or EOF when the input ends before the comment is closed.
int skip_block_comment (FILE *in) {
        int prev = 0;   // character read on the previous iteration

        for (;;) {
                int cur = mygetc(in);

                if (cur == EOF)
                        return EOF;
                if (prev == '*' && cur == '/')
                        return ' ';
                prev = cur;
        }
}

// Copy 'in' to 'out' with the comments deleted.
// - A one-line comment is replaced by the newline that ends it.
// - A multi-line comment is replaced by a single space.
// - String literals and character constants are copied unchanged, so comment
//   markers inside them are not touched.
// Both streams must already be open; neither is closed here.
void removeComments (FILE *in, FILE *out) {
        int c = mygetc(in);

        while (c != EOF) {
                if (c == '"' || c == '\'') {
                        int separator = c;
                        fputc(c, out);
                        while ((c = mygetc(in)) != separator && c != EOF) {
                                fputc(c, out);
                                if (c == '\\') {
                                        // copy the escaped character so an escaped quote does not end the literal
                                        c = mygetc(in);
                                        if (c == EOF)
                                                break;
                                        fputc(c, out);
                                }
                        }
                } else if (c == '/') {
                        c = mygetc(in);
                        if (c == '/') { // recognize '//'
                                c = skip_line_comment(in);
                        } else if (c == '*') { // recognize '/*'
                                c = skip_block_comment(in);
                        } else {
                                // Plain '/': write it, then examine the following character from
                                // the top of the loop so a quote right after it still opens a literal.
                                fputc('/', out);
                                continue;
                        }
                }
                if (c == EOF)
                        break;
                fputc(c, out);
                c = mygetc(in);
        }
}

// Strip the comments from test.c and write the result to output.c.
// Returns 0 on success and 1 when either file cannot be opened.
int main () {
        const char inName[20]  = "test.c";
        const char outName[20] = "output.c";
        FILE *in;
        FILE *out;

        in = fopen(inName, "r");
        if (in == NULL) {
                // the output file is not opened (and so not truncated) when there is nothing to read
                printf("file opening error\n");
                return 1;
        }

        out = fopen(outName, "w");
        if (out == NULL) {
                printf("file opening error\n");
                fclose(in);
                return 1;
        }

        removeComments(in, out);

        // only streams that were actually opened reach this point; fclose(NULL) is undefined
        fclose(in);
        fclose(out);
        return 0;
}

test case:

// test.c
// \ Function for output to console\
    \ns2
/\
* This\
is a valid comment too :)
*\
/
#include <stdio.h>
int main (void) {
        // alksjdlkf // /**/*
        printf("// not a \
                comme\
                nt\n");
        printf("/* not the start of a comment\n");
        // int j = '//';
        // int k = '/*';
        char a  = '"';
        char b = '\'';
        char c = '\\';
        // int x = "'";

        char /* ** */ dataToBeRead[50];
        char test1[100] = "//**//"; /*
        al;skdjf
         */
        char test2[100] = "\\";

        return 0;
}

output:


 
#include <stdio.h>
int main (void) {
        
        printf("// not a                 comme                nt\n");
        printf("/* not the start of a comment\n");
        
        
        char a  = '"';
        char b = '\'';
        char c = '\\';
        

        char   dataToBeRead[50];
        char test1[100] = "//**//";  
        char test2[100] = "\\";

        return 0;
}

linhieng
  • 3
  • 2