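A simple regular expression for a C comment is:
/\*([^\*]|\*[^\/])*\*\//
(Sorry for the escape characters.) The intent is to allow any sequence inside a comment except */. Note that, as written, the pattern mishandles a run of stars directly before the closing slash (for example, it does not match /***/); a pattern that covers this case is:
/\*([^*]|\*+[^*\/])*\*+\//
The comment-stripping logic translates to the following DFA (four states):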
- state 0, input /, next state 1, output none
- state 0, input other, next state 0, output read char
- state 1, input *, next state 2, output none
- state 1, input /, next state 1, output /
- state 1, input other, next state 0, output / and read char
- state 2, input *, next state 3, output none
- state 2, input other, next state 2, output none
- state 3, input /, next state 0, output none
- state 3, input *, next state 3, output none
- state 3, input other, next state 2, output none
The possible inputs are /, * and any other character. The possible outputs are: none, the character just read, /, or / followed by the character just read.
This translates to the following code:
file uncomment.c:
#include <stdio.h>
/*
 * uncomment: copy standard input to standard output, leaving out every
 * C block comment.
 *
 * The loop is a four-state machine fed one character at a time:
 *   0 - ordinary text, copied through
 *   1 - a '/' has been read and is held back until the next character
 *   2 - inside a comment
 *   3 - inside a comment, and the previous character was '*'
 *
 * Limitations visible in the code: string and character literals are not
 * recognised, a removed comment is replaced by nothing (not by a space),
 * and a comment still open at end of input is dropped silently.
 *
 * Returns 0.
 */
int main(void)
{
    int c;      /* int rather than char so EOF stays distinguishable */
    int st = 0; /* current state of the machine */

    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* initial state */
            switch (c) {
            case '/': st = 1; break;
            default: putchar(c); break;
            }
            break;
        case 1: /* we have read "/" */
            switch (c) {
            /* Emit the older '/', keep holding the new one: it may still
               turn out to open a comment. */
            case '/': putchar('/'); break;
            case '*': st = 2; break;
            default: putchar('/'); putchar(c); st = 0; break;
            }
            break;
        case 2: /* inside a comment */
            switch (c) {
            case '*': st = 3; break;
            default: break;
            }
            break;
        case 3: /* inside a comment, previous character was '*' */
            switch (c) {
            case '/': st = 0; break;
            case '*': break; /* still a candidate for the closing pair */
            default: st = 2; break;
            }
            break;
        default: /* not reachable: st only ever holds 0..3 */
            break;
        }
    }

    /* A '/' still held back in state 1 was the last character of the
       input; write it so the output is not one character short. */
    if (st == 1) {
        putchar('/');
    }
    return 0;
}
To remove both kinds of comments (block comments and // line comments), we need to switch to a fifth state when receiving a second /, resulting in the following code:
file uncomment2.c:
#include <stdio.h>
/*
 * uncomment2: copy standard input to standard output, leaving out both
 * block comments and line comments.
 *
 * The loop is a five-state machine fed one character at a time:
 *   0 - ordinary text, copied through
 *   1 - a '/' has been read and is held back until the next character
 *   2 - inside a block comment
 *   3 - inside a block comment, and the previous character was '*'
 *   4 - inside a line comment, up to (not including) the newline
 *
 * Limitations visible in the code: string and character literals are not
 * recognised, a backslash before the newline does not continue a line
 * comment, a removed block comment is replaced by nothing (not by a
 * space), and a comment still open at end of input is dropped silently.
 *
 * Returns 0.
 */
int main(void)
{
    int c;      /* int rather than char so EOF stays distinguishable */
    int st = 0; /* current state of the machine */

    while ((c = getchar()) != EOF) {
        switch (st) {
        case 0: /* initial state */
            switch (c) {
            case '/': st = 1; break;
            default: putchar(c); break;
            }
            break;
        case 1: /* we have read "/" */
            switch (c) {
            case '/': st = 4; break; /* second '/': line comment starts */
            case '*': st = 2; break;
            default: putchar('/'); putchar(c); st = 0; break;
            }
            break;
        case 2: /* inside a block comment */
            switch (c) {
            case '*': st = 3; break;
            default: break;
            }
            break;
        case 3: /* inside a block comment, previous character was '*' */
            switch (c) {
            case '/': st = 0; break;
            case '*': break; /* still a candidate for the closing pair */
            default: st = 2; break;
            }
            break;
        // in the next line we put // inside an `old' comment
        // to illustrate this special case. The switch has been put
        // after the comment to show it is not being commented out.
        case 4: /* we have read "// ..." */ switch (c) {
            /* The newline ends the comment but is itself kept. */
            case '\n': st = 0; putchar('\n'); break;
            default: break;
            } // switch (to illustrate this kind of comment).
            break;
        default: /* not reachable: st only ever holds 0..4 */
            break;
        }
    }

    /* A '/' still held back in state 1 was the last character of the
       input; write it so the output is not one character short. */
    if (st == 1) {
        putchar('/');
    }
    return 0;
}