3

Take the very simple C program below. My understanding is that the preprocessor is invoked first to expand macros, include header files, etc.

My understanding was that the preprocessor would first include all the code (declarations) from the header file stdio.h in the C file before compilation, therefore making the C file bigger and the line numbers different, hence the printf() call would be further down the file.

If that's the case, why are the line numbers still correct during debugging?

#include <stdio.h>
int main()
{
    printf("Hello world!\n");
}
Engineer999
  • 3,683
  • 6
  • 33
  • 71

3 Answers

4

Yes, the contents of the header file(s) are included in the text.

The line numbers are preserved because the pre-processor has a mechanism to specify the line numbers:

#line 97 "original.c"
/* This is line 97 of the original C file */

These line numbers are used in reporting bugs. Typically, the compiler uses a shorthand notation which it understands — it often omits the word line and adds extra information — see GCC Preprocessor output for example.

You can see this by compiling:

#line 1000
pod variable = { 0 };

Put that in a file; try to compile it; notice that the compiler complains about an unknown type pod on line 1000, even though it is line 2 in the source file.

$ gcc -O -c original.c
original.c:1000:1: error: unknown type name ‘pod’
$

You can run the preprocessor and see the (voluminous) output using the -E option with GCC and most Unix-based C compilers:

$ gcc -E original.c
# 1 "original.c"
# 1 "<built-in>"
# 1 "<command-line>"
# 1 "original.c"
# 1000 "original.c"
pod variable = { 0 };
$

Note that if I included <stdio.h>, the output would be much larger (I got 577 lines on one machine — that's less than I expected).

You may also be able to run a standalone C preprocessor — typically called cpp — with a subset of the arguments for the C compiler. That will sometimes give you slightly different results from the main compiler, but it is often a good way to see what's going on.


Right now, I've got a problem with some code — my test case is:

/* Minimized GCC bug */

#include <assert.h>
#include <string.h>

extern int chk_arg(const char *arg);

int chk_arg(const char *arg)
{
    assert(strncmp(arg, "-", 1) == 0 && strncmp(arg, "--", 2) != 0);
    return (arg[1] == 'a');
}

The compilation error that's distressing me is:

$   gcc -std=c11 -Wall -Wmissing-prototypes -Wstrict-prototypes -Wextra -pedantic -Werror -g -O3   -c gccbug.c
gccbug.c: In function ‘chk_arg’:
gccbug.c:10:5: error: string length ‘4587’ is greater than the length ‘4095’ ISO C99 compilers are required to support [-Werror=overlength-strings]
   10 |     assert(strncmp(arg, "-", 1) == 0 && strncmp(arg, "--", 2) != 0);
      |     ^~~~~~
cc1: all warnings being treated as errors
$

It ain't obvious where there's a string of 4587 characters, until you use gcc -E:

int chk_arg(const char *arg)
{

# 10 "gccbug.c" 3 4
   (((__extension__ (__builtin_constant_p (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ) && ((__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ))) || (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   )))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && __builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s2_len = strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ), (!((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) || __s2_len >= 4)) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s1_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ); register int __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) == 1) && (__s2_len = strlen (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ), __s2_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   ))[3]); } } __result; }))) : __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   )))); }) : strncmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "-"
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   1
# 10 "gccbug.c" 3 4
   ))) 
# 10 "gccbug.c"
   == 0 && 
# 10 "gccbug.c" 3 4
   (__extension__ (__builtin_constant_p (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ) && ((__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ))) || (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) < ((size_t) (
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   )))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && __builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s2_len = strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ), (!((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) || __s2_len >= 4)) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) && (__s1_len = strlen (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ), __s1_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ); register int __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) == 1) && (__s2_len = strlen (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ), __s2_len < 4) ? (__builtin_constant_p (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) && ((size_t)(const void *)((
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) + 1) - (size_t)(const void *)(
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ) == 1) ? __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ) : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   ); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   ))[3]); } } __result; }))) : __builtin_strcmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   )))); }) : strncmp (
# 10 "gccbug.c"
   arg
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   "--"
# 10 "gccbug.c" 3 4
   , 
# 10 "gccbug.c"
   2
# 10 "gccbug.c" 3 4
   ))) 
# 10 "gccbug.c"
   != 0
# 10 "gccbug.c" 3 4
   ) ? (void) (0) : (__assert_fail (
# 10 "gccbug.c"
   "(__extension__ (__builtin_constant_p (1) && ((__builtin_constant_p (arg) && strlen (arg) < ((size_t) (1))) || (__builtin_constant_p (\"-\") && strlen (\"-\") < ((size_t) (1)))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (arg) && __builtin_constant_p (\"-\") && (__s1_len = strlen (arg), __s2_len = strlen (\"-\"), (!((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) || __s2_len >= 4)) ? __builtin_strcmp (arg, \"-\") : (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) && (__s1_len = strlen (arg), __s1_len < 4) ? (__builtin_constant_p (\"-\") && ((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) ? __builtin_strcmp (arg, \"-\") : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (\"-\"); register int __result = (((__const unsigned char *) (__const char *) (arg))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (arg))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (\"-\") && ((size_t)(const void *)((\"-\") + 1) - (size_t)(const void *)(\"-\") == 1) && (__s2_len = strlen (\"-\"), __s2_len < 4) ? (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) ? 
__builtin_strcmp (arg, \"-\") : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (arg); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (\"-\"))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (\"-\"))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (\"-\"))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (\"-\"))[3]); } } __result; }))) : __builtin_strcmp (arg, \"-\")))); }) : strncmp (arg, \"-\", 1))) == 0 && (__extension__ (__builtin_constant_p (2) && ((__builtin_constant_p (arg) && strlen (arg) < ((size_t) (2))) || (__builtin_constant_p (\"--\") && strlen (\"--\") < ((size_t) (2)))) ? __extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p (arg) && __builtin_constant_p (\"--\") && (__s1_len = strlen (arg), __s2_len = strlen (\"--\"), (!((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) || __s1_len >= 4) && (!((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) || __s2_len >= 4)) ? __builtin_strcmp (arg, \"--\") : (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) && (__s1_len = strlen (arg), __s1_len < 4) ? (__builtin_constant_p (\"--\") && ((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) ? 
__builtin_strcmp (arg, \"--\") : (__extension__ ({ __const unsigned char *__s2 = (__const unsigned char *) (__const char *) (\"--\"); register int __result = (((__const unsigned char *) (__const char *) (arg))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((__const unsigned char *) (__const char *) (arg))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((__const unsigned char *) (__const char *) (arg))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (\"--\") && ((size_t)(const void *)((\"--\") + 1) - (size_t)(const void *)(\"--\") == 1) && (__s2_len = strlen (\"--\"), __s2_len < 4) ? (__builtin_constant_p (arg) && ((size_t)(const void *)((arg) + 1) - (size_t)(const void *)(arg) == 1) ? __builtin_strcmp (arg, \"--\") : (__extension__ ({ __const unsigned char *__s1 = (__const unsigned char *) (__const char *) (arg); register int __result = __s1[0] - ((__const unsigned char *) (__const char *) (\"--\"))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((__const unsigned char *) (__const char *) (\"--\"))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((__const unsigned char *) (__const char *) (\"--\"))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((__const unsigned char *) (__const char *) (\"--\"))[3]); } } __result; }))) : __builtin_strcmp (arg, \"--\")))); }) : strncmp (arg, \"--\", 2))) != 0"
# 10 "gccbug.c" 3 4
   , "gccbug.c", 10, __PRETTY_FUNCTION__), (void) (0)))
# 10 "gccbug.c"
                                                                  ;
    return (arg[1] == 'a');
}

Ouch! I don't know that I'd recognize the assertion if it fired — it doesn't look like the text in the source code, that's for sure! (That's from GCC 9.2.0 running on an ancient RHEL 5 Linux.)

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • A nice addition to this answer is that one can run the pre-processor using e.g.: `cpp example.c` and look at the results. – Cheatah Oct 03 '19 at 19:19
2

The compiler keeps track of the filename and line number of each input line. That information is kept with each token, so that it can be used when needed (normally to create an error or warning message).

rici
  • 234,347
  • 28
  • 237
  • 341
0

The preprocessor outputs some special directives which can control the name of the source file and the line number that the compiler thinks it's working on.

Given these files:

x1.h:

int x = 4;

int printf(const char *, ...);

x1.c:

#include "x1.h"

int main()
{
    printf("x=%d\n", x);
    return 0;
}

The preprocessor for gcc outputs the following:

# 1 "x1.c"
# 1 "<built-in>"
# 1 "<command-line>"
# 31 "<command-line>"
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 32 "<command-line>" 2
# 1 "x1.c"
# 1 "x1.h" 1
int x = 4;

int printf(const char *, ...);
# 2 "x1.c" 2

int main()
{
    printf("x=%d\n", x);
    return 0;
}

Each line starting with # includes the current line number, the name of the current source file, plus some additional flags.

At the start you see source:line is set to x1.c:1 which is the start of the main source file. Then you can see right before the inclusion of x1.h that the source:line is set to x1.h:1. After the include the source:line is then set to x1.c:2.

This is just one example of how this is achieved. Other compilers perform something similar.

dbush
  • 205,898
  • 23
  • 218
  • 273