2

Possible Duplicate:
Program crashes when trying to set a character of a char array

I have a sample code which works as expected:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Working variant: runs strtok() on a local char array. */
int main ()
{
  /* Array initialised from the literal: str holds its own copy of the characters. */
  char str[] ="- This, a sample string.";
  char * pch;  /* receives the pointer returned by strtok() */
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* Only this first strtok() call runs; pch is not read while the loop below stays commented out. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

... unless I change char str[] to char* str which shouldn't make any differences in semantics:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Variant reported to crash: same calls as before, but str is a pointer instead of an array. */
int main ()
{
  /* str is initialised to point at the string literal itself; no copy is made. */
  char * str ="- This, a sample string.";
  char * pch;  /* receives the pointer returned by strtok() */
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* strtok() is handed the literal's own storage as its first argument. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

This is the unexpected result:

Splitting string "- This, a sample string." into tokens:
Segmentation fault

I compiled both examples with:

gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c

and even looked at the assembly ... but I can't figure out what's wrong with the second version.

Here is the working -O1 assembly:

    # GCC output for the char str[] variant (GNU as, Intel syntax, no register prefixes).
    .file   "main.c"
    .intel_syntax noprefix
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC0:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " ,.-"
    # Only the format string (.LC0) and the delimiter set (.LC1) are emitted as data;
    # the text being tokenised is built on the stack inside main.
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    push    rbx
    .cfi_def_cfa_offset 16
    sub rsp, 48
    .cfi_def_cfa_offset 64
    # Save the qword at fs:40 in [rsp+40]; it is compared again before returning.
    mov rax, QWORD PTR fs:40
    mov QWORD PTR [rsp+40], rax
    # eax = 0; nothing writes eax again before the __printf_chk call.
    xor eax, eax
    # Build str[] at [rsp]..[rsp+24]: the six little-endian dwords are the bytes of
    # "- This, a sample string." and the byte store writes the terminating NUL.
    mov DWORD PTR [rsp], 1750343725
    mov DWORD PTR [rsp+4], 539784041
    mov DWORD PTR [rsp+8], 1634934881
    mov DWORD PTR [rsp+12], 1701605485
    mov DWORD PTR [rsp+16], 1920234272
    mov DWORD PTR [rsp+20], 778530409
    mov BYTE PTR [rsp+24], 0
    # __printf_chk(1, .LC0, rsp): the %s argument is the stack copy.
    mov rdx, rsp
    mov esi, OFFSET FLAT:.LC0
    mov edi, 1
    .cfi_offset 3, -16
    call    __printf_chk
    # strtok(rsp, .LC1): the first argument is the stack buffer filled above.
    mov esi, OFFSET FLAT:.LC1
    mov rdi, rsp
    call    strtok
    # Return value 0.
    mov eax, 0
    # Compare the saved qword with fs:40; if they differ, call __stack_chk_fail.
    mov rdx, QWORD PTR [rsp+40]
    xor rdx, QWORD PTR fs:40
    je  .L3
    call    __stack_chk_fail
.L3:
    add rsp, 48
    pop rbx
    .p2align 4,,1
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

and the broken one:

    # GCC output for the char *str variant (GNU as, Intel syntax, no register prefixes).
    .file   "main.c"
    .intel_syntax noprefix
    # The text to be tokenised is emitted as .LC0 in a .rodata section;
    # the section flags "aMS" do not include "w".
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "- This, a sample string."
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC1:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1
.LC2:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    sub rsp, 8
    .cfi_def_cfa_offset 16
    # __printf_chk(1, .LC1, .LC0): the %s argument is the address of the .rodata string.
    mov edx, OFFSET FLAT:.LC0
    mov esi, OFFSET FLAT:.LC1
    mov edi, 1
    mov eax, 0
    call    __printf_chk
    # strtok(.LC0, .LC2): the first argument is the .rodata string itself;
    # no stack copy is made in this build.
    mov esi, OFFSET FLAT:.LC2
    mov edi, OFFSET FLAT:.LC0
    call    strtok
    # Return value 0.
    mov eax, 0
    add rsp, 8
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

The only obvious difference I can see is that in the working version GCC substitutes the string constant by MOVs directly in the code.

Help is very appreciated

Edit: the compiler is gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5.

All the best, Thomas

Community
  • 1
  • 1
Thomas Pototschnig
  • 241
  • 1
  • 3
  • 8
  • 2
    Umm... did you search this question up? It's been asked at least 5 times now... – flight Sep 01 '11 at 10:40
  • Read section 6 of [the c-faq](http://c-faq.com/). Basically it says **arrays are not pointers** and **pointers are not arrays**. As you're there already, read the other sections too :) – pmg Sep 01 '11 at 10:41
  • Please decide on a language. C and C++ are different languages. – Kerrek SB Sep 01 '11 at 10:46
  • "which shouldn't make any differences in semantics" - why would you say such a thing? `char str[] = "foo";` defines an array named `str`, and initializes its contents by copying from the string literal. `char *str = "foo";` defines a pointer named `str`, and initializes it to point at the string literal. Quite different things. Also, when writing new C code in gcc you can usually use `-Wwrite-strings` without getting any spurious warnings. That warns for defining a `char*` that points at a string literal (it would have to be `const char*`), catching the problem at compile time. – Steve Jessop Sep 01 '11 at 10:48
  • I flagged this as a duplicate but now I hope they don't close it because there are several very good answers. – Tom Zych Sep 01 '11 at 10:55
  • @Steve: note that `-Wwrite-strings` and `-std=c99` are (kinda) incompatible. The 1st option effectively makes gcc compile for a language that isn't C. – pmg Sep 01 '11 at 11:02
  • Agreed, because the way that gcc implements `-Wwrite-strings` in C is hacky, it changes the type of a string literal to something other than what the standard says it is. One option is to compile with `-Wwrite-strings` to detect errors but then compile again without it (and with `-std=c99 -pedantic`) for conformance. In C++ the warning is not hacky, it just triggers when you use a particular deprecated language feature, which is why it's enabled by default. – Steve Jessop Sep 01 '11 at 11:04

4 Answers4

5

In the second case, you're pointing str at a static object somewhere in memory that can't be changed. The strtok man page warns that it changes its first argument and can't be used on a constant string. Hence the error.

Tom Zych
  • 13,329
  • 9
  • 36
  • 53
5

strtok() requires a modifiable buffer, because it replaces the delimiter by a null byte. So you cannot say char * str = "- This, a sample string.";, because that should really have been const char * str = "- This, a sample string."; and points to read-only memory. Instead, you have several options:

char str[] = "- This, a sample string.";  // local array
char * pch = strtok (str," ,.-");


char * str = strdup("- This, a sample string.");  // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
free(str);
Kerrek SB
  • 464,522
  • 92
  • 875
  • 1,084
4

char * str allocates room for a pointer to a string that happens to be a constant literal (i.e., not writable).

char str[] allocates room for an array whose size is specified by the assigned literal. The array is writable.

strtok() modifies the string it works on. This is allowed with str[] but not with *str.

mouviciel
  • 66,855
  • 13
  • 106
  • 140
3

When you use char p[] = "literal", many a compiler will allocate a character array of the appropriate length and then copy the string from wherever string constants are kept into the array, so you end up with a modifiable copy of the string.

When you use char* p = "literal", you have a pointer that points to that unmodifiable copy of the string. When you attempt to modify it, the behavior is undefined. In fact, at some point g++ started issuing a warning when you do char *p = "literal", because the correct way to specify it is const char* p="literal" since it is a pointer to a constant string.

Dave S
  • 20,507
  • 3
  • 48
  • 68