0

In my code, a random character appears when I pass a char array through a function, like so:

// Tokenizer state: two pointers to character data.
struct TokenizerT_ {        //Definition of the struct
char * sep;     // separator characters
char * toks;    // text to be split into tokens
};

// Abbreviated TKCreate: allocates a tokenizer and copies text into its toks buffer.
// NOTE(review): `hr` is not declared in this excerpt, and neither parameter is used here.
TokenizerT *TKCreate(char *separators, char *ts) {
TokenizerT * inu = malloc(sizeof(*inu));
// NOTE(review): sizeof(char) is 1, so this buffer has room for a single byte only.
inu->toks = malloc(sizeof(char)); //Initialize char array that will store the tokens

// NOTE(review): strcpy writes strlen(hr)+1 bytes into the one-byte buffer above.
strcpy(inu->toks, hr);      
return inu;
}

....... 
// Caller excerpt: prints a literal, then a fresh one-byte allocation, then the tokenizer's copy.
best = "sein";
printf("%s\n", best);
// NOTE(review): one uninitialized byte; nothing in this excerpt stores a string or a '\0' in it.
char * rondo = malloc(sizeof(char));                       
// NOTE(review): %s reads until a '\0' byte, which this excerpt never wrote into rondo.
printf("%s\n", rondo);
TokenizerT * Ray = TKCreate(copy, rondo);                          /
printf("%s\n", Ray->toks);

For the last bit, the printed out values are as follows:

sein
sein
sein?

Why is the question mark appearing? This is usually a random character and not always a question mark.

  Edit: Full code, really desperate



 // Tokenizer state: both members are heap buffers filled in by TKCreate and released by TKDestroy.
 struct TokenizerT_ {        //Definition of the struct
char * sep;     // separator characters (TKCreate stores each distinct separator once)
char * toks;    // text to tokenize, after TKCreate has rewritten backslash escapes
 };

 // Characters accepted after a backslash: n t v b r f a \ " (9 characters plus the terminator).
 char nulines[10] = "ntvbrfa\\\"";           //for the arguments with backslashes
 // Replacement codes, four characters ("0xNN") per entry, in the same order as nulines.
 char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
 typedef struct TokenizerT_ TokenizerT;


  /*
   * Builds a tokenizer from a separator string and an input string.
   * Returns NULL when ts is NULL; otherwise returns a malloc'd TokenizerT whose
   * sep holds the distinct separator characters and whose toks holds ts with
   * each recognised backslash escape rewritten as "[0xNN]".
   */
  TokenizerT *TKCreate(char *separators, char *ts) {

if (ts==NULL) {                 //If there are no tokens to be parsed (empty entry)
    return NULL;
}int lim = 1;   // number of entries used in yr; starts at 1 because yr[0] is filled unconditionally

// NOTE(review): the array length is strlen(separators), so there is no slot for a '\0'
// and the length is zero when separators is empty.
char yr[strlen(separators)]; //Initializes delimiters
// NOTE(review): executed even when separators is the empty string.
yr[0] = *separators;
if(strlen(separators)>0){

int h =1;                          
char zmp = *(separators+h);
// Append every separator that is not already present in yr[0..lim).
for(h=1; h<strlen(separators); h++){
    zmp = *(separators+h);
    int z=0;

    for (z=0; z<lim; z++) {
        if (zmp==yr[z]) {
            z=-1;   // -1 marks "duplicate found"
            break;
        }
    }

    if(z>-1){
        yr[lim] = zmp;
        lim++;}
    else{
        continue;
    }                                   //yr is local variable that contains delimiters
}}
TokenizerT * inu = malloc(sizeof(*inu));    //Creates TokenizerT
// NOTE(review): no statement above stores '\0' in yr, yet strlen and strcpy treat it as a
// string; the allocation also has no +1 for the terminator.
inu->sep = malloc((int)strlen(yr)*sizeof(char)); 
strcpy(inu->sep, yr);              


// NOTE(review): length strlen(ts) leaves no slot for '\0', and an escape turns two input
// characters into six output characters, so writes can run past the array.
char hr [strlen(ts)];                       
lim = 0; int q = 0; int wy=0;   // lim is reused as the write index into hr
for(q=0; q<strlen(ts); q++){
    if(ts[q]=='\\'){
        q++;    // look at the character after the backslash
        for(wy = 0; wy<strlen(nulines); wy++){
            if (nulines[wy]==ts[q]) {
     // Emit "[0x", the two hex digits of entry wy in resp, then "]".
     hr[lim] = '['; hr[++lim] = '0'; hr[++lim] = 'x'; hr[++lim] = resp[wy*4+2];
     hr[++lim] = resp[wy*4+3];
                hr[++lim] = ']'; lim++;
                break;
            }
        }
        // When no entry matched, nothing is written: the backslash and the character after it are skipped.
        continue;
    }
    else{                               
        hr[lim] = ts[q];
        lim++;
    }
}



// NOTE(review): hr[lim] is never set to '\0', so strlen(hr) and the strcpy below read past
// the characters written above.
inu->toks = (char *)malloc(sizeof(char) * strlen(hr) + 1);

strcpy(inu->toks, hr);      //Makes copy
return inu;
 }



// Releases a tokenizer and both of its buffers.
// NOTE(review): tk is dereferenced without a NULL check, although TKCreate can return NULL.
void TKDestroy(TokenizerT *tk) {
free(tk->toks); //Free Memory associated with the token char array
free(tk->sep);  //Free Memory associated with the delimiter char array
free(tk); //Free Memory associated with the tokenizer
}


 /*
  * Returns the characters of tk->toks that precede the first separator found in tk->sep.
  * When tk->sep is empty, tk->toks itself is returned instead of a new buffer.
  */
 char *TKGetNextToken(TokenizerT *tk) {
char * stream = tk->toks;
char * dels = tk->sep;

/*The following two lines initialize the char array to be printed
 as well as the integers to be used in the various loops*/

// NOTE(review): temps is a single uninitialized byte; g becomes -1 once a separator is seen.
char * temps = malloc(sizeof(char)); int g = 0;
int z = 0, x= 0, len = 0;
if (strlen(dels)==0) {          
    // NOTE(review): temps is not freed on this path, and the caller receives the tokenizer's own buffer.
    return stream;
}



for(z = 0; z<strlen(stream); z++){
    char b = *(stream+z);           

    for(x = 0; x<strlen(dels); x++){ 
        // NOTE(review): strlen reads temps, which has not been given a '\0' at this point.
        len = (int)strlen(temps); 
        char c = *(dels+x);

        if(c==b){   //Here, the current character is a delimiter
            g = -1;
            break;
        }

    }
    if (g==-1) {    //If delimiter, then return the current token
        // NOTE(review): returned without writing a terminating '\0'.
        return temps;
    }
        // NOTE(review): writes at offset len into a buffer that was allocated with one byte.
        *(temps+len) = b;   
}
len = (int)strlen(temps);
*(temps+len) = '\0';    //Returns the string with the null character ending it
return temps;
 }



/*
 * Prints every non-empty token of tin, one per line, then destroys the tokenizer.
 * sum is decreased by token length + 1 after each token; main passes strlen(tin->toks).
 * After each token the tokenizer is destroyed and rebuilt from the remaining text.
 */
void TKN(TokenizerT * tin, int sum){

char * tmp = TKGetNextToken(tin);      
// NOTE(review): one byte is allocated, then strcpy copies the whole separator string into it.
char * copy = malloc(sizeof(char));

   strcpy(copy, tin->sep);                 

   // Token length plus one for the separator that followed it.
   int difference = (int)strlen(tmp)+1;
   sum = sum-difference;
  // NOTE(review): one byte again; also, when the token is the last one, toks + difference
  // points past the string's terminator.
  char * best = malloc(sizeof(char));
  strcpy(best, tin->toks + difference);   


    if((int)strlen(tmp)>0){              
   printf("%s\n", tmp);           
  }                                 
  TKDestroy(tin);
tin = TKCreate(copy, best);     // rebuild the tokenizer on the remaining text
while(sum>0){
    // NOTE(review): the previous tmp is overwritten here without being freed.
    tmp = TKGetNextToken(tin);
    if((int)strlen(tmp)>0){                
        printf("%s\n", tmp);
    }
    difference = (int)strlen(tmp)+1;
    sum = sum-difference;
    free(best);
    best = malloc(sizeof(char));
    strcpy(best, tin->toks + difference);
       TKDestroy(tin);
       tin = TKCreate(copy, best);
 }

free(copy);
free(best);
// NOTE(review): when the separator set is empty, TKGetNextToken returns a tokenizer's toks
// pointer, and that tokenizer has already been destroyed by this point.
free(tmp);

  TKDestroy(tin); //Freeing up memory associated with the Tokenizer
  return;
}

// Entry point: argv[1] is the separator set, argv[2] the string to tokenize.
int main(int argc, char **argv) {
// NOTE(review): argc == 2 passes this check, yet argv[2] is read below.
if(argc<2){
    printf("%s\n", "Not enough arguments");
    return 0;
}
else if(argc>3){
    printf("%s\n", "Too many arguments");
    return 0;
}
 else{
char * arr = argv[1];   //Represents delimiters
char * y = argv[2];       //Represents string to be tokenized

TokenizerT * jer = TKCreate(arr, y);    //Create and initialize tokenizer
 //printf("%s\n", jer->toks);
  // NOTE(review): jer is dereferenced without checking for the NULL that TKCreate returns when y is NULL.
  TKN(jer, (int)strlen(jer->toks)); 
 }
return 0;
 }
AbhishekSaha
  • 705
  • 3
  • 9
  • 24

2 Answers2

0

In most of your malloc calls, you allocate space for only one character:

malloc(sizeof(char))

while you should write:

malloc(sizeof(char) * n + 1)

Here n is the length of the string you want to store and the +1 is for the terminating null character. You are seeing the random character because C and C++ strings are terminated by a null character: when the buffer is too small to hold that terminator (or it is never written), functions such as printf keep reading past the end of the allocation until they happen to hit a zero byte.

/*
 * Tokenizer state. Both strings are heap-allocated, NUL-terminated and owned
 * by the tokenizer; release them with TKDestroy.
 */
struct TokenizerT_ {
    char * sep;     /* each distinct separator character, once */
    char * toks;    /* input text with backslash escapes rewritten as "[0xNN]" */
};

/* Characters accepted after a backslash: n t v b r f a \ " */
char nulines[10] = "ntvbrfa\\\"";
/* Replacement codes, four characters ("0xNN") per entry, same order as nulines. */
char resp[37] = "0x0a0x090x0b0x080x0d0x0c0x070x5c0x22";
typedef struct TokenizerT_ TokenizerT;


/*
 * Creates a tokenizer.
 *
 * separators: characters that end a token; duplicates are ignored and NULL is
 *             treated as an empty set.
 * ts:         text to tokenize. A backslash followed by one of the characters
 *             in nulines becomes "[0xNN]"; any other backslash pair (and a
 *             lone trailing backslash) is dropped.
 *
 * Returns a new tokenizer, or NULL when ts is NULL or an allocation fails.
 * The caller owns the result and must release it with TKDestroy.
 */
TokenizerT *TKCreate(char *separators, char *ts) {
    if (ts == NULL) {               /* nothing to tokenize */
        return NULL;
    }
    if (separators == NULL) {
        separators = "";
    }

    size_t sep_len = strlen(separators);
    size_t ts_len = strlen(ts);

    TokenizerT *inu = malloc(sizeof *inu);
    if (inu == NULL) {
        return NULL;
    }

    /* +1 for the terminator. An escape turns 2 input characters into 6
       output characters, so the expanded text is at most 3x the input. */
    inu->sep = malloc(sep_len + 1);
    inu->toks = malloc(3 * ts_len + 1);
    if (inu->sep == NULL || inu->toks == NULL) {
        free(inu->sep);
        free(inu->toks);
        free(inu);
        return NULL;
    }

    /* Copy each separator once. sep stays NUL-terminated after every append
       so strchr can be used for the duplicate check. */
    size_t used = 0;
    inu->sep[0] = '\0';
    for (size_t h = 0; h < sep_len; h++) {
        if (strchr(inu->sep, separators[h]) == NULL) {
            inu->sep[used++] = separators[h];
            inu->sep[used] = '\0';
        }
    }

    /* Copy the text, rewriting escapes. */
    size_t out = 0;
    for (size_t q = 0; q < ts_len; q++) {
        if (ts[q] != '\\') {
            inu->toks[out++] = ts[q];
            continue;
        }

        q++;                        /* character after the backslash */
        if (q >= ts_len) {
            break;                  /* lone trailing backslash: dropped */
        }

        const char *hit = strchr(nulines, ts[q]);
        if (hit != NULL) {
            size_t wy = (size_t)(hit - nulines);
            inu->toks[out++] = '[';
            inu->toks[out++] = '0';
            inu->toks[out++] = 'x';
            inu->toks[out++] = resp[wy * 4 + 2];
            inu->toks[out++] = resp[wy * 4 + 3];
            inu->toks[out++] = ']';
        }
        /* Unrecognised escapes are dropped together with their backslash. */
    }
    inu->toks[out] = '\0';          /* the missing terminator caused the stray character */

    return inu;
}



/*
 * Releases a tokenizer together with its separator and token buffers.
 * Passing NULL is a no-op, so the result of a failed TKCreate can be handed
 * straight to this function.
 */
void TKDestroy(TokenizerT *tk) {
    if (tk == NULL) {
        return;
    }
    free(tk->toks); /* text buffer */
    free(tk->sep);  /* separator set */
    free(tk);       /* the tokenizer itself */
}


/*
 * Returns a newly allocated, NUL-terminated copy of the characters in
 * tk->toks that precede the first separator (or of the whole text when no
 * separator occurs or the separator set is empty). The token is empty when
 * the text starts with a separator.
 *
 * The caller owns the returned string and must free() it. Returns NULL when
 * tk or its text is NULL, or when the allocation fails.
 */
char *TKGetNextToken(TokenizerT *tk) {
    if (tk == NULL || tk->toks == NULL) {
        return NULL;
    }

    const char *stream = tk->toks;
    const char *dels = (tk->sep != NULL) ? tk->sep : "";

    /* Length of the leading run that contains no separator character. */
    size_t len = strcspn(stream, dels);

    char *token = malloc(len + 1);  /* +1 for the terminator */
    if (token == NULL) {
        return NULL;
    }
    memcpy(token, stream, len);
    token[len] = '\0';
    return token;
}



/*
 * Prints every non-empty token of tin, one per line, then destroys tin.
 *
 * tin: tokenizer to consume; it is released before returning, so the caller
 *      must not use it afterwards. NULL is ignored.
 * sum: number of characters to process, normally strlen(tin->toks). It is
 *      reduced by token length + 1 after each token; scanning stops when it
 *      reaches zero or the end of the text, whichever comes first.
 */
void TKN(TokenizerT * tin, int sum){
    if (tin == NULL) {
        return;
    }

    const size_t total = strlen(tin->toks);
    size_t offset = 0;              /* start of the text not yet tokenized */

    do {
        /* A stack-allocated view onto the remaining text avoids rebuilding
           the tokenizer (and copying the text) after every token. */
        TokenizerT view = { .sep = tin->sep, .toks = tin->toks + offset };

        char *tok = TKGetNextToken(&view);
        if (tok == NULL) {
            break;
        }

        size_t len = strlen(tok);
        if (len > 0) {
            printf("%s\n", tok);
        }

        /* Free the token unless it aliases the tokenizer's own buffer. */
        if (tok != view.toks) {
            free(tok);
        }

        /* Skip the token and the separator that ended it. */
        sum -= (int)len + 1;
        offset += len + 1;
    } while (sum > 0 && offset < total);

    TKDestroy(tin); /* release the tokenizer handed to us */
}

/*
 * Usage: program <separators> <text>
 * Prints each token of <text>, split on any character of <separators>,
 * on its own line.
 */
int main(int argc, char **argv) {
    /* Both arguments are required: argv[1] and argv[2] are read below. */
    if (argc < 3) {
        printf("%s\n", "Not enough arguments");
        return 0;
    }
    if (argc > 3) {
        printf("%s\n", "Too many arguments");
        return 0;
    }

    char *arr = argv[1];    /* separator characters */
    char *y = argv[2];      /* string to be tokenized */

    TokenizerT *jer = TKCreate(arr, y);
    if (jer == NULL) {      /* TKCreate reports failure with NULL */
        fprintf(stderr, "%s\n", "Could not create tokenizer");
        return 1;
    }

    TKN(jer, (int)strlen(jer->toks));   /* TKN destroys jer */
    return 0;
}
manman
  • 4,743
  • 3
  • 30
  • 42
  • Didn't fix it unfortunately – AbhishekSaha Feb 04 '14 at 04:06
  • @user2624018 updated the sample code, it is working for me with some assumptions about missing codes, so if you want you can update your question with full code for further help – manman Feb 04 '14 at 04:15
  • It is working for me on Xcode 5, I just added some type casting for `malloc` retuning types since Xcode was complaining about it. – manman Feb 04 '14 at 04:50
  • @manman Don't ever cast the return value of malloc; it is wrong. Include `<stdlib.h>` and carry on with your malloc'ed value without using casts. Google why casting the return value of malloc is bad, or read [this](http://stackoverflow.com/questions/1565496/specifically-whats-dangerous-about-casting-the-result-of-malloc) – Koushik Shetty Feb 04 '14 at 05:06
  • @manman Using "rs" "tursein" as arguments, the output is: tu ein? The question mark of course is random character – AbhishekSaha Feb 04 '14 at 05:09
  • @user2624018 what kind of IDE are you using? I don't get such result in Xcode. – manman Feb 04 '14 at 05:13
  • Mac OSX terminal. I'm sorry about all the questions, this is just my third class in C so I'm clearly missing something – AbhishekSaha Feb 04 '14 at 05:19
  • @Koushik Thanks for mentioning that, I've never known about that topic. However, I have to do it since Xcode gives compiler errors if I don't cast return value. – manman Feb 04 '14 at 05:21
  • @user2624018 I compiled the code with gcc and run it with "rs" and "tu ein" and the output was two lines: 1. tu, 2. ein. So You might missing something. Just copy past the code above and paste it in a file and compile and run that one – manman Feb 04 '14 at 05:25
0
char * rondo = malloc(sizeof(char));                       
printf("%s\n", rondo);

is undefined behaviour (UB).
This is what you are doing:

On the free store (heap), you allocate memory the size of one char (sizeof(char) is always 1 byte), take the address of that location and store it in rondo.
So when you dereference rondo, i.e. *rondo, you can legally access only that single char; accessing anything next to it or near it is illegal.

So in printf("%s\n", rondo); you tell printf that the pointer you pass points to a string, so it prints until it reaches a '\0' (NUL) character. But you never stored one, which means printf is reading memory that was never allocated to you. What you saw was pure luck (or rather, bad luck).

you can only do this

printf("%c\n", *rondo); but even before that you have to initialize the char, e.g.

char * rondo  = malloc(sizeof(char));
*rondo = 'K';
printf("%c\n",*rondo);

But I bet you didn't mean that; you probably meant

char * rondo = malloc(sizeof(char)*no_of_characters_in_string+1);  

where the +1 is for the terminating NUL character.

The characters you saw are not related to your program: you accessed memory that does not belong to your allocation (it may have been allocated to something else, or belong to the OS).

Edit: there is also a huge problem in your code. You are malloc'ing memory but never freeing it. For small demo programs that may seem OK (it is not really), but it is definitely a bad habit. Please always pair each malloc with a free().

My advice: get a good textbook. It will explain these things in more detail.

Koushik Shetty
  • 2,146
  • 4
  • 20
  • 31