0

I have a file with names, surnames, ids, and e-mails which is in a random order. I have to organize these datas, write to structures and to an output file as organized. There may be more than one name and surname. Here is an example of disordered.txt:

abc@gmail.com Andee Kenny SMITH 1234
ADAM ADAM abc@gmail.com Andeee 21654
Anderea abc@gmail.com SAMMY 3524654
abc@gmail.com Andi BROWN 1245
Andie abc@gmail.com KNOWY 2485
Andra abc@gmail.com BRUCE 52445
Andrea abc@gmail.com 246574 DENNIS
2154 Andreana abc@gmail.com CHASE
Andree 21524 SIERRRA abc@gmail.com
Andrei 154 MONDY abc@gmail.com
4564765 Andria LE BARC abc@gmail.com
78 Andriana abc@gmail.com WALLS

My code works fine with this 12 people but if I copy-paste it a lot or add new people, after 33 people, it prints invalid characters in front of the names and surnames in repeating manner.

Here is the screenshot of: organized.txt

I preferred to use char pointers in my structure.

Here is my code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define TRUE 1
#define WORD_SIZE 30
#define NAME 1
#define SURNAME 2
#define EMAIL 3
#define ID 4

typedef struct ppl {
    char* name;
    char* surname;
    char* eMail;
    int id;
} PEOPLE;

int whichDataType (char* buffer);
void writeData (PEOPLE* person, char* buffer, int whichData, int* nameTimes, int* surnameTimes, int personNumber);  
void printData (PEOPLE* person, FILE* sptr, int personNumber);

int main (void) {
    FILE* fptr = NULL;

    fptr = fopen("disorganized.txt", "r");
    if (fptr == NULL) {
        printf ("Disorganized file couldn't open\n");
        printf ("Exiting the program\n");
        exit(TRUE);
    }

    FILE* sptr = NULL;

    sptr = fopen("organized.txt", "w");
    if (sptr == NULL) {
        printf ("Organized file couldn't open\n");
        printf ("Exiting the program\n");
        exit(TRUE);
    }

    int whichData;
    int personNumber = 0;
    int* nameTimes;
    int* surnameTimes;
    int forOnce = 0;
    char* buffer;
    int* buffer2;
    PEOPLE* person;

    person = (PEOPLE*) malloc (sizeof(PEOPLE));     
    buffer = (char*) malloc ( WORD_SIZE * sizeof(char));
    nameTimes = (int*) malloc ( sizeof(int));
    surnameTimes = (int*) malloc (sizeof(int));

    *nameTimes = 0;
    *surnameTimes = 0;

    //gets word 'till EOF
    while ((fscanf(fptr, "%s", buffer)) == 1) {
        if (personNumber != 0) {
            //creates new structure
            person = (PEOPLE*) realloc (person, personNumber * sizeof(PEOPLE));
        }
        //looks what type of data
        whichData = whichDataType(buffer);
        //allocates inside of structures and writes
        writeData(person, buffer, whichData, nameTimes, surnameTimes, personNumber);

        buffer2 = (int*) malloc (sizeof(int));
        *buffer2 = fgetc(fptr); //checks what's coming next

        if (*buffer2 == '\n') {
            if (forOnce == 0) {
                //to open a place for next person in my structure pointer, since personNumber = 0; increasing it with 1 and reallocating it with 1*sizeof(PEOPLE) would be the allocating memory for person 1 twice.
                personNumber = personNumber + 2;
                free(buffer2);
                free(buffer);
                buffer = (char*) malloc ( WORD_SIZE * sizeof(char));
                *nameTimes = 0;
                *surnameTimes = 0;
                ++forOnce;
            }
            else {
                ++personNumber;
                free(buffer2);
                free(buffer);
                buffer = (char*) malloc ( WORD_SIZE * sizeof(char));
                *nameTimes = 0;
                *surnameTimes = 0;
            }
        }
        else if (*buffer2 == ' ' || *buffer2 == '\t') {
            free(buffer2);
            free(buffer);
            buffer = (char*) malloc ( WORD_SIZE * sizeof(char));
        }
    }

    --personNumber; //my algorithm increases it 1 more time which is redundant

    printData (person, sptr, personNumber);
    int i;

    for (i = 0; i<personNumber; ++i) {
        free((person+i)->name);
        free((person+i)->surname);
        free((person+i)->eMail);
    }

    free(person);
    free(buffer);
    free(buffer2);
    free(nameTimes);
    free(surnameTimes);

    fclose(fptr);
    fclose(sptr);

    return 0;
}

int whichDataType (char* buffer) {
    if (buffer[0] >= 'A' && buffer[0] <= 'Z') {
        if (buffer[1] >= 'a' && buffer[1] <= 'z') {
            return NAME;
        }
        else if (buffer[1] >= 'A' && buffer[1] <= 'Z') {
            return SURNAME;
        }
    }
    else if (buffer[0] >= 'a' && buffer[0] <= 'z') {
        return EMAIL;
    }
    else if (buffer[0] >= '0' && buffer[0] <= '9') {
        return ID;
    }
} 

void writeData (PEOPLE* person, char* buffer, int whichData, int* nameTimes, int* surnameTimes, int personNumber) {
    if (personNumber != 0) {
        --personNumber;
    }

    switch (whichData) {
    case NAME:
        if (*nameTimes == 0) {
            (person + personNumber)->name = (char*) malloc ( WORD_SIZE * sizeof(char));
            ++(*nameTimes);
        }
        break;

    case SURNAME:
        if (*surnameTimes == 0) {
            (person+personNumber)->surname = (char*) malloc ( WORD_SIZE * sizeof(char));
            ++(*surnameTimes);
        }
        break;

    case EMAIL:
        (person + personNumber)->eMail = (char*) malloc ( WORD_SIZE * sizeof(char));
        break;
    }

    char space[2];
    strcpy(space, " ");

    switch (whichData) {
    case NAME:
        if (*nameTimes == 0) {
            strcpy( (person+personNumber)->name, buffer);
        }
        else {
            strcat ( (person+personNumber)->name, space);
            strcat( (person+personNumber)->name, buffer);
        }
        break;

    case SURNAME:
        if (*surnameTimes == 0) {
            strcpy ( (person+personNumber)->surname, buffer);
        }
        else {
            strcat( (person + personNumber)->surname, space);
            strcat( (person + personNumber)->surname, buffer);
        }
        break;

    case EMAIL:
        strcpy( (person + personNumber)->eMail, buffer);
        break;

    case ID:
        (person+personNumber)->id = atoi(buffer);
        break;
    }

}

void printData (PEOPLE* person, FILE* sptr, int personNumber) {
    fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");
    fprintf(sptr, "\n|%30s\t\t", "***NAME***");
    fprintf(sptr, "|%30s\t\t", "***SURNAME***");
    fprintf(sptr, "|%30s\t\t", "***E-MAIL***");
    fprintf(sptr, "|%30s\t|\n", "***ID NUMBER***");
    fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");

    int i;

    for (i = 0; i<personNumber; ++i) {
        fprintf(sptr, "\n|%d%30s\t\t", i, (person+i)->name);
        fprintf(sptr, "|%30s\t\t", (person+i)->surname);
        fprintf(sptr, "|%30s\t\t", (person+i)->eMail);
        fprintf(sptr, "|%30d\t|\n", (person+i)->id);
        fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");
    }
}

I try to malloc every new struct and inside of it, free my buffers after I finish my work with them and allocate them again for next ones. If there is more than one name or surname for one person, I allocate its name or surname once and and strcpy my buffer to appropriate place. If I go to my writeData function for name or surname again for same person, I pass the allocated memory because I already did that. Then I'm basically concatenating my new buffer (name or surname) next to old one.

My question is, why am I getting these invalid characters, where did I make a mistake and how can I prevent it?

Cœur
  • 37,241
  • 25
  • 195
  • 267
  • 2
    Welcome to Stack Overflow! [don't cast malloc](https://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc) – Barmar Dec 25 '18 at 21:31
  • Why are you using dynamic allocation for `nameTimes` and `surnameTimes`? These should just be ordinary local variables. – Barmar Dec 25 '18 at 21:34
  • When you're reading the 2nd person, `personnumber` is only `1`, so you don't `realloc()` a large enough size for 1 people. – Barmar Dec 25 '18 at 21:38
  • Since `writeData` subtracts 1 from `personnumber` except when it's `0`, you overwrite the first person with the 2nd person. That solves the problem I mentioned in my previous comment, but means you lose the first person. Try to avoid treating `0` as a special case, and simplify the code. – Barmar Dec 25 '18 at 21:44
  • Thanks for your comments @Barmar. I need to use them in another function and if i create them as pointers, i need to allocate a space for them to write something in it but now i realise that it is meaningless to use them as pointers since i can send adresses of them. Played too much with pointesr :p For your third comment, to avoid that, `forOnce` i increase `personNumber` twice so it allocates enough for second person too. – repelliuss Dec 25 '18 at 22:28
  • For your fourth comment, i use `personNumber` to reach another structure like `(person+personNumber)` . When it comes to `writeData` for first person, `personNumber` is 0 so it is at first structure. At second time program goes to `writeData` function, and since `personNumber` increased twice after first person, `(person+personNumber)` would point to 3. person but i need the struct of second person so i decrease it 1 if it is not 0. I do that cause i use `personNumber` while reallocating too. @Barmar – repelliuss Dec 25 '18 at 22:43
  • 1
    In the second `switch` series, in the `else` parts, shouldn’t the first call be `strcpy` and only the *second* `strcat`? It says [here](https://www.tutorialspoint.com/c_standard_library/c_function_strcat.htm) that `strcat` needs a string to be there already. – Jim Danner Dec 26 '18 at 00:15
  • OT: regarding `if (fptr == NULL) { printf ("Disorganized file couldn't open\n"); printf ("Exiting the program\n"); exit(TRUE);` Error messages should be output to `stderr`, not `stdout` and when the error indication is from a C library function, should also output the text reason the system thinks the error occurred. To do so, call `perror( "my error message" );` – user3629249 Dec 26 '18 at 02:21
  • regarding the function: `whichDataType()` when all the `if()` statements fail, then there is a path that does not set nor return a valid value. This causes the compiler to output a message about "warning: control reaches end of non-void function." that needs to be corrected – user3629249 Dec 26 '18 at 02:25
  • when exiting a program, due to an error, rather than using: `exit( TRUE )` strongly suggest using: `exit( EXIT_FAILURE );` also, rather than defining `TRUE`, suggest adding the statement: `#include ` (which defines `true`, `false`, etc – user3629249 Dec 26 '18 at 02:27
  • OT: regarding: `buffer = (char*) malloc ( WORD_SIZE * sizeof(char)); (and all calls to `malloc()` ) 1) the expression: `sizeof(char)` is defined in the C standard as 1. Multiplying anything by 1 has no effect. Suggest removing that expression. 2) the returned type is `void*` which can be assigned to any pointer. Casting just clutters the code, making it more difficult to understand, debug, etc. 3) Always check (!=NULL) the returned value to assure the operation was successful. If not successful, then call `perror( "malloc failed" );` – user3629249 Dec 26 '18 at 02:43
  • regarding: `while ((fscanf(fptr, "%s", buffer)) == 1) {` when using the input format specifier: '%s' and/or '%[...]', always include a MAX CHARACTERS modifier that is 1 less than the length of the input buffer because those input specifiers always append a NUL byte. The result is the buffer will never be overflowed so never get the resulting undefined behavior – user3629249 Dec 26 '18 at 02:47
  • when calling `realloc()`, always assign the returned value to a temp variable, then check (!=NULL) and only if not NULL then assign to the target variable. Otherwise, when `realloc()` fails, the pointer to the original allocated heap memory will be lost, resulting in a (unrecoverable) memory leak. – user3629249 Dec 26 '18 at 02:50
  • Hi @JimDanner, first one is `strcat` too, cause `else` part works if there is already a name or surname, so there is no problem with using `strcat`, i suppose. If i used `strcpy` there, i would lost the first name/surname. – repelliuss Dec 27 '18 at 17:07
  • Hi @user3629249 . Thanks for your all suggestions. Now i handle my`whichDataType()`, `exit()` , `malloc()` and `realloc()` functions better. I put a max character modifier too and it became `while ((fscanf(fptr, "%29s", buffer)) == 1)` . After all, my problem still exists. I'm still getting invalid characters after 33. person. Also isn't better for readers to let `sizeof(char)` there? – repelliuss Dec 27 '18 at 18:05
  • I solved the code and updated the topic. Thanks everyone. – repelliuss Dec 27 '18 at 19:28
  • `I saw the problem and solved it` - the proper way is to post an answer and accept it ; ) – KamilCuk Dec 27 '18 at 19:32
  • lol, sorry about that :^) – repelliuss Dec 27 '18 at 22:03

1 Answers1

0

I saw the problem and solved it. There was an algorithm problem. When program goes to allocate memory for name, to not allocate memory again for person's name string when it comes to the second name, to don't lose first one, i was increasing nameTimes once. Then at second switch, since it is first name, it should have enter the if (*nameTimes == 0) part to strcpy. But since i increase it already while allocating, it was never entering to that part and it was always using strcat to copy string. But there was no string to concatenate so that was causing problems. I changed the condition of that part to if (*nameTimes == 1). Then second problem appeared which is it was just printing the last name only cause i wasn't increasing it again so it was stuck at strcpy part. So i increased it again after strcpy . Same goes for surname. I also improved the code thanks to @Barmar and @user3629249.

|New Code|

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define WORD_SIZE 30
#define NAME 1
#define SURNAME 2
#define EMAIL 3
#define ID 4


typedef struct ppl {

    char* name;
    char* surname;
    char* eMail;
    int id;

} PEOPLE;

int whichDataType (char* buffer);
void writeData (PEOPLE* person, char* buffer, int whichData, int* nameTimes, int* surnameTimes, int personNumber);  
void printData (PEOPLE* person, FILE* sptr, int personNumber);

int main (void) {

    FILE* fptr = NULL;

    fptr = fopen("disorganized.txt", "r");
    if (fptr == NULL) {

        perror("Error: ");
        exit( EXIT_FAILURE );
    }

    FILE* sptr = NULL;

    sptr = fopen("organized.txt", "w");
    if (sptr == NULL) {

        perror("Error: ");
        exit( EXIT_FAILURE );
    }



    int whichData;
    int personNumber = 0;
    int nameTimes = 0;
    int surnameTimes = 0;
    int forOnce = 0;
    char* buffer = NULL;
    int* buffer2 = NULL;
    PEOPLE* person = NULL;
    PEOPLE* realloctemp = NULL;



    person = malloc (sizeof(PEOPLE));
    if ( person == NULL ) {

        perror("Error, malloc failed for person. ");
        exit ( EXIT_FAILURE );
    }

    buffer = malloc ( WORD_SIZE * sizeof(char));
    if ( buffer == NULL ) {

        perror("Error, malloc failed for buffer. ");
        exit ( EXIT_FAILURE );
    }


    while ((fscanf(fptr, "%29s", buffer)) == 1) {

        if (personNumber != 0) {

            realloctemp = realloc (person, personNumber * sizeof(PEOPLE));
            if ( realloctemp == NULL ) {

                perror("Error, reallocating. ");
                exit ( EXIT_FAILURE );
            }

            else {

                person = realloctemp;
            }

        }

        whichData = whichDataType(buffer);

        writeData(person, buffer, whichData, &nameTimes, &surnameTimes, personNumber);

        buffer2 = malloc (sizeof(int));
        if ( buffer2 == NULL ) {

            perror("Error, malloc failed for buffer2. ");
            exit ( EXIT_FAILURE );
        }

        else {

            *buffer2 = fgetc(fptr);
        }

        if (*buffer2 == '\n') {

            if (forOnce == 0) {

                personNumber = personNumber + 2;
                free(buffer2);
                free(buffer);

                buffer = malloc ( WORD_SIZE * sizeof(char));
                if ( buffer == NULL ) {

                    perror("Error*, malloc failed for buffer. ");
                    exit ( EXIT_FAILURE );
                }

                nameTimes = 0;
                surnameTimes = 0;

                ++forOnce;

            }

            else {

                ++personNumber;
                free(buffer2);
                free(buffer);

                buffer = malloc ( WORD_SIZE * sizeof(char));
                if ( buffer == NULL ) {

                    perror("Error**, malloc failed for buffer. ");
                    exit ( EXIT_FAILURE );
                }

                nameTimes = 0;
                surnameTimes = 0;

            }

        }

        else if (*buffer2 == ' ' || *buffer2 == '\t') {

            free(buffer2);
            free(buffer);
            buffer = malloc ( WORD_SIZE * sizeof(char));
            if ( buffer == NULL ) {

                perror("Error***, malloc failed for buffer. ");
                exit ( EXIT_FAILURE );
            }

        }
    }

    --personNumber;

    printData (person, sptr, personNumber);

    int i;

    for (i = 0; i<personNumber; ++i) {

        free((person+i)->name);
        free((person+i)->surname);
        free((person+i)->eMail);

    }

    free(person);
    free(buffer);
    free(buffer2);

    fclose(fptr);
    fclose(sptr);

    return EXIT_SUCCESS;
}

int whichDataType (char* buffer) {

    if (buffer[0] >= 'A' && buffer[0] <= 'Z') {

        if (buffer[1] >= 'a' && buffer[1] <= 'z') {

            return NAME;
        }

        else if (buffer[1] >= 'A' && buffer[1] <= 'Z') {

            return SURNAME;
        }
    }

    else if (buffer[0] >= 'a' && buffer[0] <= 'z') {

        return EMAIL;
    }

    else if (buffer[0] >= '0' && buffer[0] <= '9') {

        return ID;
    }

    else {

        perror("Invalid data type. ");
        exit( EXIT_FAILURE );

    }

} 

void writeData (PEOPLE* person, char* buffer, int whichData, int* nameTimes, int* surnameTimes, int personNumber) {

    if (personNumber != 0) {

        --personNumber;
    }

    switch (whichData) {

        case NAME:

            if (*nameTimes == 0) {

                (person + personNumber)->name = malloc ( WORD_SIZE * sizeof(char));
                if ( (person+personNumber)->name == NULL ) {

                    perror("Error. malloc failed for (person+personNumber)->name");
                    exit ( EXIT_FAILURE );
                }

                ++(*nameTimes);
            }

            break;

        case SURNAME:

            if (*surnameTimes == 0) {

                (person+personNumber)->surname = malloc ( WORD_SIZE * sizeof(char));
                if ( (person+personNumber)->surname == NULL ) {

                    perror("Error. malloc failed for (person+personNumber)->surname");
                    exit ( EXIT_FAILURE );
                }

                ++(*surnameTimes);
            }

            break;

        case EMAIL:

            (person + personNumber)->eMail = malloc ( WORD_SIZE * sizeof(char));
            if ( (person+personNumber)->eMail == NULL ) {

                perror("Error. malloc failed for (person+personNumber)->eMail");
                exit ( EXIT_FAILURE );
            }

            break;
    }


    char space[2];
    strcpy(space, " ");


    switch (whichData) {

        case NAME:

            if (*nameTimes == 1) {

                strcpy( (person+personNumber)->name, buffer);
                ++(*nameTimes);
            }

            else if (*nameTimes>1) {

                strcat ( (person+personNumber)->name, space);
                strcat( (person+personNumber)->name, buffer);
            }

            else {

                perror("Error, invalid nameTimes value. ");
                exit ( EXIT_FAILURE );
            }

            break;

        case SURNAME:

            if (*surnameTimes == 1) {

                strcpy ( (person+personNumber)->surname, buffer);
                ++(*surnameTimes);
            }

            else if (*surnameTimes>1) {


                strcat( (person + personNumber)->surname, space);
                strcat( (person + personNumber)->surname, buffer);

            }

            else {

                perror("Error, invalid surnameTimes value. ");
                exit ( EXIT_FAILURE );
            }

            break;

        case EMAIL:

            strcpy( (person + personNumber)->eMail, buffer);

            break;

        case ID:

            (person+personNumber)->id = atoi(buffer);

            break;

    }


}

void printData (PEOPLE* person, FILE* sptr, int personNumber) {

    fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");
    fprintf(sptr, "\n|%30s\t\t", "***NAME***");
    fprintf(sptr, "|%30s\t\t", "***SURNAME***");
    fprintf(sptr, "|%30s\t\t", "***E-MAIL***");
    fprintf(sptr, "|%30s\t|\n", "***ID NUMBER***");
    fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");

    int i;

    for (i = 0; i<personNumber; ++i) {

        fprintf(sptr, "\n|%d%30s\t\t", i+1, (person+i)->name);
        fprintf(sptr, "|%30s\t\t", (person+i)->surname);
        fprintf(sptr, "|%30s\t\t", (person+i)->eMail);
        fprintf(sptr, "|%30d\t|\n", (person+i)->id);
        fprintf(sptr, "-------------------------------------------------------------------------------------------------------------------------------------------------------------------");
    }

}