2

I am trying to separate the string

"CristinaRodriguezRiveraComputacion210302414RamiroSilvaPerezIndustrial217890453PatriciaDuranSanchezCivil215643525RaulColinGranadosComputacion215678342"

which is read from a file, but when I split it and print the pieces, the fields are not separated correctly:

enter image description here

Required output:

Cristina Rodríguez Rivera Computación 210302414 //simulating that each string is inside a block of 15 bytes

I don't know what's wrong with the code, I've been trying to figure out if my logic is wrong for a while

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>

/* Size in bytes of every text field in a Student record. */
enum { STUDENT_FIELD_LEN = 15 };

/*
 * One student record: five equally sized character arrays laid out one after
 * another. main() fills them in the order name, father, mother, degree, id.
 */
typedef struct {
    char name[STUDENT_FIELD_LEN];
    char father[STUDENT_FIELD_LEN];
    char mother[STUDENT_FIELD_LEN];
    char degree[STUDENT_FIELD_LEN];
    char id[STUDENT_FIELD_LEN];
} Student;

/* File-scope record that main() fills and hands to fwrite(). */
Student al;

int main(){

    FILE*  ent = fopen("DatosEntrada.txt","r");
    FILE*  sal = fopen("longitud.txt","a");

    if(ent != NULL){

        char name[15];
        char father[15];
        char mother[15];
        char degree[15];
        char id[15];

        fseek(ent, 0, SEEK_END);  //getting file length
        int longarch = ftell(ent);
        rewind(ent); //go back to the start

        char dinamic[longarch];

        fscanf(ent,"%s",&dinamic);

        int longitud =  strlen(dinamic);


        int contador=0,iterador=0;
        for(int i=0;i<longarch;i++){

            if( isupper(dinamic[i]) ){

                    if( islower(dinamic[i-1])  && islower(dinamic[i+1])  ){
                        iterator=0;
                        counter++; 

                    }


                    if(counter== 0){ //name
                        iterator=0;
                        name[iterator] = dinamic[i];
                         //printf("%c",name[iterator]);
                        iterator++;

                    }else if(counter== 1){ //father

                        father[iterator] = dinamic[i];
                        //printf("%c",father[iterator] );
                        iterator++;

                    }else if(counter== 2){  //mother

                        mother[iterator] = dinamic[i];
                        //printf("%c",mother[iterator]);
                        iterator++;

                    }else if(counter== 3){  //degree

                        degree[iterator] = dinamic[i];
                        //printf("%c",degree[iterator]);
                        iterator++;

                    }

            }else if( islower(dinamic[i])  ){

                    if(counter== 0){ //name

                        name[iterator] = dinamic[i];
                        //printf("%c",name[iterator]);
                        iterator++;

                    }else if(counter== 1){  //father

                        father[iterator] = dinamic[i];
                        //printf("%c",father[iterator]);
                        iterator++;

                    }else if(counter== 2){ //mother

                        mother[iterator] = dinamic[i];
                        //printf("%c",mother[iterator]);
                        iterator++;

                    }else if(counter== 3){ //degree

                        degree[iterator] = dinamic[i];
                        //printf("%c",degree[iterator]);
                        iterator++;

                    }

            }else if( isdigit(dinamic[i])  ){

                    if( islower(dinamic[i-1]) && isdigit(dinamic[i+1]) ){
                        iterator=0;
                        counter++;

                    }else if(   isupper(dinamic[i+1]) && isdigit(dinamic[i-1]) ){

                        id[iterator] = dinamic[i];

                        //printf("%c",id[iterator]);

                        counter=0;

                        printf("(%s,%s,%s,%s,%s)\n",name,father,mother,degree,id);
                        strcpy(al.name,name);
                        strcpy(al.father,father);
                        strcpy(al.mother,mother);
                        strcpy(al.degree,degree);
                        strcpy(al.id,id);

                        fwrite(&al,sizeof(Student), 1, sal);


                    }

                    if(counter== 4){  //id

                        id[iterator] = dinamic[i];
                       // printf("%c",id[iterator]);
                        iterator++;

                    }

            }

        }


        fclose(ent);
        fclose(sal);

    }else{

       fprintf(stdout, "ERROR: %s", strerror(errno));
    }


}
SwagKiller
  • 47
  • 5
  • Are there newlines in the input file between records? Are the lines truly without spaces or commas separating the fields? Is the input truly just one line? – Craig Estey Aug 07 '22 at 18:20
  • 5
    It isn't helpful to have the identifiers and code comments in another language when asking on an English site. – Weather Vane Aug 07 '22 at 18:21
  • Did you consider that any C string needs to end in the end-of-string character `'\0'`? You need to add it, and you need space for it. – the busybee Aug 07 '22 at 18:25
  • @WeatherVane It's worse than that. The names appear to be of the form: `NameFatherMother`, so `nombre: Christina`, `Paterno: Rodriguez`, `Materno: Rivera` – Craig Estey Aug 07 '22 at 18:26
  • @CraigEstey the input is on a single line there are no spaces or commas separating these fields – SwagKiller Aug 07 '22 at 18:27
  • @CraigEstey I removed the first comment when I noticed that and added the 'language' one. – Weather Vane Aug 07 '22 at 18:28
  • @WeatherVane I'm sorry; on the Spanish site they usually take too long to answer. Renaming the variables would make it easier for you to help, so I'll do it in a moment – SwagKiller Aug 07 '22 at 18:31
  • Have you tried running your code line-by-line in a debugger while monitoring the values of all variables and the character codes of all characters of all strings, in order to determine in which line your program stops behaving as intended? If you did not try this, then you may want to read this: [What is a debugger and how can it help me diagnose problems?](https://stackoverflow.com/q/25385173/12149471) You may also want to read this: [How to debug small programs?](https://ericlippert.com/2014/03/05/how-to-debug-small-programs/) – Andreas Wenzel Aug 07 '22 at 18:33
  • Does this answer your question? [What are null-terminated strings?](https://stackoverflow.com/questions/72436704/what-are-null-terminated-strings) – n. m. could be an AI Aug 07 '22 at 19:34

4 Answers4

1

I think the key problem here is that you aren't terminating the fields with NUL (\0) after copying characters into them.

Consider your first field, nombre. You write data to it on two different lines. But you never write \0 to it anywhere. The data you copy into the field is Cristina, which is 8 characters, so the remaining 7 characters of the field just contain whatever happened to be in memory. When you call printf you get Cristina, but there's no \0, so printf just keeps going and prints whatever, until it finds a \0 that's there by coincidence.

So when you detect the end of a field, you need to add a \0 before moving on to the next field.

There are other issues, though. One is here:

        for(int i=0;i<longarch;i++){

            if( isupper(dinamico[i]) ){

                    if( islower(dinamico[i-1])  && islower(dinamico[i+1])  ){
                        iterador=0;
                        contador++; 

                    }

Consider what happens the first time through the loop, when i == 0. The isupper call will return true, because the first character of your data is uppercase. It will then execute the next line, which does islower(dinamico[i-1]) to see if the previous character was lowercase. But i == 0 here, so this is accessing the byte before the start of your buffer. You need to avoid this check when i == 0.

You also repeat the code that copies a character into each field twice (once in the uppercase branch and once in the lowercase branch). You only need to do it once. Your code should look something like:

while (there is more data) {
  if (it's the start of a new field) {
    add \0 to the end of the current field;
    iterador = 0;
    contador++;
  }
  copy character into current field;
  iterador++;
}
add \0 to the end of the last field;
Andreas Wenzel
  • 22,760
  • 4
  • 24
  • 39
Willis Blackburn
  • 8,068
  • 19
  • 36
  • the last thing he recommends to do is do it after storing the contents of the file in my char dynamic[longarch]; certain? – SwagKiller Aug 07 '22 at 18:55
1

I tried out your program and got the extraneous data in the printout. However, when the five work fields were initialized to zeros, ensuring that valid string terminators were in place, the data appeared clean. The following is the additional code I added to ensure the strings contain only the data you want.

        if(ent != NULL)
    {

        char nombre[15];
        char Paterno[15];
        char Materno[15];
        char carrera[15];
        char matricula[15];

        for (int x=0; x < 15; x++) /* Work field initialization */
            {
            nombre[x] = '\0';
            Paterno[x] = '\0';
            Materno[x] = '\0';
            carrera[x] = '\0';
            matricula[x] = '\0';
        }

Give that a try.

NoDakker
  • 3,390
  • 1
  • 10
  • 11
  • It helped me but I have to check my code since it is not separating some fields correctly :) – SwagKiller Aug 07 '22 at 18:50
  • I tried this also but it will erroneously put "Cristina" into the second field if the `islower(dinamico[i-1])` check when `i == 0` happens to find a lowercase letter in whatever sitting before `dinamico` in memory; see my answer below. – Willis Blackburn Aug 07 '22 at 18:58
0

I'm glad your code now seems to work.

However, if it were me, I'd make the code a bit more modular.

I'd also use a state machine/variable to simplify the code.

Also, it makes sense to actually read in the data and save it to an array of Alumno structs.

Here is the refactored code. It is annotated:

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdbool.h>
#include <ctype.h>
#include <string.h>

// dbgprt -- printf-style trace output
// Expands to printf() when DEBUG is defined and to an empty statement
// otherwise. Written with the standard C99 __VA_ARGS__ form rather than the
// GNU-only named variadic parameter, so it builds with any C99/C11 compiler.
#ifdef DEBUG
#define dbgprt(...)         printf(__VA_ARGS__)
#else
#define dbgprt(...)         do { } while (0)
#endif

// ALLMODE -- X-macro listing every parser state once
// Applying a one-argument macro to it generates one item per state, which
// keeps the enum below and the tags[] name table in step with each other.
#define ALLMODE(cmd) \
    cmd(NAME) \
    cmd(FATHER) \
    cmd(MOTHER) \
    cmd(CAREER) \
    cmd(DATE)

// parser states: MODE_NAME, MODE_FATHER, ... plus MODE_MAX as the count
#define ENUM(sym)       MODE_##sym,
enum {
    ALLMODE(ENUM)
    MODE_MAX
};

// printable name of each state, indexed by its MODE_* value
#define TAG(sym)    [MODE_##sym] = #sym,
char *tags[MODE_MAX] = {
    ALLMODE(TAG)
};

// size in bytes of every text field in an Alumno record
enum { ALUMNO_FIELD_LEN = 15 };

// Alumno -- one student record made of five equally sized text fields
// main() fills them in the order nombre, apPat, apMat, carrera, matricula
typedef struct {
    char nombre[ALUMNO_FIELD_LEN];
    char apPat[ALUMNO_FIELD_LEN];
    char apMat[ALUMNO_FIELD_LEN];
    char carrera[ALUMNO_FIELD_LEN];
    char matricula[ALUMNO_FIELD_LEN];
} Alumno;

// file-scope record instance
Alumno al;

// parser state shared by getstring() and main()
char *dinamico;                     // buffer for entire file contents
int icur;                           // current index into dinamico

// getstring -- fill in a struct field
//
// Copies one token from dinamico, starting at index icur, into field and
// leaves icur on the first character after the token.
//
// The first character decides the token type:
//   digit        -- the token is the run of digits that follows
//   anything else -- the token runs until the next uppercase letter or digit
//
// field must point to an array of at least 15 bytes. At most 14 characters
// are stored and the result is always NUL-terminated; extra characters of an
// over-long token are consumed but discarded. If icur already sits on the
// terminating NUL of dinamico, field becomes the empty string and icur is not
// moved.
void
getstring(char *field)
{
    // capacity of the arrays main() passes in
    enum { FIELD_CAP = 15 };

    size_t len = 0;
    int chr;

    // get and save first char
    // (converted through unsigned char, as the <ctype.h> functions require)
    chr = (unsigned char) dinamico[icur];
    if (chr == 0) {
        field[0] = 0;
        return;
    }
    ++icur;
    dbgprt("getstring: FIRST chr='%c'\n",chr);
    field[len++] = (char) chr;

    // do we want:
    //   0 -- Cristina
    //   1 -- 210302414
    int dateflg = isdigit(chr);

    while (1) {
        // look at next character
        chr = (unsigned char) dinamico[icur];
        if (chr == 0)
            break;
        dbgprt("getstring: PEEK chr='%c'\n",chr);

        int isdig = isdigit(chr);

        // we want a date -- stop if char is _not_ a digit
        if (dateflg) {
            if (! isdig)
                break;
        }

        // we want a name -- stop if char is upper (start of new name) or
        // is a digit (start of a date)
        else {
            if (isupper(chr))
                break;
            if (isdig)
                break;
        }

        // keep the last byte free for the terminator
        if (len < FIELD_CAP - 1)
            field[len++] = (char) chr;
        ++icur;
    }

    field[len] = 0;

    dbgprt("getstring: field='%s'\n",field);
}

// main -- split DatosEntrada.txt into Alumno records
//
// Reads the first whitespace-delimited token of DatosEntrada.txt, cuts it
// into records of five fields with getstring(), prints each completed record
// and writes it as a raw Alumno struct to longitud.txt (truncating any
// previous contents).
//
// Returns 0 on success and 1 if a file cannot be opened, sized or written, or
// if memory cannot be allocated. A trailing record that is missing fields is
// parsed but neither printed nor written.
int
main(void)
{
    int rc = 1;                     // exit status; 0 only after a clean run
    FILE *sal = NULL;
    long filesize = -1;
    size_t length = 0;
    int state = MODE_NAME;

    // empty list
    Alumno *list = NULL;
    int count = 0;

    // current record
    Alumno *rec = NULL;

    FILE *ent = fopen("DatosEntrada.txt", "r");
    if (ent == NULL) {
        fprintf(stdout, "ERROR: %s", strerror(errno));
        return 1;
    }

    // opened only after the input is known to exist, so a missing input file
    // does not truncate a previous output file
    sal = fopen("longitud.txt", "w");
    if (sal == NULL) {
        fprintf(stdout, "ERROR: %s", strerror(errno));
        goto done;
    }

    // get size of file
    if (fseek(ent, 0, SEEK_END) == 0)
        filesize = ftell(ent);
    if (filesize < 0) {
        fprintf(stdout, "ERROR: %s", strerror(errno));
        goto done;
    }
    rewind(ent);                    // go back to the start

    // get buffer for entire file (+1 for the terminator)
    dinamico = malloc((size_t) filesize + 1);
    if (dinamico == NULL) {
        fprintf(stdout, "ERROR: %s", strerror(errno));
        goto done;
    }

    // an empty or whitespace-only file stores nothing: treat as no records
    if (fscanf(ent, "%s", dinamico) != 1)
        dinamico[0] = '\0';
    length = strlen(dinamico);

    for (icur = 0; (size_t) icur < length;) {
        dbgprt("main: STATE state=%d (%s)\n",state,tags[state]);

        switch (state) {
        case MODE_NAME: {
            // increase size of list; keep the old pointer until realloc
            // succeeds so it can still be freed on failure
            Alumno *grown = realloc(list,sizeof(*list) * ((size_t) count + 1));
            if (grown == NULL) {
                fprintf(stdout, "ERROR: %s", strerror(errno));
                goto done;
            }
            list = grown;
            rec = &list[count];
            ++count;

            getstring(rec->nombre);
            state = MODE_FATHER;
            break;
        }

        case MODE_FATHER:
            getstring(rec->apPat);
            state = MODE_MOTHER;
            break;

        case MODE_MOTHER:
            getstring(rec->apMat);
            state = MODE_CAREER;
            break;

        case MODE_CAREER:
            getstring(rec->carrera);
            state = MODE_DATE;
            break;

        case MODE_DATE:
            getstring(rec->matricula);
            state = MODE_NAME;
            printf("%s %s %s %s %s\n",
                rec->nombre,rec->apPat,rec->apMat,rec->carrera,rec->matricula);
            if (fwrite(rec, sizeof(Alumno), 1, sal) != 1) {
                fprintf(stdout, "ERROR: %s", strerror(errno));
                goto done;
            }
            break;

        default:
            // no other state is ever assigned; stop rather than spin
            goto done;
        }
    }

    rc = 0;

done:
    fclose(ent);
    // fclose flushes buffered output, so a failure here means lost data
    if (sal != NULL && fclose(sal) != 0) {
        fprintf(stdout, "ERROR: %s", strerror(errno));
        rc = 1;
    }

    free(dinamico);
    dinamico = NULL;
    free(list);

    return rc;
}
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
0

There are many ways to achieve your objective. I offer this as a working example that outputs both the version in parentheses and shows how a 'record' (15 chars wide * 5) can be accumulated (for display or storage)...

My hope is that this will provide material to learn more about another way to reach your goal.

/*
 * showFlds -- print the first n rows of flds between two "****" marker
 * lines. Each row is printed as a string, single-quoted and left-justified in
 * a column 15 characters wide. Nothing but the markers is printed when
 * n <= 0.
 */
void showFlds( char flds[][15], int n ) {
    char (*row)[15] = flds;

    puts( "****" );
    for( int left = n; left > 0; left-- ) {
        printf( "'%-15s'\n", *row );
        row++;
    }
    puts( "****" );
}

/*
 * Demo driver: walks a hard-coded string of run-together records, echoing
 * each record as "(f1 f2 f3 f4 f5)" while copying its fields into a 5 x 15
 * table that is dumped with showFlds() after every record.
 *
 * A new field starts at every uppercase letter and at the first digit of a
 * run of digits; the very first character of the input never starts one.
 */
int main() {
    const char *text =
        "CristinaRodriguezRiveraComputacion210302414"
        "RamiroSilvaPerezIndustrial217890453"
        "PatriciaDuranSanchezCivil215643525"
        "RaulColinGranadosComputacion215678342";

    enum { FIELD_COUNT = 5, FIELD_WIDTH = 15 };
    char fields[FIELD_COUNT][FIELD_WIDTH];
    int fieldIdx = 0;           /* row of the field being filled */
    int charIdx = 0;            /* next free position in that row */
    bool prevWasDigit = false;  /* was the previous character a digit? */

    putchar( '(' );
    for( size_t pos = 0; text[pos] != '\0'; pos++ ) {
        const char ch = text[pos];
        bool boundary = false;

        if( pos > 0 ) {
            if( isupper( ch ) )
                boundary = true;
            else if( isdigit( ch ) && !prevWasDigit )
                boundary = true;
        }

        if( boundary ) {
            /* close the field just finished */
            fields[ fieldIdx ][ charIdx ] = '\0';
            fieldIdx += 1;
            charIdx = 0;

            if( fieldIdx >= FIELD_COUNT ) {
                /* that was the last field: finish and dump the record */
                puts( ")" );
                showFlds( fields, fieldIdx );
                putchar( '(' );
                fieldIdx = 0;
            } else {
                putchar( ' ' );
            }
        }

        putchar( ch );
        fields[ fieldIdx ][ charIdx ] = ch;
        charIdx += 1;
        prevWasDigit = isdigit( ch ) != 0;
    }

    /* the last record has no following character to close it */
    puts( ")" );
    fields[ fieldIdx ][ charIdx ] = '\0';
    fieldIdx += 1;
    showFlds( fields, fieldIdx );
    return 0;
}

Output:

(Cristina Rodriguez Rivera Computacion 210302414)
****
'Cristina       '
'Rodriguez      '
'Rivera         '
'Computacion    '
'210302414      '
****
(Ramiro Silva Perez Industrial 217890453)
****
'Ramiro         '
'Silva          '
'Perez          '
'Industrial     '
'217890453      '
****
(Patricia Duran Sanchez Civil 215643525)
****
'Patricia       '
'Duran          '
'Sanchez        '
'Civil          '
'215643525      '
****
(Raul Colin Granados Computacion 215678342)
****
'Raul           '
'Colin          '
'Granados       '
'Computacion    '
'215678342      '
****
Fe2O3
  • 6,077
  • 2
  • 4
  • 20