0

The user enters data in the format: "[NAME], [SURNAME], [INDEX]". Error codes:
0 -- everything is loaded to the structure properly
1 -- not loaded to structure properly (user did not use commas or p->name went wrong)
2 -- only name loaded properly
3 -- name and surname loaded properly (index went wrong)

struct student_t
{
    char name[20];
    char surname[40];
    int index;
};

Examples:
input: John, Deep
err_code: 2

input: John, Deep, 999
err_code: 0

input: NULL //(nothing)
err_code: 1
... so I'm unable to detect whether the user entered, for example, "John, Deep" (err 2), "John, Deep, " (err 3) or "John, " (err 2); every such input results in err 1 (or err 0 if everything is fine).
My attempts: //edit1: working version using this approach, further below this one.

/* First attempt. This is a function-body fragment: `p` and `err_code` come
 * from the enclosing function, which is not shown here.
 * It reads one line from stdin, splits it on commas with strtok() and, when it
 * detects a problem, stores a code in *err_code and returns NULL. */
char buffer[1024], *pch1, *pch2;
if (fgets(buffer,1024, stdin)!=NULL) 
{
    // pch1 -> first comma in the line, pch2 -> last comma (both NULL if there is none)
    pch1=strchr(buffer, ',');
    pch2=strrchr(buffer, ',');

    if (pch1!=pch2 && pch1!=NULL) //the line contains at least two commas
    {
        char *name = strtok(buffer,","); // returns pointer to the beginning of the token
        if (name) {//strtok overwrites the "," that ended the token with '\0'
            sscanf(name," %19s", p->name);  // skip leading spaces
            char *surname = strtok(NULL,",");
            if (surname) {
                sscanf(surname," %39s", p->surname); // skip leading spaces
                char *index = strtok(NULL,",");
                if (index) {
                    // NOTE(review): strtol is called without an end pointer, so a
                    // non-numeric third field cannot be told apart from the value 0
                    p->index = (int)strtol(index, NULL, 10);
                           } else {*err_code=3; return NULL;} //only NAME and SURNAME loaded; no INDEX token found
                         } else {*err_code=2; return NULL;} //only NAME loaded correctly
                  }
    } else  if (pch1==pch2 && pch1!=NULL) 
    {//but if there is 1 comma, input may be like: "John, Deep" so name'd be ok
            char *name = strtok(buffer,","); 
            if (name) { 
                sscanf(name," %19s", p->name);  
                char *surname = strtok(NULL,",");
                if (surname) {
                    sscanf(surname," %39s", p->surname); 
                    // NOTE(review): with exactly one comma there are at most two
                    // tokens, so this third strtok call is expected to return NULL
                    char *index = strtok(NULL,",");
                    if (index) {
                        p->index = (int)strtol(index, NULL, 10);
                    // NOTE(review): the else-branch below reads p->index although this
                    // branch never assigned it; the ==0 test is also written twice
                               }else if (p->index==0||p->index==0||p->index==' ') {*err_code=2; return NULL;}
                             } 
                      } 

    } else {*err_code=1; return NULL;} //if there were 0 commas, err_code=1
}

// NOTE(review): p has already been dereferenced above, so the p==NULL test comes
// too late; p->name is an array member of struct student_t, so it never compares
// equal to NULL
if (p==NULL || p->name==NULL)
{
    *err_code=1;
    return NULL;
}
// NOTE(review): same issue as above - both operands are array members, so this
// condition cannot become true
 if (p->surname && p->name==NULL)
{
    *err_code=2;
    return NULL;
}
//because the above "if" construction didn't work, I added another one here:
// NOTE(review): p->index is an int; comparing it with NULL is just another
// comparison with 0, so a legitimately parsed index of 0 is reported as error 3
 if (p->index==NULL || p->index==0) //so if input was "John, Deep" then p->index should be NULL? 
{
    *err_code=3;
    return NULL;
}

//edit1: Okay, this code works for me; everything goes as envisaged. However, it is very messy, so I'll try to adapt and use the answers in other versions...

/* Second ("working") attempt. This is a function-body fragment: `p` and
 * `err_code` come from the enclosing function, which is not shown here.
 * Same structure as the first attempt: read one line, count commas via
 * strchr/strrchr, split with strtok, and report problems through *err_code
 * while returning NULL. */
char buffer[1024], *pch1, *pch2;

if (fgets(buffer,1024, stdin)!=NULL) 
{
    // pch1 -> first comma in the line, pch2 -> last comma (both NULL if there is none)
    pch1=strchr(buffer, ',');
    pch2=strrchr(buffer, ',');

    // at least two commas: all three fields may be present
    if (pch1!=pch2 && pch1!=NULL)
    {
        char *name = strtok(buffer,","); // returns pointer to the beginning of the token
        if (name) { //strtok overwrites the "," that ended the token with '\0'
            sscanf(name," %19s", p->name);  // skip leading spaces
            char *surname = strtok(NULL,",");
            if (surname) {
                sscanf(surname," %39[^\t\n]", p->surname); // skip leading spaces; the scanset keeps inner spaces
                char *index = strtok(NULL,",");
                if (index) {
                    p->index = (int)strtol(index, NULL, 10);
                    // a result of 0 (also produced by a non-numeric field) is reported as error 3
                    if (p->index==0) {*err_code=3; return NULL;}
                           } //else {*err_code=3; return NULL;} //only NAME and SURNAME correctly, INDEX is loaded wrong
                         } else {*err_code=2; return NULL;} //only NAME loaded correctly
                  }
    // exactly one comma: at most NAME and SURNAME can be present
    } else  if (pch1==pch2 && pch1!=NULL)
    {
            char *name = strtok(buffer,","); // returns pointer to the beginning of the token
            if (name) { //strtok overwrites the "," that ended the token with '\0'
                sscanf(name," %19s", p->name);  // skip leading spaces
                char *surname = strtok(NULL,",");
                if (surname) {
                    sscanf(surname," %39[^\t\n]", p->surname); // skip leading spaces; the scanset keeps inner spaces
                    // NOTE(review): with exactly one comma there are at most two
                    // tokens, so this third strtok call is expected to return NULL
                    char *index = strtok(NULL,",");
                    if (index) {
                        p->index = (int)strtol(index, NULL, 10);
                    // NOTE(review): the else-branch below reads p->index although
                    // this branch never assigned it
                               } else if (p->index==0||p->index==' ') {*err_code=2; return NULL;} 
                             } else {*err_code=1; return NULL;} 
                                          } else {*err_code=2; return NULL;}

    } else {*err_code=1; return NULL;} //no comma at all
}

// NOTE(review): p has already been dereferenced above, so the p==NULL test comes
// too late; p->name is an array member of struct student_t, so it never compares
// equal to NULL
if (p==NULL || p->name==NULL)
{
    *err_code=1;
    return NULL;
}

I have a feeling it could be done in a totally different way... I'll take all hints and answers to my heart and do my best to understand and learn them all.

//edit1: if my ugly code infuriated someone, I'd really be happy to do some gardener work and cut off some of these demonic bushes, to clean it a bit. I think some of if cases are not at all necessary for it to work...
PS. (It's a continuation of my previous problem where typed commas were assigned to structure: How to scanf commas, but with commas not assigned to a structure? C but in this topic there, I ask about doing it in a way that there'd be an information what user typed wrong)

Immo
  • 19
  • 6
  • What would you expect with the following input: *John Harry* (a space but no comma)? I assume err_code is 2, but what about the name? – Serge Ballesta Jul 16 '18 at 11:57
  • @SergeBallesta "_John Harry_" I think would be error 1, because there is no comma at the end, if it was: "_John Harry,_ " then error 2, the same as " _John, Harry_"(no comma at the end); in this attempt of mine above, however, sscanf would load only "_John_" for p->name, I think, because then there goes a whitespace – Immo Jul 16 '18 at 12:10
  • indeed, to be able to sscanf a surname/name consisting of multiple 'elements', I had to use `sscanf(surname," %39[^\t\n]", p->surname);` – Immo Jul 16 '18 at 17:03

3 Answers3

1

I would use this pseudo-code:

isolate first comma-separated token
if no token, return 1
if length >= sizeof(s.name), return 1
copy first token to s.name
isolate second token
if no token, return 2
if length >= sizeof(s.surname), return 2
copy second token to s.surname
isolate third token
if no token, return 3
if token not numeric, return 3
set s.index = atoi( third token )
return 0

If you code that up in C, you should end up with something nice and short and clean and reliable, without too much annoyingly redundant checking and backtracking.

(Actually, if it was me, I'd use one general-purpose function to do the token isolating all at once, up front, then simply test if the number of found tokens was 0, 1, 2, 3, or more than 3. See this web page for additional ideas.)

Steve Summit
  • 45,437
  • 7
  • 70
  • 103
  • what is length? – Immo Jul 16 '18 at 14:29
  • If the input is `John,Jacob Jinkleheimer Smith,10`, the length of the first token is 4, and the length of the second token is 24. Assuming that your method of isolating tokens resulted in ordinary null-terminated strings, you could compute these lengths by calling `strlen()`. – Steve Summit Jul 16 '18 at 15:19
  • I can't help but correct your usage of the traditional children's song title. It's "John Jacob Jingleheimer Schmidt" (see https://en.wikipedia.org/wiki/John_Jacob_Jingleheimer_Schmidt). Apologies for the OT sidenote... – OregonJim Jul 17 '18 at 07:27
1

Here, what you need is beyond what the scanf family of functions can do, because you want to strip blank characters at the beginning or the end of a name but still want to allow spaces inside a name. IMHO, you should use a dedicated function for that parsing. The code could be:

/* Find a token terminated by any character from delims.
 * Blanks (space, tab, carriage return) at the beginning and at the end of the
 * token are trimmed out.
 *
 * start  - NUL-terminated text to scan
 * end    - out: set to one past the last non-blank character of the token;
 *          set to NULL when no delimiter is found
 * next   - out: set to the character right after the delimiter, so the
 *          delimiter itself is next[-1]; left untouched when no delimiter
 *          is found
 * delims - NUL-terminated set of delimiter characters
 *
 * Returns a pointer to the first non-blank character of the token (the token
 * is NOT NUL-terminated; its length is *end - result and may be 0), or NULL
 * if no delimiter was found.
 * */
const char* find(const char * start, char **end, char **next, const char *delims) {
    static const char blanks[] = " \t\r";
    start += strspn(start, blanks);     // trim blanks at the beginning
    *end = strpbrk(start, delims);      // search first delimiter
    if (*end == NULL) {                 // test the search result, not the out-parameter itself
        return NULL;
    }
    *next = *end + 1;                   // next find will start after the delimiter
    // Trim blanks at the end. Every character in [start, *end) lies before the
    // delimiter inside the string, so (*end)[-1] is never '\0' here and strchr
    // only matches real blank characters.
    while (*end > start && strchr(blanks, (*end)[-1]) != NULL) {
        --*end;
    }
    return start;
}
// Parse one line of the form "NAME, SURNAME, INDEX" from stdin into *p.
//
// p        - destination record; if NULL nothing is read and error 1 is reported
// err_code - out, must not be NULL:
//              0  all three fields loaded
//              1  nothing loaded (read failure, or no comma after the name)
//              2  only the name loaded (no comma after the surname)
//              3  name and surname loaded, index missing or not a clean integer
//
// Fields longer than the destination array are truncated to fit.
// Returns p when *err_code is 0, NULL otherwise.
struct student_t* getstruct(struct student_t *p, int *err_code) {
    char buffer[1024];
    *err_code = 1;       // be prepared to worst case
    if (p == NULL || fgets(buffer, sizeof buffer, stdin) == NULL) {
        return NULL;
    }

    char *end, *next;
    const char delims[] = ",\r\n";

    const char *name = find(buffer, &end, &next, delims);
    if (name == NULL || next[-1] != ',') {   // the name must be closed by a comma
        return NULL;
    }
    size_t l = (size_t)(end - name);
    if (l >= sizeof p->name) l = sizeof p->name - 1;
    memcpy(p->name, name, l);
    p->name[l] = '\0';
    *err_code = 2;                           // Ok, we have a name followed by a comma

    const char *surname = find(next, &end, &next, delims);
    if (surname == NULL || next[-1] != ',') { // the surname must be closed by a comma
        return NULL;
    }
    // The delimiter is checked through next[-1] only: after trimming, *end may
    // point at a blank that precedes the comma, so *end must not be tested.
    l = (size_t)(end - surname);
    if (l >= sizeof p->surname) l = sizeof p->surname - 1;
    memcpy(p->surname, surname, l);
    p->surname[l] = '\0';
    *err_code = 3;                           // Ok, we have a surname followed by a comma

    // The index is the rest of the line. %d skips leading blanks itself, and
    // %1s succeeds only if some non-blank text follows the number, so exactly
    // one conversion means "a clean integer and nothing else". No trailing
    // delimiter is required, so a last line without '\n' is accepted too.
    char dummy[2];
    if (1 == sscanf(next, "%d%1s", &(p->index), dummy)) {
        *err_code = 0;
    }
    return (*err_code == 0) ? p : NULL;
}
Serge Ballesta
  • 143,923
  • 11
  • 122
  • 252
  • I tried to use your "architecture" of the getstruct function (the way errors are beautifully handled) and to combine it with sscanf(name," %19s", p->name); sscanf(surname," %39[^\t\n]", p->surname); to "automatically" trim blanks, but I failed. – Immo Aug 12 '18 at 16:11
  • there is one thing I don't understand. Why there is "-1" in the part: `if ((*end)[-1] == blanks[i]) {` ? Where does (*end)[-1] point to? I'd imagine *end points to right before the comma occurs? So *end[-1] would be moved one character left wards compared to Comma? (also I'm wondering, was using *next necessary? I mean, could there be instead of next[-1] just an end[0]?) – Immo Aug 12 '18 at 16:18
  • *end = strpbrk(start, delims);<--- Either the condition 'end==0' is redundant or there is possible null pointer dereference: end. -- cppcheck – Immo Aug 13 '18 at 16:55
  • if ((*end)[-1] == blanks[i]) { //<-- clang: Out of bound memory access (access exceeds upper limit of memory block) -- Logic error bug – Immo Aug 20 '18 at 20:28
0

Consider using return value from sscanf:

#include <stdio.h>
#include <assert.h>
/* One student record, filled from text of the form "NAME, SURNAME, INDEX". */
typedef struct student_t
{
   char name[20];      /* at most 19 characters plus '\0' */
   char surname[40];   /* at most 39 characters plus '\0' */
   int index;
} student_t;

/* Outcome of parse_input: how many leading fields were loaded. */
enum parse_error_t {
   E_OK,                    /* name, surname and index loaded */
   E_NOT_LOADED_PROPERLY,   /* nothing loaded */
   E_ONLY_NAME_LOADED,      /* only the name loaded */
   E_NAME_SURNAME_LOADED    /* name and surname loaded, index missing */
};

/* Parse "NAME, SURNAME, INDEX" from buf into *out.
 *
 * buf - NUL-terminated input text, or NULL
 * out - destination record, or NULL
 *
 * Fields are separated by any run of commas, spaces and tabs. Neither name nor
 * surname can contain a comma or whitespace.
 *
 * Returns E_OK when all three fields were converted, otherwise the code that
 * tells how many leading fields were stored. A NULL buf or out, or input
 * without a name, gives E_NOT_LOADED_PROPERLY.
 */
enum parse_error_t parse_input(char *buf, student_t *out) {
    int matches_count;
    if (buf == NULL || out == NULL) {
        return E_NOT_LOADED_PROPERLY;
    }
    /* The field widths must stay one less than the array sizes in student_t,
     * because sscanf appends the terminating '\0' on top of the width.
     * The width goes BEFORE the scanset; digits inside the brackets would be
     * treated as ordinary set members. */
    matches_count = sscanf(buf, " %19[^, \t\n]%*[, \t]%39[^, \t\n]%*[, \t]%d",
                           out->name, out->surname, &out->index);
    switch(matches_count) {
        case 0:
            return E_NOT_LOADED_PROPERLY;
        case 1:
            return E_ONLY_NAME_LOADED;
        case 2:
            return E_NAME_SURNAME_LOADED;
        case 3:
            return E_OK;
        default:            /* EOF: the input ended before the first conversion */
            return E_NOT_LOADED_PROPERLY;
    }
}

/* Self-check: feeds four sample inputs to parse_input and asserts the
 * parse_error_t value expected for each of them. */
int main() {
    char *in1 = NULL;                  // no buffer at all
    char *in2 = "John";                // name only
    char *in3 = "John, Deep";          // name and surname
    char *in4 = "John, Deep, 999";     // complete record
    student_t student;

    assert(parse_input(in1, &student) == E_NOT_LOADED_PROPERLY);
    assert(parse_input(in2, &student) == E_ONLY_NAME_LOADED);
    assert(parse_input(in3, &student) == E_NAME_SURNAME_LOADED);
    assert(parse_input(in4, &student) == E_OK);
}

String matching expression is based on this answer.

pbn
  • 2,406
  • 2
  • 26
  • 39