0

I have a txt file where I just want to get lines bigger than 12 characters. Than these lines are inserted in a variable called graph of type graph_node.

txt file:

1,"execCode(workStation,root)","OR",0
2,"RULE 4 (Trojan horse installation)","AND",0
3,"accessFile(workStation,write,'/usr/local/share')","OR",0
4,"RULE 16 (NFS semantics)","AND",0
5,"accessFile(fileServer,write,'/export')","OR",0
6,"RULE 10 (execCode implies file access)","AND",0
7,"canAccessFile(fileServer,root,write,'/export')","LEAF",1
6,7,-1

graph node type:

#ifndef Graph_Structure
#define Graph_Structure

struct Graph_Node{

    char id[50];
    char node_name[50];

    struct Graph_Node* next;
    struct Graph_Node* edges;

    };  

typedef struct Graph_Node graph_node;


#endif   

this is the method to insert data into graph variable:

void insert_node(graph_node** node, graph_node* data){

    printf("\nINSERTING\n");

    graph_node* temp = (graph_node*)malloc(sizeof(graph_node));

    for(int i = 0; i < strlen(data->id); i++)
        temp->id[i] = data->id[i];

    for(int i = 0; i < strlen(data->node_name) - 1; i++)
        temp->node_name[i] = data->node_name[i];

    temp -> next = *node;
    *node = temp;

    }

this is the method to get the lines in the txt file bigger than 12 characters:

void generate_nodes(graph_node** graph, char* file_name){

    graph_node* data = (graph_node*)malloc(sizeof(graph_node));

    FILE* f_Data_Pointer = fopen(file_name, "r");
    FILE* f_Aux_Pointer = fopen(file_name, "r");

    char c = 0; char line[256];

        int counter = 0; 
        int q = 0; //quotation marks

        bool jump_line = false;

    while(!feof(f_Data_Pointer)){

        c = 0; memset(line, 0, sizeof(line));

        while(c != '\n' && !feof(f_Aux_Pointer)){ // check line

            c = fgetc(f_Aux_Pointer); 
            line[counter] = c;
            counter++;

        }

        if(strlen(line) > 12){ //lines with no edges  

         /*line[counter-3] != '-' && line[counter-2] != '1'*/

         int size = strlen(line); printf("\nline size: %d\n", size);

            counter = 0; c = 0;

            while(c != ','){ //id

                c = fgetc(f_Data_Pointer);

                if(c != ','){

                    data->id[counter] = c;          
                    counter++;
                }

                printf("%c", c);

            }


            counter = 0; c = 0;

            while(1){ //node_name

                c = fgetc(f_Data_Pointer);

                if(c != '"'){

                data->node_name[counter] = c;
                counter++;

                } 

                else{

                    q++;                
                }

                if(q > 1 && c == ',')
                    break;

                printf("%c", c);

            }

            counter = 0; c = 0; 

            while(c != '\n'){ 

                c = fgetc(f_Data_Pointer);
                printf("%c", c);

            }

            insert_node(graph, data);
            memset(data->id, 0, sizeof(data->id));
            memset(data->node_name, 0, sizeof(data->node_name));

        }

        else{ //lines with edges

            while(c != '\n' && !feof(f_Data_Pointer)){ 

                c = fgetc(f_Data_Pointer);

            }

        }

    }

    fclose(f_Data_Pointer);
    fclose(f_Aux_Pointer);

}

I am getting the errors inside the insert method on the commands "for" in "strlen" it says that data->id and data->node_name are not initialized but I don't understand why. I used malloc on data:

graph_node* data = (graph_node*)malloc(sizeof(graph_node));

error:

Conditional jump or move depends on uninitialised value(s) ==3612== at 0x4C30B18: strcpy (vg_replace_strmem.c:510) ==3612== by 0x4008B2: insert_node (mulval.c:44) ==3612== by 0x400C03: generate_nodes (mulval.c:159) ==3612== by 0x400CE8: main (mulval.c:187)
user8955046
  • 27
  • 1
  • 9
  • Please post the [Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) that shows the problem. It would be more readable without the double line spacing, but with sensible whitespace. – Weather Vane Feb 13 '18 at 21:06
  • In `insert_node` you are copying all characters expect the 0-terminating one. Use `strcpy`: `strcpy(tmp->id, data->id);` – Pablo Feb 13 '18 at 21:07
  • have you heard about `strcpy`/`strncpy`? :) – Aif Feb 13 '18 at 21:07
  • 1
    also [why while(!foef(...)) is alway wrong](https://stackoverflow.com/questions/5431941/why-is-while-feof-file-always-wrong) – Pablo Feb 13 '18 at 21:08
  • Always test the return value from `fopen`. Also `char c;` ==> `int c;` always read the man pages. – Weather Vane Feb 13 '18 at 21:10
  • that is what I get guys running with valgrind: Conditional jump or move depends on uninitialised value(s) ==3612== at 0x4C30B18: strcpy (vg_replace_strmem.c:510) ==3612== by 0x4008B2: insert_node (mulval.c:44) ==3612== by 0x400C03: generate_nodes (mulval.c:159) ==3612== by 0x400CE8: main (mulval.c:187) – user8955046 Feb 13 '18 at 21:30
  • I ran valgrind with option "--track-origins=yes" and it says "Uninitialised value was created by a heap allocation" in the line I use malloc when I declare data – user8955046 Feb 13 '18 at 21:41
  • So, look out for uninitialised values. Note that local variables must be explicity initialised. – Weather Vane Feb 13 '18 at 21:49

1 Answers1

3

The biggest problem in your code is that you are constantly ignoring the '\0'-terminating byte and pass to functions like strlen which expects a valid string (one that is 0-terminated).

For example in insert_node:

for(int i = 0; i < strlen(data->id); i++)
        temp->id[i] = data->id[i];

Here you are copying all characters expect the '\0'-terminating byte, temp->id will store a sequence of characters but it is not a string. strlen(data->id) will have an undefined behaviour because data->id is most likely not 0-terminated if you initialized it without 0-terminating the string.

Here you should use strcpy if you know that the source strings is smaller than 49 characters long, or strncpy to be completely sure:

strncpy(temp->id, data->id, sizeof temp->id);
temp->id[sizeof(temp->id) - 1] = 0;

Same thing with node_name. Also you are not checking if malloc returned NULL.

Also the foef lines are wrong, see Why while(!foef(...)) is always wrong.

You should rewrite this

while(c != '\n' && !feof(f_Aux_Pointer)){ // check line
    c = fgetc(f_Aux_Pointer); 
    line[counter] = c;
    counter++;
}

like this:

int c; // <-- not char c

while((c = fgetc(f_Aux_Pointer)) != '\n' && c != EOF)
    line[counter++] = c;

line[counter] = 0;  // terminating the string!

fgetc returns an int, not a char, it should be int, otherwise the comparison with EOF will go wrong. And here you ignored to set the 0-terminating byte, the result was a sequence of characters, but not a string. The next line if(strlen(line) ... overflow because of this, as strlen will keep looking for the 0-terminating byte beyond the limits. For me is not clear why you even check for EOF, as you would keep reading when the outer while loop resumes. If f_Aux_Pointer got to the end, shouldn't the function just return? I'm not really sure what you are doing there. Also there is no strategy for when the newline is not in the first 49 read characters. I think you should rethink your strategy here.

It also would have been easier to do:

if(fgets(line, sizeof line, f_Aux_Pointer) == NULL)
{
    // error handling
    return; // perhaps this?
}

line[strcspn(line, "\n")] = 0; // removing the newline

if(strlen(line) > 12)
...

Here

while(c != ','){ //id

    c = fgetc(f_Data_Pointer);

    if(c != ','){

        data->id[counter] = c;          
        counter++;
    }

    printf("%c", c);

}

you have same error as above, you never set the \0-terminating byte. At the end of the while you should have data->id[counter] = 0;. The same thing in the next while loop.

In generate_nodes you don't need to allocate memory for the temporal graph_node object as insert_node will create a copy anyway. You can do:

graph_node data;

...
data.id[counter] = c;
...
data.node_name[counter] = c;
...

insert_node(graph, &data);
data.id[0] = 0;
data.node_name[0] = 0;

and you will have one malloc less to worry about.


EDIT

Because your IDs are always numerical, I'd change the struct to this:

struct Graph_Node{
    int id;
    char node_name[50];

    struct Graph_Node* next;
    struct Graph_Node* edges;
};  

which would make life easier. If your conditions are true that only the lines you need are longer than 12 characters and that the lines you are interested are always of the same format (no spaces between the commas, second column is always quotes) as the ones posted in your answer, then you can parse it like this:

int generate_nodes(graph_node **graph, const char *file_name)
{
    if(file_name == NULL || graph == NULL)
        return 0; // return error

    // no need to allocate memory for it
    // if the insert_node is going to make a
    // copy anyway
    struct Graph_Node data = { .next = NULL, .edges = NULL };

    FILE *fp = fopen(file_name, "r");
    if(fp == NULL)
    {
        fprintf(stderr, "Error opening file %s: %s\n", file_name,
                strerror(errno));
        return 0;
    }

    // no line will be longer than 1024
    // based on your conditions
    char line[1024];

    size_t linenmr = 0;

    while(fgets(line, sizeof line, fp))
    {
        linenmr++;
        // getting rid of the newline
        line[strcspn(line, "\n")] = 0;

        if(strlen(line) <= 12)
            continue; // resume reading, skipt the line

        char *sep;
        long int id = strtol(line, &sep, 0);

        // assuming that there is not white spaces
        // before and after the commas
        if(*sep != ',')
        {
            fprintf(stderr, "Warning, line %lu is malformatted, '<id>,' exepcted\n", linenmr);
            continue;
        }

        data.id = id;

        // format is: "....",

        if(sep[1] != '"')
        {
            fprintf(stderr, "Warning, line %lu is malformatted, \"<string>\", exepcted\n", linenmr);
            continue;
        }

        // looking for ",
        char *endname = strstr(sep + 2, "\","); 

        if(endname == NULL)
        {
            fprintf(stderr, "Warning, line %lu is malformatted, \"<string>\", exepcted\n", linenmr);
            continue;
        }

        // ending string at ",
        // easier to do strcnpy
        *endname = 0;

        strncpy(data.node_name, sep + 2, sizeof data.node_name);
        data.node_name[sizeof(data.node_name) - 1] = 0;

        insert_node(graph, &data);
    }

    fclose(fp);
    return 1;
}

Now the interesting bits are these:

    char *sep;
    long int id = strtol(line, &sep, 0);

    // assuming that there is not white spaces
    // before and after the commas
    if(*sep != ',')
    {
        fprintf(stderr, "Warning, line %lu is malformatted, '<id>,' exepcted\n", linenmr);
        continue;
    }

strtol is a function that converts a number in a string to an actual integer. This function is better than atoi, because it allows you to convert numbers in different bases, from binary to hexadecimal. The second advantage is that it tells you where it stopped reading. This is great for error detection, and I use this behviour to detect if the line has the correct format. If the line has the correct format, a comma , must appear next to the number, I check for this and if it's not true, I print an error message and skip the line and continue reading the file.

The next if checks if the next character is a quote ", because based on your file, the second argument is enclosed in quotes. Sadly the separtor of the arguments is a comma which is also used as a normal character in the string. This makes things a little bit more complex (you cannot use strtok here). Once again, I assume that the whole argument ends in ",. Note that this does not take into consideration escaped quotes. A line like this:

3,"somefunc(a,b,\"hello\",d)","OR",0

would be parsed incorrectly. When ", is found, I set the quote to '\0' so that it is more easier to use strncpy, otherwise I would have to calculate the length of the string, check that it's length doesn't surpass the destination size, etc. If you need to keep parsing, endname + 1 should point to the next comma, otherwise the format is wrong.

    strncpy(data.node_name, sep + 2, sizeof data.node_name);
    data.node_name[sizeof(data.node_name) - 1] = 0;

Here I copy the string. The source is sep + 2, because sep points to the first comma, so you have to skip it. The next character is the quote, so you have to skip it as well, hence sep + 2. Because the legth of the name is unknown, it's better to use strncpy, this will ensure that you don't write more bytes than you need.

The last step is to insert the node into the graph. Note that I didn't allocate memory, because I know that insert_node is a going to make a new copy anyway. Because the data is only used temporarily, you don't need to allocate memory, just pass a pointer to data (by passing &data) to insert_node.

I've tested this function with a dummy graph and a dummy insert_node that only prints the node:

void insert_node(graph_node** node, graph_node* data)
{
    printf("ID: %d, node_name: %s\n", data->id, data->node_name);
}

and this is the output:

ID: 1, node_name: execCode(workStation,root)
ID: 2, node_name: RULE 4 (Trojan horse installation)
ID: 3, node_name: accessFile(workStation,write,'/usr/local/share')
ID: 4, node_name: RULE 16 (NFS semantics)
ID: 5, node_name: accessFile(fileServer,write,'/export')
ID: 6, node_name: RULE 10 (execCode implies file access)
ID: 7, node_name: canAccessFile(fileServer,root,write,'/export')

Now if you use this code and you keep getting valgrind errors, then that means that you still have errors in other parts of your code that you haven't shown us.

Pablo
  • 13,271
  • 4
  • 39
  • 59
  • thank you for the answer. The aux_pointer is to check first if the line is smaller than 12 characters because I don't want to add them to graph variable. And graph->node_name never gets newline charactere. – user8955046 Feb 13 '18 at 22:30
  • the last part I didn't understand. You mean change "graph_node* data = (graph_node*)malloc(sizeof(graph_node));" to "graph_node data;" so it is not a pointer anymore and than I dont need to use malloc? – user8955046 Feb 13 '18 at 22:32
  • I made the changes but I still get segmentation fault. The only change I didn't make was the last one because I didnt understand – user8955046 Feb 13 '18 at 22:33
  • *so it is not a pointer anymore and than I dont need to use malloc?* yes, that's correct. By not being a pointer, it's an object on the stack. ---- *but I still get segmentation fault* I still have trouble understanding your parsing of the data. Can you edit your question and show a few lines of the input file and explain what the lines means, which are the ones you want, which are not. At some point you look for a comma, I'm not sure if that's correct, I don't know your algorithm. – Pablo Feb 13 '18 at 23:06
  • yes that is right I look for a comma, because I want the id before the first comma and than the name of the node after the first comma. But I don't want lines in which there are only numbers followed by commas – user8955046 Feb 13 '18 at 23:44
  • @user8955046 like I said edit you question and add a few lines of the input file and which lines you want to read and which you don't. – Pablo Feb 13 '18 at 23:47
  • I made the edit. In the txt file as you can see above there is I just want lines like the first "1,"execCode(workStation,root)","OR",0" but in this line I just want "1,"execCode(workStation,root)"," they are the id and the name of the node. – user8955046 Feb 13 '18 at 23:50
  • The last line which is "6,7,-1" I don't want to get them and they are allways smaller than 12 characters – user8955046 Feb 13 '18 at 23:51
  • and `"1"` is the ID and `"execCode"` node_name? Are IDs always numeric? – Pablo Feb 13 '18 at 23:53
  • yes that is correct! and IDs are allways numeric. But the node name would be: execCode(workStation,root) – user8955046 Feb 13 '18 at 23:59
  • @user8955046 I've made an update of my answer addressing your last comments. – Pablo Feb 14 '18 at 01:48
  • thank you for the changes! it is almost working the only problem is that it's returning only the first three letters of node name. I mean for example: execCode(workStation,root) just returns exe – user8955046 Feb 14 '18 at 15:03
  • @Pablo I am having problems in generating the edges in my graph now according to this question: https://stackoverflow.com/questions/48832974/why-my-c-code-to-generate-a-graph-from-a-txt-file-is-not-working – user8955046 Feb 16 '18 at 19:39
  • @user8955046 I'll take a look at this later. – Pablo Feb 16 '18 at 20:08
  • @Pablo now is only missing the edges from the graph in this question the nodes were dealt – user8955046 Feb 16 '18 at 23:13