0

I am a new C programmer and so you will have to excuse my lack of knowledge. Slowly but surely I am improving. I am wondering why I am unable to access the members of my structure after I have sent them to a function that intends to modify them. This issue occurs in the code shown below. Basically my program analyzes an XML document and looks for certain patterns to extract text. In my main function I make the struct: artinfo artIn[128] = {}; which is defined in my header file as being:

#ifndef FILE_TABLE_H
#define FILE_TABLE_H
#ifdef  __cplusplus
extern "C" {
#endif
    typedef struct article_info {
        char author1[512];
        char author2[512];
        char author3[512];
        char author4[512];
        char author5[512];
        char author6[512];
        char title[2048];
        char version[4];
        char year[4];
        char page[64];
        char abstract[4096];
        char notes[4096];
} artinfo;

#ifdef  __cplusplus
}
#endif
#endif  

After instantiating this and clearing it with memset I proceed to send it to another function which will extract text and intends to put it back in the structure. The function is shown below.

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[]){

int isNewline;             /* Boolean indicating we've read a CR or LF */
long lFileLen;             /* Length of file */
long lIndex;               /* Index into cThisLine array */
long lLineCount;           /* Current line number */
long lLineLen;             /* Current line length */
long lStartPos;            /* Offset of start of current line */
long lTotalChars;          /* Total characters read */
char cThisLine[BUFSIZE];     /* Contents of current line */
char *cFile;                  /* Dynamically allocated buffer (entire file) */
char *cThisPtr;               /* Pointer to current position in cFile */
char cNextLine[BUFSIZE];
char buffer[BUFSIZE];
char title[] = "<text top=\"245\"";
char ending[] = "$";
char author[] = "$</text>";
bool tfound, afound;
char *match, *cNextLinePtr;
long lNextLineCount;
int i, j;

//initialize some values
tfound = false;
afound = false;
fseek(input, 0L, SEEK_END);  /* Position to end of file */
lFileLen = ftell(input);     /* Get file length */
rewind(input);               /* Back to start of file */
memset(&cThisLine,0,sizeof(cThisLine));
memset(&cNextLine,0,sizeof(cNextLine));
memset(&buffer,0,sizeof(buffer));

printf("TEST: Entered read parser\n");
cFile = calloc(lFileLen + 1, sizeof(char));
printf("TEST:\n");
if(cFile == NULL )
{
    printf("\nInsufficient memory to read file.\n");
    return 0;
}

fread(cFile, lFileLen, 1, input); /* Read the entire file into cFile */
printf("TEST: read the file in\n");
lLineCount  = 0L;
lTotalChars = 0L;

cThisPtr    = cFile;              /* Point to beginning of array */
printf("TEST: Got to here.\n");

while (*cThisPtr)                 /* Read until reaching null char */
{
    //printf("TEST: Got to here.\n");
    lIndex    = 0L;                 /* Reset counters and flags */
    isNewline = 0;
    lStartPos = lTotalChars;

    while (*cThisPtr)               /* Read until reaching null char */
    {
        if (!isNewline)               /* Haven't read a CR or LF yet */
        {
            if (*cThisPtr == CR || *cThisPtr == LF) /* This char IS a CR or LF */
            isNewline = 1;                        /* Set flag */
            //printf("Flag was set");
            //exit(0);
        }

        else if (*cThisPtr != CR && *cThisPtr != LF) /* Already found CR or LF */
            break;                                     /* Done with line */

        cThisLine[lIndex++] = *cThisPtr++; /* Add char to output and increment */
        ++lTotalChars;

    } /* end while (*cThisPtr) */

    cThisLine[lIndex] = '\0';     /* Terminate the string */
    ++lLineCount;                 /* Increment the line counter */
    lLineLen = strlen(cThisLine); /* Get length of line */
    /* THIS is where I look for the matches to the patterns for my info. */
//    printf("TEST: Printing 1 line\n%s", cThisLine);
//    exit(0);

    if(strstr(cThisLine,title)!= NULL && tfound == false)
    {
        printf("TEST: Looking for title info.\n");
        match = strstr(cThisLine,">");
        //printf("TEST: match first points to %c\n", *match);
        //exit(0);
        j = 0;
        match++;
        //printf("TEST: match points to %c\n", *match);
        while(*match!='<')
        {
            //pridntf("TEST: match is %c\n", *match);
            //printf("TEST: %c", *match);
            buffer[j] = *match;
            //printf("TEST: %c", buffer);
            j++;
            match++;
        }
        lNextLineCount = lLineCount;
        do
        {
            lNextLineCount = lNextLineCount + 1;
            readaheadone(cFile, lNextLineCount, cNextLinePtr, lbuf);
            strcpy(cNextLine, lbuf->bline);
            cNextLinePtr = cNextLine;
            printf("TEST: the current line is - %s\nthe next line is %s\n",cThisLine,cNextLine);
            //printf("TEST: Before test exit");
            //exit(0);
            if(strstr(cNextLinePtr,ending)!=NULL)
            {
                printf("TEST: Author Info spans more than 1 line.\n");
                match = strstr(cThisLine,">");
                j++; //i DON'T RESET COUNTER SO IT JUST KEEPS FILLING THE BUFFER AFTER LEAVING A SPACE
                match++;
                //printf("TEST: match points to %c\n", *match);
                while(*match!='<')
                {
                    //pridntf("TEST: match is %c\n", *match);
                    //printf("TEST: %c", *match);
                    buffer[j] = *match;
                    //printf("TEST: %c", buffer);
                    j++;
                    match++;
                }
            }

        } while(strstr(cNextLinePtr,ending)!=NULL);

    strcpy((*artIn[0]).title, buffer);
    printf("The title is: %s\n", buffer);//artinfo[0].author);
    printf("The title is: %s\n", (*artIn[0]).title);
    tfound = true;
    }

    if(strstr(cThisLine,author)!= NULL && afound == false)
    {
        printf("TEST: Looking for author info.\n");
        match = strstr(cThisLine,">");

    }

Everything seems to work just fine until its reaches the: strcpy((*artIn[0]).title, buffer); and printf("The title is: %s\n", ((*artIn[0]).title); statements. I always get an error and a stack dump - i.e. 1 [main]Parst_Text 7296 open_stackdumpfile: Dumping stack trace to Parse_Text.exe.stackdump I am not really sure what I am doing wrong - I think it is the call to access the member of the struct array as if I remove these statements the program runs without flaw. I am not sure if I am just putting too much on the stack or how to use the stack dump to figure out the problem. All advice is appreciated but please keep it constructive. Thanks. Also please let me know if I overloaded this question as I wasn't sure how much detail to provide.

EDIT: as per request here is the function which calls reader parser

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>   //for close()
#include <ctype.h>    //for toupper()
#include <stdbool.h>  //for bool type
#include <errno.h>    //where the standard error is written for calls to OS
#include "file_table.h"
#define BUFSIZE 1024
#define CR 13            /* Decimal code of Carriage Return char */
#define LF 10            /* Decimal code of Line Feed char */
#define EOF_MARKER 26    /* Decimal code of DOS end-of-file marker */

void fillFileTable(char *filedir, ftable *table, bool populate, ftinfo *info);
bool readerParser(FILE *InputFile, FILE *OutputFile, textlbuf *lbuf, artinfo *artIn[]);
void readaheadone(char *cFile, long nextLineCount ,char *currentPtr, textlbuf *lbuf);

int main()
{
    /*Define variables & Instantiate Structs.*/
    ftable files[128];
    ftinfo info;
    artinfo artIn[128] = {};
    textlbuf store[1];
    char buffer[BUFSIZE];
    char dir[260] = "C:/DTEST/Parse_Text/91_1_text"; //would be better if user could enter this - fix for later
    ftinfo *fti = &info; 
    textlbuf *lbuf = &store;
    FILE *fp, *op; //fp is input file, op is output file
    int i, j, k;
    bool iReadReturn;

    /*Clear Out Memory Structures*/
    memset(&files,0,sizeof(files));
    memset(&info,0,sizeof(info));
    memset(&artIn,0,sizeof(artIn));
    memset(&buffer,0,sizeof(buffer));
    memset(&store,0,sizeof(store));

    /*Fill the File Table*/
    printf("TEST: Preparing to fill the file table...\n");
    fillFileTable(dir,files,true,fti);
    if(info.success == false)
    {
        printf("Something went wrong. Now exiting");
        exit(1);
    }
    printf("File table has been filled: %s\n",info.notes);
    printf("File table contains: %d\n", info.fileCount);
    printf("TEST: 'fillFileTable' is successful? --- %s\n", info.success?"true":"false");
    for(i=0;i<info.fileCount;i++)
    {
        if(files[i].type == 'd')
            printf("File Folder is: %s\t its size is %d\n",files[i].fileName,files[i].fileSize);
        else
            printf("File is: %s\t its size is %d\n",files[i].fileName,files[i].fileSize);
    }
    printf("\n\n");
   //printf("TESTd: Exiting after file table printout.\n\n"); exit(0);

   op=fopen("./test_Out.txt", "a");
   if (op == NULL )             /* Could not open file */
    {
        printf("Error opening output file: %s (%u)\n", strerror(errno), errno);
        return 1;
    }

    //for(i=0;i<info.fileCount;i++) //Figure out how to loop through all files - improvement for later
    fp=fopen("./91_1_text/test1.txt", "r");
    if (fp == NULL )             /* Could not open file */
    {
        printf("Error opening input file: %s (%u)\n", strerror(errno), errno);
        return 1;
    }

    iReadReturn = readerParser(fp, op, lbuf, artIn); /* Read the file and print output */
    if (iReadReturn == false)
    {
        printf("ERROR: The file did not read correctly.\n");
        exit(1);
    }
    k = fclose(fp);
    if (k != 0)
        exit(k);

    k = fclose(op);
    if (k != 0)
        exit(k);
    printf("Program Completed Successfuly.\n");
    return 0;

}

TheCount
  • 25
  • 5
  • FYI: `printf("TEST: Got to here %d.\n");` missing an argument. What is your compiler? This should generate a warning with `-Wall` on GCC. – Jonathon Reinhart May 20 '14 at 03:51
  • 1
    Can you post the code from the function that calls `readerParser`? – R Sahu May 20 '14 at 03:55
  • Add in a check that `strlen(buffer) < sizeof artin[0]->title` – M.M May 20 '14 at 04:18
  • I don't see where you are null-terminating `buffer` – M.M May 20 '14 at 04:19
  • You also don't handle the case of `strstr` returning NULL, nor the case of a `<` not being found after the `>`. THe `while(*match!='<')` will read off into oblivion if there is no `<`, since you don't check for null termination. – M.M May 20 '14 at 04:20
  • @JonathonReinhart my compiler is Cygwinn GCC -- I just started using code blocks so it is possible I somehow disabled warnings without realizing it cause it currently doesn't generate anything besides that stack dump. I see the issue though. Thank-you for noticing it. – TheCount May 20 '14 at 16:05
  • @JonathonReinhart I got the warnings turned on now and there are indeed a ton - my bad – TheCount May 20 '14 at 16:30
  • @RSahu - as per your request I added the main function code. – TheCount May 20 '14 at 16:38
  • May I suggest `-Wall -Wextra -Werror`. You'll hate me at first, and thank me eventually. – Jonathon Reinhart May 20 '14 at 20:18

2 Answers2

2

With this function declaration:

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[])

the 4th argument will be an array of pointers to artinfo structures, not a pointer to an array of artinfo structures. When it tries to de-reference these supposed pointers, it fails.

Instead it should be:

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo (*artIn)[])

In addition, fixing these would at least make your program more robust.

  1. There does not seem to be any check for overflowing buffer
  2. You do not explicitly place a null character at the end of buffer. Although you do memset it before entering your loop, if it did overflow then not only might it be too long, it might also not be null terminated.
  3. You should use strncpy instead of strcpy, otherwise you run the risk of overflowing the space allocated in the destination string. Also as noted by Matt, you should manually add a null character to the end of your string after calling strncpy.
harmic
  • 28,606
  • 5
  • 67
  • 91
  • I suspect he actually wants just `artinfo artIn[]` – M.M May 20 '14 at 04:24
  • If using `strncpy` you must also manually null-terminate, as `strncpy` does not do so if the whole buffer was used. Another option is to do a length-check and use `strcpy` only if it succeeds; or use `snprintf`. – M.M May 20 '14 at 04:26
  • Hey @harmic - Great points about the overflow issues and null termination - I was just going for functionality but I definitely see the issues caused by not checking these cases. I will add in code to fix this. – TheCount May 20 '14 at 15:59
  • @MattMcNabb - great points about improving robustness I will make sure to implement those improvements – TheCount May 20 '14 at 16:00
  • @MattMcNabb you clearly read my mind. Thank-you for the correction. I am quite confused though now about sending structs to functions. As per my textbook and a few other reference books I believed you should send the address to the function and then de-reference it to access the data. Is it because I am using an array and so its name is an address? Thanks again. – TheCount May 20 '14 at 17:05
  • @TheCount using an array's name is equivalent to `&name[0]`, except when it's the operand of `&` or `sizeof`. So yes, you are passing the address of the first array member (and since array members are contiguous, the other elements can be found after it in memory) – M.M May 20 '14 at 20:54
0

There are already many suggestions on how you can improve the robustness of your code. I'm going to answer to the main problem you mentioned in your original post.

You said,

Everything seems to work just fine until its reaches the: strcpy((*artIn[0]).title, buffer); and printf("The title is: %s\n", ((*artIn[0]).title); statements. I always get an error and a stack dump - i.e. 1 [main]Parst_Text 7296 open_stackdumpfile: Dumping stack trace to Parse_Text.exe.stackdump I am not really sure what I am doing wrong

The culprit for that problem is that you have

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo *artIn[])

but when you are calling the function, you are using:

readerParser(fp, op, lbuf, artIn);

I am surprised that the compiler didn't flag a warning or an error. artIn is declared as:

artinfo artIn[128] = {};

When you use artIn in a function call, it degrades to a pointer of type artinfo*. It would be good when the argument type of an argument is artinfo* artIn or artinfo artIn[] but not artinfo* artIn[].

You should change the type of artIn in readerParser, both in the declaration and the definition.

bool readerParser(FILE *input, FILE *output, textlbuf *lbuf, artinfo artIn[])

And then replace all usages of *artIn in that function with just artIn.

Hope this helps.

R Sahu
  • 204,454
  • 14
  • 159
  • 270
  • @R Sahu it definitely helps. Thank-you so much. I am still a bit confused though as earlier I tried a number of different combinations and did try sending the address of artIn with my current setup (i.e. `&artIn`) to the function. What is it your version is requesting exactly? Does `artinfo artIn[]` request an address? I thought I could send a pointer and de-refence within the function. Any clarification would definitely help with my learning process. Thanks-again. – TheCount May 20 '14 at 17:20
  • @TheCount This short SO post might be helpful in understanding arrays and using them in function calls. http://stackoverflow.com/questions/3997581/passing-whole-array-to-a-function – R Sahu May 20 '14 at 17:32