0
#include "offsetFinder.h"

/**  Reads a GIS record file (as described in the corresponding project
 *   specification), and determines, for each GIS record contained in that
 *   file, the offset at which that record begins.  The offsets are stored
 *   into an array supplied by the caller.
 * 
 *   Pre:  gisFile is open on a GIS record file
 *         offsets[] is an array large enough to hold the offsets
 *   Post: offsets[] contains the GIS record offsets, in the order
 *            the records occur in the file
 *   Returns:  the number of offsets that were stored in offsets[]
 */
uint32_t findOffsets(FILE* gisFile, uint32_t offsets[]) {
    /* Nothing can be scanned or stored without a stream and a destination. */
    if (gisFile == NULL || offsets == NULL) {
        return 0;
    }

    /* Offsets are measured from the beginning of the file, and the first
     * line is the header, so always scan from the start of the stream. */
    rewind(gisFile);

    uint32_t count = 0;

    /* Each pass of this loop consumes exactly one line.  The first line is
     * the header (no GIS record lives at offset 0), so it is never stored. */
    for (int isHeader = 1; ; isHeader = 0) {
        /* Position of the first byte of the line about to be read. */
        long lineStart = ftell(gisFile);
        if (lineStart < 0 || (unsigned long)lineStart > UINT32_MAX) {
            break;  /* not seekable, or offset does not fit in uint32_t */
        }

        /* Read through the end of the line, noting whether it holds any
         * data; blank lines (including a lone "\r") are not records. */
        int hasContent = 0;
        int ch;
        while ((ch = fgetc(gisFile)) != EOF && ch != '\n') {
            if (ch != '\r') {
                hasContent = 1;
            }
        }

        if (hasContent && !isHeader) {
            offsets[count++] = (uint32_t)lineStart;
        }

        /* A final record without a trailing newline was handled above. */
        if (ch == EOF) {
            break;
        }
    }

    return count;
}

Hello all, I have a question about this assignment. Is this set up alright? For the GISData.txt, I am supposed to read through the file and I have to return the number of offsets that were stored in offsets[].

FEATURE_ID|FEATURE_NAME|FEATURE_CLASS|STATE_ALPHA|STATE_NUMERIC|COUNTY_NAME|COUNTY_NUMERIC|PRIMARY_LAT_DMS|PRIM_LONG_DMS|PRIM_LAT_DEC|PRIM_LONG_DEC|SOURCE_LAT_DMS|SOURCE_LONG_DMS|SOURCE_LAT_DEC|SOURCE_LONG_DEC|ELEV_IN_M|ELEV_IN_FT|MAP_NAME|DATE_CREATED|DATE_EDITED
885513|Siegrest Draw|Valley|NM|35|Eddy|015|323815N|1043256W|32.6376116|-104.5488549|323859N|1043732W|32.6498321|-104.6255227|1095|3592|Parish Ranch|11/13/1980|
885526|AAA Tank|Reservoir|NM|35|Eddy|015|321043N|1041456W|32.1786543|-104.2489615|||||1006|3300|Bond Draw|11/13/1980|06/23/2011
885566|Adobe Draw|Valley|NM|35|Eddy|015|322820N|1042141W|32.4723375|-104.361345|322704N|1042129W|32.4511111|-104.3580556|1007|3304|Carlsbad West|11/13/1980|
885567|Adobe Flat|Flat|NM|35|Eddy|015|322849N|1042119W|32.4803932|-104.3552339|||||1006|3300|Carlsbad West|11/13/1980|
885607|Alacran Hills|Range|NM|35|Eddy|015|322812N|1041055W|32.4701183|-104.1818931|||||1009|3310|Carlsbad East|11/13/1980|
885684|Alkali Lake|Lake|NM|35|Eddy|015|323039N|1041133W|32.5109371|-104.1924802|||||966|3169|Angel Draw|11/13/1980|06/23/2011
885697|Allen Well|Well|NM|35|Eddy|015|322309N|1042120W|32.3859489|-104.3555084|||||1038|3405|Carlsbad West|11/13/1980|

This is a snippet of the GISData.txt and each region data (a line) is considered a GIS record. "The offsets referred to in the assignment are the positions at which the GIS records begin in the GIS data file.

Since each GIS record occupies a whole line, the offset of a GIS record is simply the offset of the first byte in the GIS record.

And, of course, the first line in the GIS data file does not contain a GIS record, so there is no GIS record at offset 0."

Can someone look over my code and revise it if I'm completely wrong? Thank you!!

  • 1
    The first argument to `fopen` should be a `char *`. Passing a `FILE *` is wrong. – William Pursell Mar 15 '21 at 19:29
  • `fgets` is not suited for this problem. If you use `fgets` you need to add logic to verify that a full line was read. Just use `fgetc` and take action whenever you see a newline. – William Pursell Mar 15 '21 at 19:32
  • 1
    This: `uint32_t offsets[]` (argument of function) and this: `char offsets[1000];` (declared in function.) are in conflict. Did you get a compiler warning about this? – ryyker Mar 15 '21 at 19:36
  • Suggest fixing prototype to have a `char *fileSpec` in the function prototype ( `uint32_t findOffsets(const char *fileSpec, uint32_t offsets[])` ), and pass that as the first argument to `FILE *op = fopen(filespec, "r");`. – ryyker Mar 15 '21 at 19:42
  • To clarify then, in the following: `885513|Siegrest Draw|Valley|NM|35|Eddy|015...`, would `885513` be considered the _offset_? – ryyker Mar 15 '21 at 19:44
  • _"the offset of a GIS record is simply the offset of the first byte"_. It would be better to use a word other than _offset_ to describe what an _offset_ is. _"...offset of the first byte"_ ***from what***? – ryyker Mar 15 '21 at 19:50
  • @ryyker yes I think every beginning of a new line is an offset. –  Mar 15 '21 at 19:53
  • So, for example in my comment above, `885513` is considered the offset? – ryyker Mar 15 '21 at 19:55
  • You should push back on the assignment. In any reasonable universe, the function would take a 3rd parameter that gives the size of `offsets`. Without that information, it is impossible for the function to be used safely, as it is not possible for the caller to know how large `offsets` needs to be to be large enough without doing all of the work that this function is going to do. – William Pursell Mar 15 '21 at 20:07
  • Based on how you have responded in comments, the problem can be more generically described as _"how to parse a single field from all lines in a file"_. The rest is specific information that only you need to know. i.e. it is not important that anyone answering this question really needs to know that the first field in each line is called an offset, or what an offset is. Only that the first '|' delimited field in each line (except the header line) is to be converted into an `int` value, and stored. – ryyker Mar 16 '21 at 18:52

1 Answers1

0

"For the GISData.txt, I am supposed to read through the file and I have to return the number of offsets that were stored in offsets[]....Can someone look over my code and revise it if I'm completely wrong?"

First, it appears that there is some confusion about what offset means in this question. And, after Googling "gis offset", I can understand why. Here is a GIS-specific definition:

_" offset

[cartography] In cartography, the displacement or movement of features so that they do not overlap when displayed at a given scale.

For example, a road can be offset from a river if the symbols are wide enough that they overlap.

[symbology] In symbology, the shift of the origin or insertion point of a symbol in an x and/or y direction.

[ESRI software] In ArcGIS, a change in or the act of changing the z-value for a surface or features in a scene by a constant amount or

by using an expression. Offsets may be applied to make features draw just above a surface."_

And this "GIS System in C" definition:

"The file can be thought of as a sequence of bytes, each at a unique offset from the beginning of the file, just like the cells of an array. So, each GIS record begins at a unique offset from the beginning of the file"

These two definitions, although both deriving from searches on gis offset, are so different as to not offer any clarification on what the terms mean in this question. For purposes of this answer then I am taking my cues from your responses in the comments, and will address how to parse the first field in each record of the file (excluding the header record in line 1).

Here are some suggested steps to consider that could be used to implement this.

Steps to consider:

Prototype design: As described in comments, the prototype of the findOffsets() function should provide the following: filespec, size of array, array. Not mentioned in comments, but also useful, might be the length of the longest record that will be read. e.g.:

uint32_t findOffsets(const char *fileSpec, size_t longestElement, size_t numElements, uint32_t offsets[numElements]);

From calling function

  • read the file once to determine the number of records, e.g. numRecords. See the
    int count_names(const char *filename, size_t *count){...}
    example linked here for how to count the records in a file (and, when needed, to get the length of the longest record). Close the file when done.
  • use number of records from previous step to size the array.

Example:

uint32_t offsets[numRecords-1];  //-1 skipping header line 
memset(offsets, 0, sizeof offsets);
  • call findOffsets()

Example:

size_t numOffsets = sizeof offsets/sizeof *offsets;
uint32_t count = findOffsets("c:\\gis\\data.gis", longestRecord, numOffsets, offsets);
if(count > 0)
{
   //do something with records
}   

Inside findOffsets()

  • Open file for second read of process
  • Read each line of file (skipping header line)
  • Parse first '|' delimited token from each line
  • convert parsed token from string to integer
  • close file
  • return count of lines processed.

A code example (with very limited safeties/error checking) is below showing how this could be done. It was tested using your sample file contents, and borrows from code linked above, adapted to this purpose:

// Path of the GIS data file; main() passes it to count_lines_in_file() and findOffsets().
const char *fileSpec = "C:\\some_directory\\gisData.gis";

// Forward declarations of the helpers defined after main().
// findOffsets: fills offsets[] from the file named by fileSpec and returns how many values it stored.
uint32_t findOffsets(const char *fileSpec, size_t longestElement, size_t numElements, uint32_t offsets[numElements]);
// count_lines_in_file: passes the line count back through *count and returns the longest line length.
int count_lines_in_file(const char *filename, size_t *count);
// filesize: returns the size of the named file in bytes.
size_t filesize(const char *fn);

/* Example driver: sizes an array from a first pass over the file, then
 * calls findOffsets() to fill it.  Returns 0 on success, 1 on failure. */
int main(void)
{
    size_t numRecords = 0;

    /* First pass: count the lines and find the longest one. */
    int longestRecord = count_lines_in_file(fileSpec, &numRecords) + 1; //+1 room for null terminator

    /* numRecords includes the header line, so at least two lines are needed
     * for one record.  Without this guard, numRecords - 1 would wrap around
     * (size_t is unsigned) when the file is missing or empty, or be zero
     * when only the header is present. */
    if (numRecords < 2)
    {
        fprintf(stderr, "No GIS records found in %s\n", fileSpec);
        return 1;
    }

    size_t capacity = numRecords - 1; //-1 - skipping header line

    /* Heap allocation (zero-initialized) avoids overflowing the stack on
     * large files, which a variable-length array could do. */
    uint32_t *offsets = calloc(capacity, sizeof *offsets);
    if (offsets == NULL)
    {
        perror("calloc");
        return 1;
    }

    /* Second pass: do the work. */
    uint32_t recordsProcessed = findOffsets(fileSpec, (size_t)longestRecord, capacity, offsets);
    (void)recordsProcessed; /* offsets[0 .. recordsProcessed-1] are ready for use here */

    free(offsets);
    return 0;
}

/**
 * Reads the file named by fileSpec and stores the numeric first field
 * (the text before the first '|') of each record line into offsets[].
 *
 * Lines whose first field is not a number -- the header line, blank lines --
 * are skipped.  At most numElements values are written.
 *
 * fileSpec        path of the file to read
 * longestElement  expected length of the longest line; only used to size the
 *                 read buffer, longer lines are handled safely
 * numElements     capacity of offsets[]
 * offsets         destination array
 *
 * Returns the number of values stored in offsets[]; 0 if the file cannot be
 * opened or the arguments are unusable.
 */
uint32_t findOffsets(const char *fileSpec, size_t longestElement, size_t numElements, uint32_t offsets[numElements])
{
    if (fileSpec == NULL || offsets == NULL || numElements == 0)
    {
        return 0;
    }

    FILE *fp = fopen(fileSpec, "r");
    if (fp == NULL)
    {
        return 0;
    }

    char line[longestElement + 2]; //+2 - room for the newline and the null terminator
    uint32_t inx = 0;

    // Stop as soon as the caller's array is full so it is never overrun.
    while (inx < numElements && fgets(line, (int)sizeof line, fp))
    {
        // A full buffer with no newline means the line did not fit: discard
        // its tail so the leftover is not mistaken for the next record.
        size_t len = strlen(line);
        int truncated = (len == sizeof line - 1 && line[len - 1] != '\n');
        if (truncated)
        {
            int ch;
            while ((ch = fgetc(fp)) != EOF && ch != '\n')
            {
                // skip the rest of the physical line
            }
        }

        // Convert the leading field; strtoul reports where the digits ended.
        char *end = NULL;
        unsigned long value = strtoul(line, &end, 10);

        if (end == line)
        {
            continue; // no digits: header line, blank line, or malformed record
        }
        if (value > UINT32_MAX)
        {
            continue; // does not fit in the destination type
        }
        if (truncated && *end == '\0')
        {
            continue; // the number itself may have been cut off by the buffer
        }

        offsets[inx++] = (uint32_t)value;
    }

    fclose(fp);
    return inx;
}

//Passes back, through *count, the number of non-empty lines in the file and
//returns the length of the longest line (newline not included).
//If the file cannot be opened, *count is set to 0 and 0 is returned.
//The file is scanned one character at a time, so no buffer has to be sized
//to the file and a final line without a trailing newline is counted once.
int count_lines_in_file(const char *filename, size_t *count)
{
    size_t lines = 0;    //non-empty lines seen so far
    size_t current = 0;  //length of the line being scanned
    size_t longest = 0;  //longest line seen so far

    FILE *fp = (filename != NULL) ? fopen(filename, "r") : NULL;
    if(fp)
    {
        int ch;
        while((ch = fgetc(fp)) != EOF)
        {
            if(ch != '\n')
            {
                current++;
                continue;
            }
            //end of a line: empty lines are not counted
            if(current > 0)
            {
                lines++;
                if(longest < current) longest = current;
            }
            current = 0;
        }
        //last line had no trailing newline
        if(current > 0)
        {
            lines++;
            if(longest < current) longest = current;
        }
        fclose(fp);
    }

    if(count) *count = lines;

    //the declared return type is int; lines are assumed to be far shorter than INT_MAX
    return (int)longest;
}

//Returns the size of the file in bytes (opened in binary mode so no newline
//translation affects the count). Returns 0 if the name is NULL, the file
//cannot be opened, or its size cannot be determined.
size_t filesize(const char *fn)
{
    size_t size = 0;
    FILE *fp = (fn != NULL) ? fopen(fn, "rb") : NULL;
    if(fp)
    {
        //only trust ftell() after a successful seek to the end
        if(fseek(fp, 0, SEEK_END) == 0)
        {
            long end = ftell(fp);
            //ftell() returns -1L on failure; never let that become a huge size_t
            if(end > 0) size = (size_t)end;
        }
        fclose(fp);
    }
    return size;
}
ryyker
  • 22,849
  • 3
  • 43
  • 87