0

I have a .dat file that needs to be read, and the program should print the records of the people who have the attributes specified as command-line arguments. The format of the file is as follows. The record for each person begins with an unsigned 32-bit integer that contains various information about the person:

Bits

0-4 : length of first name (key=first)

5-9 : length of middle name (key=middle)

10-14 : length of last name (key=last)

15-21 : age (key=age)

22 : sex (0=male, 1=female) (key=sex)

23-28 : state (0 to 49 in alphabetical order) (key=state)

29 : currently married (0=false, 1=true) (key=married)

30 : employed full time (0=false, 1=true) (key=employed)

31 : attended college (0=false, 1=true) (key=college)

I think I am supposed to read the whole 32-bit (4-byte) integer first, and then extract the individual bits from that integer. I am new to fread and fseek, so I really don't know if I am on the right track; any help will be appreciated. Here is my code so far.

  // Asker's attempt: opens people.dat, measures its size, allocates a buffer
  // of that size, then loops reading chunks of the file and printing them.
  // Returns -1 if the file cannot be opened, 0 otherwise.
  // NOTE(review): the snippet shows no #include lines and no declaration of
  // `i`; as posted it does not compile on its own.
  int main(int argc, char *argv[]) {

  char *buf;
  long lSize;
  size_t result;  // declared but never used in this snippet

  FILE *fp;
  // NOTE(review): opened in text mode ("r") although the file is described
  // as binary -- "rb" is presumably what is wanted; confirm.
  fp = fopen("/u1/junk/people.dat","r");
  if(fp == NULL) {
    printf("Error: can't open file to read\n");
    return -1;
  }
  else {
    printf("File people.dat opened successfully to read\n");
  }


  // obtain file size: seek to the end, read the offset, then seek back
  // (the return values of fseek/ftell are not checked)
  fseek(fp, 0, SEEK_END);
  lSize = ftell(fp);
  rewind(fp);

  // allocate memory to contain the whole file
  // (the result of malloc is not checked, and buf is never freed below)
  buf = (char*) malloc (sizeof(char)*lSize);

  // feof() only turns true after a read has already hit end-of-file, so the
  // loop body runs once more after the last successful read.
  while (!feof(fp)) {
    fread(buf, 4, 1, fp);    // one 4-byte item into the start of buf
    fseek(fp, i, SEEK_CUR);  // skips i more bytes forward from the current position
    fread(buf, 32, 1, fp);   // one 32-BYTE item, overwriting the 4 bytes just read
    // %s needs a NUL-terminated string; nothing here terminates buf
    printf("%s\n", buf);
    i+=32;
  }

  fclose(fp);
  return 0;  
}
Joey Zeng
  • 63
  • 5
  • 2
    Two basic tips: Don't cast the return value of `malloc()` in C, don't use `feof()` to detect end of file like that. – unwind Apr 20 '15 at 15:56
  • I think you should come up with a structure with bit fields and read the whole information in one go. – facebook-100001358991487 Apr 20 '15 at 15:57
  • @facebook-100001358991487 I kindly disagree because that is not a portable implementation, although the OP hasn't mentioned if they need portability. I'd prefer to see OP populate the structure field-by-field. – siliconwafer Apr 20 '15 at 16:42
  • @siliconwafer why it is not portable implementation? – facebook-100001358991487 Apr 20 '15 at 17:19
  • @facebook-100001358991487 siliconwafer, thanks for the advice. I do not need portability, and the bit fields suggestion sounds great. The problem I have now is how to get the 32-bit integer, then I can compare them with my struct members. – Joey Zeng Apr 20 '15 at 17:19
  • @facebook-100001358991487 The structure will be padded differently on different platforms. – siliconwafer Apr 20 '15 at 17:20
  • Here's some more information on bitfields and portability in C. http://stackoverflow.com/questions/1044654/bitfield-manipulation-in-c – siliconwafer Apr 20 '15 at 17:22
  • regarding this line: 'buf = (char*) malloc (sizeof(char)*lSize);' there are two problems. 1) as previously stated, in C, do not cast the returned value from malloc 2) always check (!=NULL) the returned value to assure the operation was successful – user3629249 Apr 20 '15 at 17:35
  • Have you compiled this code? In the posted code, the variable 'i' is not declared, much less assigned a valid value, so the line 'fseek(fp, i, SEEK_CUR);' will fail to compile. Regarding the two calls to fread(): the first call reads 4 bytes from the file into buf[]; the second call overwrites that value with a 32-byte sequence read from some unknown location in the file. There seems to be some confusion between 32 bits and 32 bytes. – user3629249 Apr 20 '15 at 17:41
  • the code needs to check the returned values from fread() fseek() ftell() to assure the operation was successful – user3629249 Apr 20 '15 at 17:43
  • @user3629249 yeah, I had i as an global variable, and yes I am kinda confused with fread... – Joey Zeng Apr 20 '15 at 17:51

2 Answers

1

This is untested but hopefully gives you the idea of the "mask and shift" technique. You can read more about it: What are bitwise shift (bit-shift) operators and how do they work? and Bitfield manipulation in C

Let's say you read the unsigned 32-bit integer:

unsigned long myint;
fread(&myint, sizeof(myint), 1, fid);

Now, shift and mask to pull values out.

// bits 0-4 (5 bits): already at the bottom of the word, so only mask.
// 0x1F is binary 11111, i.e. the low five bits.
unsigned long firstNameLength = myint & 0x1F;

// bit 31 (1 bit): shift it RIGHT down to bit 0 first, then mask off
// everything above it so only that single bit remains.
unsigned long attendedCollege = (myint >> 31) & 0x1;
Community
  • 1
  • 1
siliconwafer
  • 732
  • 4
  • 9
  • This line: 'unsigned long attendedCollege = (myint & 0x0000000E) << 31;' is not correct. 1) 'attended college' is only 1 bit, while 'E' is 3 bits. 2) The bit (if shifted) needs to be shifted to the right by 31 bits, not to the left. Suggest: 'unsigned long attendedCollege = (myint & 0x80000000) >> 31;' BTW: there is no guarantee that 'long int' is 4 bytes; the only guarantee is that it is AT LEAST 4 bytes. For our purposes, it would be much better to declare an array of 4 characters and manipulate the contents. – user3629249 Apr 20 '15 at 20:04
1
#include <stdio.h>

// Bit-field view of the 32-bit header that starts each person record.
// The widths add up to 32 bits. How the compiler orders and packs
// bit-fields is implementation-defined, so overlaying this struct on raw
// file bytes is not portable.
struct personRecord
{
    unsigned int first    : 5;  // length of first name
    unsigned int middle   : 5;  // length of middle name
    unsigned int last     : 5;  // length of last name
    unsigned int age      : 7;  // age
    unsigned int sex      : 1;  // sex flag
    unsigned int state    : 6;  // state index
    unsigned int married  : 1;  // currently-married flag
    unsigned int employed : 1;  // employed flag
    unsigned int college  : 1;  // attended-college flag
};

// prototypes
// The second parameter is the decoded record header passed by value; the
// declaration must match both the call in main() and the definition.
void processRecord(FILE *fp, struct personRecord key);


// Opens the people file and hands every record header, one at a time, to
// processRecord(), which is expected to consume the rest of that record.
// argc/argv are not used yet.
// Returns 0 on success, -1 if the file cannot be opened or a read fails.
int main(int argc, char *argv[])
{
    // size of the fixed header at the start of each record, in bytes
    enum { HEADER_BYTES = 4 };

    struct personRecord key;
    int status = 0;

    // "rb": the file holds binary data, so avoid text-mode translation
    // (it matters on platforms that distinguish text from binary streams).
    FILE *fp = fopen("/u1/junk/people.dat", "rb");
    if (fp == NULL)
    {
        printf("Error: can't open file to read\n");
        return -1;
    }

    // implied else, fopen successful

    printf("File people.dat opened successfully to read\n");

    // fread() returns the number of complete items read, so anything other
    // than 1 ends the loop: either end of file or an I/O error.
    while (1 == fread(&key, HEADER_BYTES, 1, fp))
    {
        processRecord(fp, key);
    }

    // tell a genuine read failure apart from a normal end of file
    if (ferror(fp))
    {
        fprintf(stderr, "Error: failed while reading people.dat\n");
        status = -1;
    }

    fclose(fp);
    return status;
} // end function: main



void processRecord(FILE* fp, personRecord key)
{
      int result;
      int bufFirst[key.first] = {'\0'};
      ...

      // use  lengths to determine how much more  bytes to read for each field
      result = fread( bufFirst, key.first, 1, fp );
      if ( result == key.first )
      { // then successful acquire first name
          ...
      }
      ...
      // first, middle, last,

      // use lookup table to get state name from state field
      char * pState = alphabeticalState[key.state];

      // do something with the extracted info
      ...
} // end function: processRecord


// Lookup table from the record's state field (0..49) to the state name,
// in alphabetical order. Array elements are separated by commas.
const char * alphabeticalStates[] =
{
    "alabama",
    "alaska",
    "arizona",
    "arkansas",
    "california",
    "colorado",
    "connecticut",
    "delaware",
    "florida",
    "georgia",
    "hawaii",
    "idaho",
    "illinois",
    "indiana",
    "iowa",
    "kansas",
    "kentucky",
    "louisiana",
    "maine",
    "maryland",
    "massachusetts",
    "michigan",
    "minnesota",
    "mississippi",
    "missouri",
    "montana",
    "nebraska",
    "nevada",
    "new hampshire",
    "new jersey",
    "new mexico",
    "new york",
    "north carolina",
    "north dakota",
    "ohio",
    "oklahoma",
    "oregon",
    "pennsylvania",
    "rhode island",
    "south carolina",
    "south dakota",
    "tennessee",
    "texas",
    "utah",
    "vermont",
    "virginia",
    "washington",
    "west virginia",
    "wisconsin",
    "wyoming"
};
user3629249
  • 16,402
  • 1
  • 16
  • 17
  • if you want the fields first, middle, last to be strings, then make each array to be the max length of a name+1 and use fgets rather than fread to input the value – user3629249 Apr 20 '15 at 19:46