-1

I was trying to search for string keys in my BST.

Structure for node and keydata_t as following.

struct node {
char *key;
char *details;
BST* left;
BST* right;
};

typedef struct{
 char *name;
 char *data;
}datatype_t;    

typedef struct{
 char *name;
}keydata_t;

My code for search function:

struct node*search(BST *root, keydata_t *t){
    BST *r = root;  

    if (r != NULL){
        printf("here\n");   
    if(strcmp(r->key, t->name) == 0){
        printf("found\n");
    }
    else if(strcmp(r->key,t->name) < 0){
        printf("left\n");
        return search(r->left, t);
    }
    else {
        printf("right\n");
        return search(r->right, t);
}
}

Insert Function:

struct node*insert(struct node*r, datatype_t *d){

if(r == NULL)
{
    //printf("empty\n");
    r = (struct node*)malloc(sizeof(struct node));
    r->key = d->name;
    r->details = d->data; 
    r->left = r->right = NULL;
    return r; 
}

else if(strcmp(r->key, d->name ) <= 0){
    r->left = insert(r->left ,d);
}
else if(strcmp(r->key, d->name ) > 0){
    r->right = insert(r->right,d);
}
return r;

}

Read functions:

FILE* safe_open_file(const char* file_name, const char* mode)
{
FILE* csv = fopen(file_name,mode);
if(csv==NULL){
    printf("%s %c ad\n",file_name,*mode);
    exit(EXIT_FAILURE);
    return NULL;
}
return csv;
}
void printfile(FILE* fp, datatype_t* d)
{
char name[64+1];
char data[1465+1];
fprintf(fp, "%s--> %s", d->name, d->data);
}

datatype_t*read(FILE* fp)
{
 char name[66];
 char data[1466];
 if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
    datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
    d->name = strdup(name);
    d->data = strdup(data);
    return d;
}
return NULL;
}
keydata_t*read_key(FILE* fp){

char buffer[66];
if (fgets(buffer,sizeof buffer, fp) != NULL ) {
    keydata_t *k = (keydata_t*)malloc(sizeof(keydata_t));
    size_t len = strlen(buffer);
    if(buffer > 0 &&  buffer[len-1] == '\n'){
        buffer[--len] = '\0';
    }
    k->name = strdup(buffer);
    return k;
}
return NULL;
}

Main function:

int main(int argc, char** argv)
{
char* dataset = NULL;
char* dataset1 = NULL;
char* key = NULL;
char* read_mode="r";
char* write_mode="w+";
struct node* root = NULL;

if(argv[1]!=NULL && argv[2] != NULL && argv[3] != NULL)
{
    dataset=argv[1];
    dataset1 = argv[2];
    key = argv[3];
}
else
{
    return EXIT_FAILURE;
}
FILE* Data_input = safe_open_file(dataset,read_mode);
FILE* Data_output = safe_open_file(dataset1, write_mode);
FILE* keyfile = safe_open_file(key, read_mode);

datatype_t **ptr = (datatype_t**)malloc(MAX_NUM_LINE*sizeof(datatype_t *));
datatype_t *ptr_one;
keydata_t *keyt;


int index = 0;
while((ptr_one = read(Data_input)) != NULL)
{
    root = insert(root, ptr_one);

}

while((keyt = read_key(keyfile))!= NULL){
    search(root, keyt);
}




fclose(Data_input);
fclose(Data_output);
fclose(keyfile);

}

For some reason it doesn't print found at any point.

These are the keys I am getting from the txt file using fgets.

Mr neil
Verizon
keylor

So from my Bst it should return found for the first key but not for the rest. My output wont print anything, it just executes.

Dave
  • 71
  • 1
  • 10
  • There are libraries for this. – Iharob Al Asimi Sep 12 '16 at 16:28
  • Did you account for the possible newline character at the end of the strings read by `fgets()`? Also, why do you call `search()` on the left child after you find a match? And you don't return anything for a NULL `root`... – Dmitri Sep 12 '16 at 16:32
  • @user3121023 : Nope, Ive fixed it to the null pointer – Dave Sep 12 '16 at 16:34
  • @Dmitri : Hi, ive changed that to NULL pointer, I am doing this because my insert function keeps on inserting to left side if its equal to the key value – Dave Sep 12 '16 at 16:35
  • @iharob: care to mention a few? I am not so good with the BST search with strings – Dave Sep 12 '16 at 16:36
  • @Dave, this is off topic on SO, you need to ask about this somewhere else. – Iharob Al Asimi Sep 12 '16 at 16:38
  • `NULL` and a null character are not the same (though that's probably not causing your issue). As is, your `search()` function should print `"found\n"` if it finds a match -- but it'll never return a valid node pointer because you always continue searching until you reach a `NULL` node, and you have no return statement for that case (unless the end of your function is missing...). – Dmitri Sep 12 '16 at 16:38
  • @iharob : But I cant really use any libraries for this. How could I check this without them ? – Dave Sep 12 '16 at 16:38
  • Your title is misleading, please fix it. My comment is based on the question title. – Iharob Al Asimi Sep 12 '16 at 16:39
  • @Dave I think you'll need to post the rest of your code, too, to enable us to find a solution for your problem. Otherwise it's just poking in the dark with a slightly sticky pole. – deamentiaemundi Sep 12 '16 at 16:40
  • @iharob : Thank you so much for that! – Dave Sep 12 '16 at 16:40
  • @deamentiaemundi : Okay I am on it. Ill add the rest but its just really long! hope thats fine – Dave Sep 12 '16 at 16:41
  • @Dave the limit is 30k as far as I remember – deamentiaemundi Sep 12 '16 at 16:42
  • @deamentiaemundi : Hi i've added in the rest of the bits except the reading from files which works well as I've tested it multiple times – Dave Sep 12 '16 at 16:45
  • @Davre I have a very faint idea, judging from the rest of the code, that the problem lies in the reading from the file. – deamentiaemundi Sep 12 '16 at 16:51
  • @Dmitri : Well I printed them out one by one, and it stays on Mr neil, which is what I want right? so I can search them from top down? – Dave Sep 12 '16 at 16:52
  • 1
    @deamentiaemundi : Should I post my read functions here to see if it shows any errors? – Dave Sep 12 '16 at 16:53
  • @Dave yes, that was my intention – deamentiaemundi Sep 12 '16 at 16:53
  • The `fscanf()` call in your `read()` function may include the previous line's newline character at the start of `name` for the second and successive lines. – Dmitri Sep 12 '16 at 16:59
  • @Dmitri : shouldn't the [^\n] take care of that ? – Dave Sep 12 '16 at 17:03
  • That prevents the newline from being included in the second field... but it's still left in the stream to be included in the first field on the next call. – Dmitri Sep 12 '16 at 17:04
  • @Dmitri Oh yeah I see, but the file I am reading in to insert is a CSV file so Ive used that format. – Dave Sep 12 '16 at 17:09
  • @user3121023 : So I use a CSV file to read in the data and make a BST and then use a txt file to search in the BST using only the key values. – Dave Sep 12 '16 at 17:10
  • If you don't need to keep whitespace at the start of `name`, try adding a space to the start of the `fscanf()` format string (which will eat leading whitespace, including the leftover newline). – Dmitri Sep 12 '16 at 17:22
  • @Dmitri : There isn't a whitespace at the start If I am correct. i think the problem lies in the calling of search function somewhere! – Dave Sep 12 '16 at 17:24
  • 1
    Are you sure there's no newline in `name` for the second (and remaining) lines? Your code looks like there would be... `%[^\n]` reads until it finds a newline, but it doesn't burn of the newline that it stops at. And `%[^,]` doesn't skip whitespace or stop at newline -- it stops at `','`. Actually, your second field would include the comma, too. – Dmitri Sep 12 '16 at 18:08
  • Please post sample data – Iharob Al Asimi Sep 12 '16 at 18:12
  • Yes @Dmitri, I think that if you check out the end part of my read function you'll see that I am changing every '\n' to a '\0'. – Dave Sep 12 '16 at 18:14
  • @Dmitri Nope its a csv file so there is no newline for the name – Dave Sep 12 '16 at 18:58
  • The extra newline I'm talking about is in your `read()` function, *not* in `read_key()`. I'm suggesting that the names in the tree (except for the first one added) have an extra newline at the beginning, not the strings you're passing to the search function. Did you try adding a space to the start of the `fscanf()` format string? – Dmitri Sep 12 '16 at 20:28
  • @Dmitri Is this what you mean to add space to the start of fscanf()? fscanf (fp, "% [^,] %[^\n]",name,data); – Dave Sep 13 '16 at 01:59
  • Before the percent sign, not after it – Dmitri Sep 13 '16 at 02:15
  • (fp, "'space'%[^,],'space'%[^\n]",name,data); @Dmitri something like this? – Dave Sep 13 '16 at 02:40
  • Something like`fscanf(fp, " %[^,] , %[^\n]", name, data);`. Also, you should also check `fscanf()`'s return value to make sure it assigned two fields. – Dmitri Sep 13 '16 at 02:40
  • @Dmitri : Oh I see, yeah even after that It still says its less than the other one where it should print same. It does return two values the fscanf. – Dave Sep 13 '16 at 02:44
  • @Dmitri : Hi, i looped over the string name character by character and it looks like that is where the problem is because it prints out the name followed by uninitialised characters. – Dave Sep 13 '16 at 03:31

2 Answers2

0

There are a lot of problems in your code

  1. Your search() function is recursive, but it doesn't return when root == NULL, and that invokes Undefined Behavior.

  2. There's a type datatype_t in your code that is not defined anywhere, and BST isn't defiend either so I doubt this is the real code because it doesn't compile.

  3. You cast void * returned from malloc() which is not only unecessary but can be a problem. But you never check if it returned NULL which I don't think is a problem but is good practice.

  4. You never free() malloc()ed memory, which is also not a problem but it's bad in general. Specialy, if your program should keep for a long time.

Community
  • 1
  • 1
Iharob Al Asimi
  • 52,653
  • 6
  • 59
  • 97
  • Once it's in the BST, he can't exactly use `qsort()` on it... but the tree should be ordered correctly if he does his inserts appropriately. – Dmitri Sep 12 '16 at 16:45
  • So you mean to create a balanced Tree by sorting it ? – Dave Sep 12 '16 at 16:46
  • Its really hard to sort the nodes, Its a BST, so all I am looking to do here is just search the nodes from a txt file once the insert function is executed properly. – Dave Sep 12 '16 at 16:49
  • He's not using a binary search on array data, he's using a binary search tree -- traversing all the nodes to find a match kind of defeats the whole point of the tree. His approach is correct, just buggy. – Dmitri Sep 12 '16 at 18:04
  • Hi Thanks for giving such an insight into my code. So I edited the code and it'd be great if you could check again to see if you find anything funny. I will free the memory once I figure out the problem. – Dave Sep 12 '16 at 18:30
  • The datatype_t and BST are defined, I have included them. Sorry to not have put them up earlier – Dave Sep 12 '16 at 18:33
0

The sizes of some buffers (66, 1466) reminded me that I helped another poster with a quite similar problem. They got one of the large Yelp files, massaged into a 90MB CSV file with the names (some included spaces) as the keys and the rest as data. Using the name as the key means that that key is not unique, of course, and I proposed a linked list for the data to handle it (was the simplest I could come up with).

Another problem: the CSV file was sorted, so a simple BST would be quite skewed, it needs to be balanced.

As it is very similar to the problem of the OP, although not the actual question, I take the liberty to post it here. May it be helpful.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

// make a string out of a number macro
#define MAKE_STRING(x) #x
#define CONVERT_TO_STRING(x) MAKE_STRING(x)

#define MAX(x,y) (((x) > (y)) ? (x) : (y))

// For search-key buffer
#define MAXNAMELENGTH 512
#define MAXSCANF CONVERT_TO_STRING(MAXNAMELENGTH)

// linked-list node to hold the actual data
// TODO: extend to a skip-list/another tree
struct list {
  char *data;
  struct list *next;
};
typedef struct list list;

// AVL tree
struct node {
  // should have been named "key", I s'ppose?
  char *name;
  // Data goes in a linked list
  list *ll;
  struct node *left;
  struct node *right;
  // the single extra information needed for balancing: the level
  int height;
};
typedef struct node node;

node *search(node ** tree, char *key);

int height(node * tree);
int diff_height(node * tree);
node *rot_left(node * tree);
node *rot_right(node * tree);

int insertion(node ** r, char *key, char *value);

void bt_print(node * leaf);
void deltree(node * tree);
//void trim(char *source);

// getline() used for clarity of this example.
// Uses a version from the Stackoverflow documentation
// if not available natively. See getline() at the end
// of this code for a link
#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L
#   include <sys/types.h>
ssize_t getline(char **pline_buf, size_t * pn, FILE * fin);
#endif

#ifdef SORTED_LIST
int ll_sorted_insert(list ** head, char *value);
#else
int ll_insert(list ** head, char *value);
#endif

void ll_free(list * knot);
void ll_print(list * knot);
void ll_fprint(FILE * fp, list * knot);
// int ll_search(list * knot, char *value);

int main(int argc, char **argv)
{
  node *root;
  FILE *fp;
  int res;
  size_t bufsize = MAXNAMELENGTH + 2;
  char *buffer;
  char *keyin = NULL;
  char *valuein = NULL;
  char *comma;
  char *point;

  char *line_buf = NULL;
  size_t line_buf_size = 0;
  ssize_t line_size;

  int exitvalue = EXIT_SUCCESS;

  int processed = 0;

  root = NULL;

  // must have at least one argument
  if (argc < 2) {
    fprintf(stderr, "Usage: %s datafile [< key-list]\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  // tried to keep everything on the heap.
  buffer = malloc(bufsize);
  if (buffer == NULL) {
    fprintf(stderr, "Malloc failed\n");
    exit(EXIT_FAILURE);
  }
  // the datafile
  fp = fopen(argv[1], "r");
  if (fp == NULL) {
    fprintf(stderr, "Opening \"%s\" for reading failed\n", argv[1]);
    exit(EXIT_FAILURE);
  }
  // takes search keys from stdin only. You might see a need to change that

  // main difference to fgets(): getline() takes care of the memory
  while ((line_size = getline(&line_buf, &line_buf_size, fp)) > 0) {
    if (ferror(fp)) {
      // TODO: check errno for the exact kind of error
      fprintf(stderr, "An eror occured during reading \"%s\"\n", argv[1]);
      exitvalue = EXIT_FAILURE;
      goto _END;
    } else if (feof(fp)) {
      break;
    }
    // assuming strict KEY","DATA"\n" and without whitespace

    //TODO: check if line_size < bufsize!

    // valuein points to the comma before DATA
    valuein = strchr(line_buf, ',');
    // skip comma
    valuein++;
    // valuein points to DATA now
    // might have no new line at the end
    if ((point = strchr(valuein, '\n')) != NULL) {
      *point = '\0';
    }
    //printf("%s",valuein);
    // comma points to the comma before DATA
    comma = strchr(line_buf, ',');
    // make *comma NUL, that is: replace ',' with '\0'
    *comma = '\0';
    keyin = line_buf;
    // keyin points to KEY now

    if (!insertion(&root, keyin, valuein)) {
      fprintf(stderr, "An eror occured during inserting \"%s\"\n", keyin);
      exitvalue = EXIT_FAILURE;
      goto _END;
    }
    if (++processed % 1000 == 0) {
      printf("Processing line %d\n", processed);
    }
  }

  // bt_print(root);

  free(line_buf);

  // search-keys come from either stdin or get typed in.
  // things typed in are also stdin
  puts("Searching...");
  while ((res = scanf("%" MAXSCANF "s", buffer)) == 1) {
    if (strcmp(buffer, "exit") == 0) {
      break;
    }
    // ignoring return for now
    (void) search(&root, buffer);
  }

_END:
  // clean up
  deltree(root);
  free(buffer);
  fclose(fp);
  exit(exitvalue);
}

// Not used here, code is already overly cluttered
/*
#include <ctype.h>
// trim whitespace, fore and aft
// look into your textbook, you might find it there, too
char *trim_both(char *s)
{
  char *end;
  while (isspace(*s)) {
    s++;
  }
  if (*s == '\0') {
    return s;
  }
  end = s + stringlength(s) - 1;
  while (end > s && isspace(*end)) {
    end--;
  }
  *(end + 1) = '\0';
  return s;
}
*/

// traverse tree (defauls to preorder) and print the linked lists at the nodes
void bt_print(node * leaf)
{
  if (leaf) {
#ifdef PRINT_POSTORDER
    bt_print(leaf->left);
    bt_print(leaf->right);
    puts("LEAF_START");
    printf("%s\n", leaf->name);
    ll_print(leaf->ll);
    puts("LEAF_END\n");
#elif (defined PRINT_INORDER)
    bt_print(leaf->left);
    puts("LEAF_START");
    printf("%s\n", leaf->name);
    ll_print(leaf->ll);
    puts("LEAF_END\n");
#else
    puts("LEAF_START");
    printf("%s\n", leaf->name);
    ll_print(leaf->ll);
    puts("LEAF_END\n");
    bt_print(leaf->left);
    bt_print(leaf->right);
#endif
  }
}

// (simple) implementation of an AVL tree

// or use a macro if "inline" is not possible
// get height
inline int height(node * tree)
{
  return (tree == NULL) ? 0 : tree->height;
}

inline int diff_height(node * tree)
{
  return (tree == NULL) ? 0 : height(tree->left) - height(tree->right);
}

// rotation of branch for balancing
node *rot_left(node * tree)
{
  node *right = tree->right;
  // temporary variable for the swap, pardon, rotation
  node *left;

  // rotate
  left = right->left;
  right->left = tree;
  tree->right = left;

  // increment heights
  tree->height = MAX(height(tree->left), height(tree->right)) + 1;
  right->height = MAX(height(right->left), height(right->right)) + 1;

  return right;
}

// rotation of branch for balancing
node *rot_right(node * tree)
{
  node *left = tree->left;
  node *right;

  right = left->right;
  left->right = tree;
  tree->left = right;

  tree->height = MAX(height(tree->left), height(tree->right)) + 1;
  left->height = MAX(height(left->left), height(left->right)) + 1;

  return left;
}

// insert a key/value pair into the tree
int insertion(node ** r, char *key, char *value)
{
  int heightdiff = 0;
  if (*r == NULL) {
    // allocate memory for a new node
    *r = malloc(sizeof(node));
    if (r == NULL) {
      return 0;
    }
    // the caller gave us nothing but pointers to the actual content
    // so allocate memory...
    (*r)->name = malloc(strlen(key) + 1);
    if ((*r)->name == NULL) {
      return 0;
    }
    // and make a copy
    strcpy((*r)->name, key);
    // Must be NULL for ll_sorted_insert() to work
    (*r)->ll = NULL;
    // insert value into a linked list either sorted or
    // or as it comes
#ifdef SORTED_LIST
    ll_sorted_insert(&(*r)->ll, value);
#else
    ll_insert(&(*r)->ll, value);
#endif
    // terminate node
    (*r)->left = NULL;
    (*r)->right = NULL;
    // set height, used for the AVL-tree balancing, to one
    (*r)->height = 1;
  }
  // search a place for the key
  // Checks of returns omitted for clarity
  else if (strcmp(key, (*r)->name) < 0) {
    insertion(&(*r)->left, key, value);
  } else if (strcmp(key, (*r)->name) > 0) {
    insertion(&(*r)->right, key, value);
  } else {
    // insert the data for the key into the linked list
#ifdef SORTED_LIST
    ll_sorted_insert(&(*r)->ll, value);
#else
    ll_insert(&(*r)->ll, value);
#endif
    return 1;
  }

  // And now balance the tree
  // see your textbook or even wikipedia for an explanation
  // (they have colored pictures!)
  (*r)->height = MAX(height((*r)->left), height((*r)->right)) + 1;

  heightdiff = diff_height(*r);

  // left-left
  if (heightdiff > 1 && strcmp(key, (*r)->left->name) < 0) {
    *r = rot_right(*r);
    return 1;
  }
  // right-right
  if (heightdiff < -1 && strcmp(key, (*r)->right->name) > 0) {
    *r = rot_left(*r);
    return 1;
  }
  // left-right
  if (heightdiff > 1 && strcmp(key, (*r)->left->name) > 0) {
    *r = rot_right(*r);
    return 1;
  }
  // right-left
  if (heightdiff < -1 && strcmp(key, (*r)->right->name) < 0) {
    *r = rot_left(*r);
    return 1;
  }
  return 1;
}

// insert data into a linked list. Sorted here...
int ll_sorted_insert(list ** head, char *value)
{
  list *current;
  list *llnode;

  llnode = malloc(sizeof(list));
  if (llnode == NULL) {
    return 0;
  }
  // as with the key, we only got a pointer and need to take care of the
  // memory here
  llnode->data = malloc(strlen(value) + 1);
  if (llnode->data == NULL) {
    return 0;
  }
  strcpy(llnode->data, value);

  if (*head == NULL || strcmp((*head)->data, value) >= 0) {
    llnode->next = *head;
    *head = llnode;
  } else {
    // put before
    current = *head;
    while (current->next != NULL &&
           strcmp(current->next->data, llnode->data) < 0) {
      current = current->next;
    }
    llnode->next = current->next;
    current->next = llnode;
  }
  return 1;
}

// ... and here as it comes in
int ll_insert(list ** head, char *value)
{
  list *llnode;

  llnode = malloc(sizeof(list));
  if (llnode == NULL) {
    return 0;
  }
  llnode->data = malloc(strlen(value) + 1);
  if (llnode->data == NULL) {
    return 0;
  }
  // put in front
  strcpy(llnode->data, value);
  llnode->next = *head;
  *head = llnode;
  return 1;
}

// traverse the tree and delete everything
void deltree(node * tree)
{
  if (tree) {
    deltree(tree->left);
    deltree(tree->right);
    ll_free(tree->ll);
    free(tree->name);
    free(tree);
    tree = NULL;
  }
}

node *search(node ** tree, char *key)
{
  node *tmp;
  if (!(*tree)) {
    // search exhausted (or no tree at all)
    printf("NOT FOUND %s\n", key);
    return NULL;
  } else {
    if (strcmp(key, (*tree)->name) < 0) {
      tmp = search(&(*tree)->left, key);
      return tmp;
    } else if (strcmp(key, (*tree)->name) > 0) {
      tmp = search(&(*tree)->right, key);
      return tmp;
    } else {
      // found, print the linked list
      printf("FOUND %s\n", key);
      ll_print((*tree)->ll);
      return *tree;
    }
  }
}

// Helpers for the linked list

// free the linked list
void ll_free(list * knot)
{
  list *cur;
  while (knot != NULL) {
    cur = knot;
    free(knot->data);
    knot = knot->next;
    free(cur);
  }
}

// print the linked list to stdout
void ll_print(list * knot)
{
  while (knot != NULL) {
    printf("\t%s\n", knot->data);
    knot = knot->next;
  }
}


// search list, not used here
/*
int ll_search(list * knot, char *value)
{
  while (knot != NULL) {
    if(strcmp(knot->data, value) == 0){
      return 1;
    }
    knot = knot->next;
  }
  return 0;
}
*/


// getline() from http://stackoverflow.com/documentation/c/507/files-and-i-o-streams/8274/get-lines-from-a-file-using-getline

#include <errno.h>
#include <stdint.h>

#if !(defined _POSIX_C_SOURCE)
typedef long int ssize_t;
#endif

/* Only include our version of getline() if the POSIX version isn't available. */

#if !(defined _POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200809L

#   if !(defined SSIZE_MAX)
#      define SSIZE_MAX (SIZE_MAX >> 1)
#   endif

ssize_t getline(char **pline_buf, size_t * pn, FILE * fin)
{
  const size_t INITALLOC = 16;
  const size_t ALLOCSTEP = 16;
  size_t num_read = 0;
  if ((NULL == pline_buf) || (NULL == pn) || (NULL == fin)) {
    errno = EINVAL;
    return -1;
  }
  if (NULL == *pline_buf) {
    *pline_buf = malloc(INITALLOC);
    if (NULL == *pline_buf) {
      return -1;
    } else {
      *pn = INITALLOC;
    }
  }

  {
    int c;
    while (EOF != (c = getc(fin))) {
      num_read++;
      if (num_read >= *pn) {
        size_t n_realloc = *pn + ALLOCSTEP;
        char *tmp = realloc(*pline_buf, n_realloc + 1);
        if (NULL != tmp) {
          *pline_buf = tmp;
          *pn = n_realloc;
        } else {
          return -1;
        }
        if (SSIZE_MAX < *pn) {
          errno = ERANGE;
          return -1;
        }
      }
      (*pline_buf)[num_read - 1] = (char) c;
      if (c == '\n') {
        break;
      }
    }
    if (EOF == c) {
      errno = 0;
      return -1;
    }
  }
  (*pline_buf)[num_read] = '\0';
  return (ssize_t) num_read;
}
#endif

Some of the necessary checks are omitted for brevity!

$ gcc-4.9 -O3 -g3  -W -Wall -Wextra  -std=c11  balancbst.c -o balancbst
$ wc  < yelp_academic_dataset_user_alternative.csv
  552275 17307225 93975270
$ valgrind --leak-check=full --show-leak-kinds=all  --track-origins=yes ./balancbst  yelp_academic_dataset_user_alternative.csv
==9100== Memcheck, a memory error detector
==9100== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==9100== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info
==9100== Command: ./Yelp1M yelp_academic_dataset_user_alternative.csv
==9100==
Processing line 1000
Processing line 2000
...
Processing line 550000
Processing line 551000
Processing line 552000
Searching...
aron
FOUND aron
    yelping_since: 2008 01 || votes:  funny 17 useful 19 cool 15  || name: aron || type: user || compliments:  funny 2 plain 4 writer 3 note 4 hot 2 cool 1  || fans: 2 || average_stars: 4 28 || review_count: 45 ||
Aron        
FOUND Aron
    yelping_since: 2011 01 || votes:  funny 2 useful 14 cool 4  || name: Aron || type: user || compliments:  note 1  || fans: 0 || average_stars: 3 91 || review_count: 35 || 
    yelping_since: 2009 07 || votes:  funny 0 useful 5 cool 2  || name: Aron || type: user || fans: 0 || average_stars: 3 6 || review_count: 5 || 
    yelping_since: 2012 07 || votes:  funny 0 useful 4 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 5 || 
    yelping_since: 2011 03 || votes:  funny 1 useful 16 cool 3  || name: Aron || type: user || fans: 0 || average_stars: 3 75 || review_count: 8 || 
    yelping_since: 2012 10 || votes:  funny 0 useful 2 cool 1  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 1 || 
    yelping_since: 2013 07 || votes:  funny 0 useful 0 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 1 || 
    yelping_since: 2010 01 || votes:  funny 0 useful 0 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 || 
    yelping_since: 2012 11 || votes:  funny 0 useful 5 cool 1  || name: Aron || type: user || fans: 0 || average_stars: 4 29 || review_count: 14 || 
    yelping_since: 2011 11 || votes:  funny 8 useful 9 cool 2  || name: Aron || type: user || fans: 0 || average_stars: 2 57 || review_count: 21 || 
    yelping_since: 2012 03 || votes:  funny 0 useful 9 cool 1  || name: Aron || type: user || compliments:  plain 1  || fans: 2 || average_stars: 4 92 || review_count: 21 || 
    yelping_since: 2008 03 || votes:  funny 11 useful 44 cool 13  || name: Aron || type: user || compliments:  profile 1 writer 1 note 2 hot 2 cool 1 more 1  || fans: 0 || average_stars: 3 7 || review_count: 19 || 
    yelping_since: 2009 05 || votes:  funny 0 useful 8 cool 1  || name: Aron || type: user || fans: 0 || average_stars: 3 83 || review_count: 6 || 
    yelping_since: 2015 02 || votes:  funny 0 useful 0 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 || 
    yelping_since: 2010 11 || votes:  funny 9 useful 20 cool 16  || name: Aron || type: user || compliments:  writer 1  || fans: 0 || average_stars: 3 71 || review_count: 6 || 
    yelping_since: 2009 10 || votes:  funny 12 useful 83 cool 12  || name: Aron || type: user || compliments:  funny 1  || fans: 0 || average_stars: 3 8 || review_count: 42 || 
    yelping_since: 2014 10 || votes:  funny 17 useful 57 cool 47  || name: Aron || elite:  2015  || type: user || compliments:  funny 3 cute 2 plain 17 writer 9 list 1 note 3 photos 5 hot 3 more 1 cool 15  || fans: 6 || average_stars: 4 39 || review_count: 32 || 
    yelping_since: 2012 04 || votes:  funny 0 useful 0 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 5 0 || review_count: 2 || 
    yelping_since: 2012 11 || votes:  funny 3 useful 4 cool 1  || name: Aron || type: user || fans: 0 || average_stars: 4 57 || review_count: 7 || 
    yelping_since: 2014 08 || votes:  funny 2 useful 6 cool 3  || name: Aron || type: user || fans: 0 || average_stars: 3 5 || review_count: 4 || 
    yelping_since: 2010 03 || votes:  funny 121 useful 465 cool 157  || name: Aron || elite:  2012 2013 2014 2015  || type: user || compliments:  funny 2 plain 12 writer 9 note 2 photos 1 hot 7 more 1 cool 5  || fans: 10 || average_stars: 3 78 || review_count: 233 || 
    yelping_since: 2014 08 || votes:  funny 3 useful 37 cool 5  || name: Aron || type: user || fans: 1 || average_stars: 3 93 || review_count: 14 || 
    yelping_since: 2012 04 || votes:  funny 0 useful 5 cool 0  || name: Aron || type: user || fans: 0 || average_stars: 1 0 || review_count: 1 || 
    yelping_since: 2011 01 || votes:  funny 123 useful 492 cool 213  || name: Aron || elite:  2012 2013 2014 2015  || type: user || compliments:  profile 3 cute 3 funny 3 plain 27 writer 13 list 1 note 8 hot 16 more 7 cool 27  || fans: 12 || average_stars: 4 24 || review_count: 378 ||
qqqqqqqqqqqqqqqqq
NOT FOUND qqqqqqqqqqqqqqqqq
exi==9100== 
==9100== HEAP SUMMARY:
==9100==     in use at exit: 0 bytes in 0 blocks
==9100==   total heap usage: 1,212,396 allocs, 1,212,396 frees, 101,900,376 bytes allocated
==9100== 
==9100== All heap blocks were freed -- no leaks are possible
==9100== 
==9100== For counts of detected and suppressed errors, rerun with: -v
==9100== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)t
$ time echo "exit" | ./balancbst yelp_academic_dataset_user_alternative.csv
Processing line 1000
Processing line 2000
...
Processing line 551000
Processing line 552000
Searching...

real    0m1.391s
user    0m1.279s
sys 0m0.097s
deamentiaemundi
  • 5,502
  • 2
  • 12
  • 20