0

So I have a 2d array of structs that I am storing data from a csv file. I am reading in the strings from the file then I want to assign their value in the 2d array as that string. So for example position [0][0] would be "red" and position [1][1] would be "blue" or something. The problem is that I am reading in the characters from the file and storing them in a temp string then I want to assign that temp string value to the current position I'm on. So I have the temp as "red" and I want [0][0] to be "red" then it reads more from the file and temp is now "blue" and I want [0][1] to be "blue". When I assign [0][0]=temp then [0][1]=temp they write over each other(since it is a pointer). I have tried strdup(temp) but that does not seem to fix the problem either they still re write over each other sometimes. I know I could use strcpy but for strcpy I have to have another string to copy it to so I tried that with just making a new char array each time, but that does not work either. It still gets written over. I know I could fix this problem if each time I assigned a value I created a new char array with a new name and pointed to that char array, but this is for a very large csv file so then I would have to create about 10 million char arrays and I do not know how to do that, and it seems like there should be a simpler way.

Any help is very much appreciated thanks!

struct node {
    char* value;
};

char temp[500];
struct node ** arrayofnodes[35];
for(int i=0; i<35; i++) {
    arrayofnodes[i] = malloc(test * sizeof (struct node));
    for(int j=0; j<test; j++) arrayofnodes[i][j] = malloc(sizeof (struct node));
}


//(all of this below happens inside a function because it is going to be called like 10 million times)
    arrayofnodes[row][count]->value=temp; //way 1
    arrayofnodes[row][count]->value=strdup(temp); //way 2
    char fun[500];
    strcpy(fun,temp);
    arrayofnodes[row][count]->value=fun;//way 3
Artyer
  • 31,034
  • 3
  • 47
  • 75
  • Recommend you read this. https://stackoverflow.com/questions/17258647/why-is-it-safer-to-use-sizeofpointer-in-malloc – MFisherKDX Mar 13 '19 at 21:07
  • 1
    The `strdup()` approach is a viable one, and the only viable one of the three presented. If it's not working for you then we would need to consider a [mcve] to do more than guess about why not. – John Bollinger Mar 13 '19 at 21:28

2 Answers2

1

arrayofnodes[i] = malloc(N * sizeof (struct node)); makes enough space for N structs. What you want is space for N struct pointers.

However I don't think that's your main problem. Only "way 2" is valid - you do need to create storage for every new string you read. "way 1" is re-using the temp storage, and "way 3" is re-using the 'fun' storage, which will be invalid as soon as you exit the function.

If "way 2" is not working for you, I'd look at the code generating row and count. Are those indexes always less than 35 and test respectively?

AShelly
  • 34,686
  • 15
  • 91
  • 152
0

When using malloc or strdup, you need extra 16 bytes of space for management.
When you allocate a lot of small memory, this extra space also takes up a lot of memory.
So consider own managing memory, or tcmalloc, jemalloc

Below is the test data:

gcc -otest test.c -Wall -O3 -g

[test_strdup] cost time: 1085247 us, use memory: 839.81 MB
[test_optimized] cost time: 394635 us, use memory: 411.71 MB

gcc -otest test.c -Wall -O3 -g -ltcmalloc

[test_strdup] cost time: 627160 us, use memory: 461.07 MB
[test_optimized] cost time: 397938 us, use memory: 422.85 MB

gcc -otest test.c -Wall -O3 -g -ljemalloc

[test_strdup] cost time: 749875 us, use memory: 481.77 MB
[test_optimized] cost time: 330825 us, use memory: 451.96 MB

Below is the test code

Just to compare the memory allocation, there is a memory leak in the test code.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/sysinfo.h>

#define ROWS 35
#define COLS (1000*10000/ROWS)
#define MB   (1024*1024)
#define TEST_STR "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

struct node {
    char *value;
};

long current_time()
{
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return tv.tv_sec * 1000000L + tv.tv_usec;
}

long current_usemem()
{
    FILE *fp;
    long resident = 0;

    fp = fopen("/proc/self/statm", "r");
    if (fp) {
        if (fscanf(fp, "%*s %ld ", &resident) != 1)
            resident = 0;
        fclose(fp);
    }
    resident *= 4096;

    return resident;
}

void test_strdup()
{
    char temp[500];
    struct node **arrayofnodes[ROWS];
    int i, j;
    long start_time, end_time;
    long start_usemem, end_usemem;

    strcpy(temp, TEST_STR);
    start_usemem = current_usemem();
    start_time = current_time();

    for(i = 0; i < ROWS; i++) {
        arrayofnodes[i] = (struct node **)malloc(COLS * sizeof(struct node *));
        for(j = 0; j < COLS; j++) {
            arrayofnodes[i][j] = (struct node *)malloc(sizeof (struct node));
        }
    }

    for(i = 0; i < ROWS; i++) {
        for(j = 0; j < COLS; j++) {
            arrayofnodes[i][j]->value = strdup(temp);
        }
    }

    end_time = current_time();
    end_usemem = current_usemem();
    printf("[%s] cost time: %ld us, use memory: %.2f MB\n",
           __FUNCTION__, end_time - start_time,
           (end_usemem - start_usemem) / 1024.0 / 1024);
}

struct memory_chunk {
    struct memory_chunk *next;
    char *cur;
    char *end;
    char buf[0];
};

struct memory_pool {
    struct memory_chunk *head;
};

void *pool_alloc(struct memory_pool *pool, size_t size)
{
    void *ret;
    struct memory_chunk *chunk;

    chunk = pool->head;
    if (chunk == NULL || chunk->cur + size >= chunk->end) {
        size_t len = (size < MB ? MB : size + sizeof(*chunk));
        chunk = (struct memory_chunk *)malloc(len);
        chunk->next = pool->head;
        chunk->end = (char *)chunk + len;
        chunk->cur = chunk->buf;
        pool->head = chunk;
    }

    ret = chunk->cur;
    chunk->cur += size;
    return ret;
}

char *pool_strdup(struct memory_pool *pool, const char *s)
{
    size_t size = strlen(s) + 1;
    void *ret = pool_alloc(pool, size);
    memcpy(ret, s, size);
    return ret;
}

void test_optimized()
{
    char temp[500];
    struct node ***arrayofnodes;
    int i, j;
    long start_time, end_time;
    long start_usemem, end_usemem;
    struct memory_pool pool = {NULL};

    strcpy(temp, TEST_STR);
    start_usemem = current_usemem();
    start_time = current_time();

    arrayofnodes = (struct node ** *)pool_alloc(&pool, ROWS * sizeof(struct node **));
    for(i = 0; i < ROWS; i++) {
        arrayofnodes[i] = (struct node **)pool_alloc(&pool, COLS * sizeof(struct node *));
        for(j = 0; j < COLS; j++) {
            arrayofnodes[i][j] = (struct node *)pool_alloc(&pool, sizeof(struct node));
        }
    }

    for(i = 0; i < ROWS; i++) {
        for(j = 0; j < COLS; j++) {
            arrayofnodes[i][j]->value = pool_strdup(&pool, temp);
        }
    }

    end_time = current_time();
    end_usemem = current_usemem();
    printf("[%s] cost time: %ld us, use memory: %.2f MB\n",
           __FUNCTION__, end_time - start_time,
           (end_usemem - start_usemem) / 1024.0 / 1024);

}

int main()
{
    test_strdup();
    test_optimized();

    return 0;
}
ccxxshow
  • 844
  • 6
  • 5