1

I'm making an application that has to deliver very big reports and I encounter that the slowest part of the process is when it has to create the final JSON string, so I tried different things to speed it up and see which was the fastest

I started with += and + operation over a string. Then I change all the + with += and it improved a little. And then I tried something more bold which is using a char * buffer and manage the concatenation myself, even made my own INT andFLOAT to string functions. And it improved the time by a 30% but the process still takes about 15 segs for big reports and I would like to reduce that time to the minimum so I will paste my code here and see if there is any suggestion in how improved the performance.

char * itoa2(UINT num, char * str)
{
    if (num == 0)
    {
        *str++ = '0';
        return str;
    }

    // Process individual digits
    UCHAR nsiz = (UCHAR)log10(num) + 1;
    str += nsiz;
    while (num != 0)
    {
        *str-- = (num % 10) + '0';
        num = num/10;
    }
    return str + nsiz + 1;
}

char * ftoa2(float num, char * str) {
    int intg = (UINT)num;
    int dec = (UINT)((num - intg) * 1000);



    // Process individual digits
    UCHAR nsiz = (UCHAR)log10(intg) + 1;
    str += nsiz - 1;
    if (dec > 0)
    {
        nsiz += (UCHAR)log10(dec) + 2;
        str += nsiz - 1;
        while (dec != 0) {
            *str-- = (dec % 10) + '0';
            dec = dec / 10;
        }
        *str-- = '.';
    }

    if (intg == 0)
    {
        *str-- = '0';
    } else {
        while (intg != 0) {
            *str-- = (intg % 10) + '0';
            intg = intg / 10;
        }
    }


    return str + nsiz + 1;
}

void Formatter::jsonFormat2() {

    char * result = new char[1024 * 1024 * 1024]; //TODO: Use small buffers instead of this

    char * pos;

    string tmp;

    pos = result;

    (*pos++) = '[';

    const char * key;
    int keyPos;
    const Reporter::Entry * entry;
    set<Field *, Field::cmpOrder> * grouped = params->getGrouped();

    bool byDay, byMonth, byYear;
    time_t ts = 0;
    struct tm * lt;

    char type;

    FieldData::fieldVariant fkey(0, nullptr);
    FieldData::fieldVariant * fvar;

    byDay = params->getByDay();
    byMonth = params->getByMonth();
    byYear = params->getByYear();


    for (auto& it : *report) {
        key = it.first;
        entry = it.second;
        keyPos = 0;

        (*pos++) = '{';

        if (byDay || byMonth || byYear) {
            ts = *(time_t *)key;
            keyPos += sizeof(time_t);
            lt = gmtime(&ts);
        }

        if (byDay) {
            memcpy(pos, "\"day\":\"", 7);
            pos+=7;
            pos = itoa2(lt->tm_year + 1900, pos);
            (*pos++) = '-';
            if (lt->tm_mon+1 < 10) (*pos++) = '0';
            pos = itoa2(lt->tm_mon+1, pos);
            (*pos++) = '-';
            if (lt->tm_mday < 10) (*pos++) = '0';
            pos = itoa2(lt->tm_mday, pos);
            (*pos++) = '\"'; (*pos++) = ',';

            memcpy(pos, "\"wday\":\"", 8);
            pos+=8;

            memcpy(pos, &weekDayC[lt->tm_wday * 3], 3);
            pos+=3;

            (*pos++) = '\"'; (*pos++) = ',';
        }
        if (byMonth) {
            memcpy(pos, "\"month\":\"", 9);
            pos+=9;
            pos = itoa2(lt->tm_year + 1900, pos);
            (*pos++) = '-';
            if (lt->tm_mon+1 < 10) (*pos++) = '0';
            pos = itoa2(lt->tm_mon, pos);
            (*pos++) = '\"'; (*pos++) = ',';
        }
        if (byYear) {
            memcpy(pos, "\"year\":\"", 8);
            pos+=9;
            pos = itoa2(lt->tm_year + 1900, pos);
            (*pos++) = '\"'; (*pos++) = ',';
        }


        for (auto& field : *grouped) {
            (*pos++) = '\"';

            tmp = field->getName();
            memcpy(pos, tmp.c_str(), tmp.length());
            pos+=tmp.length();
            memcpy(pos, "\":\"", 3);
            pos+=3;

            type = field->getType();
            if (type == INT8) {
                pos = itoa2(*(key + keyPos), pos);
            } else if (type == INT16) {
                pos = itoa2(*(USHORT *)(key + keyPos), pos);
            } else if (type == INT32) {
                pos = itoa2(*(UINT *)(key + keyPos), pos);
            } else if (type == INT64) {
                pos = itoa2(*(ULONG *)(key + keyPos), pos);
            } else if (type == INT128) {
                pos = itoa2(*(UXLONG *)(key + keyPos), pos);
            } else if (type == CHAR2) {
                memcpy(pos, key + keyPos, 2);
                pos+=2;
            }

            (*pos++) = '\"'; (*pos++) = ',';


            for (auto& fieldData : *field->getFieldData()) {
                fkey.type = field->getType();

                if (fkey.type == CHAR2) {
                    fkey.data = new string((char *) (key + keyPos), 2);
                } else
                    fkey.data = (void *)key + keyPos;

                fvar = fieldData->find(&fkey);

                (*pos++) = '\"';

                tmp = fieldData->colName;
                memcpy(pos, tmp.c_str(), tmp.length());
                pos+=tmp.length();
                memcpy(pos, "\":\"", 3);
                pos+=3;
                if (fvar != nullptr) tmp = toString(fvar); else tmp = "null";
                memcpy(pos, tmp.c_str(), tmp.length());
                pos+=tmp.length();
                memcpy(pos, "\",", 2);
                pos+=2;

            }
            keyPos += field->getBytes();
        }

        memcpy(pos, "\"imps\":\"", 8);
        pos+=8;
        pos = itoa2(entry->imps, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"impsn\":\"", 9);
        pos+=9;
        pos = itoa2(entry->impsn, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"impsu\":\"", 9);
        pos+=9;
        pos = itoa2(entry->impsu, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"clicks\":\"", 10);
        pos+=10;
        pos = itoa2(entry->clicks, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"clicksu\":\"", 11);
        pos+=11;
        pos = itoa2(entry->clicksu, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"revenue\":\"", 11);
        pos+=11;
        pos = ftoa2(entry->revenue, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"revenue_2\":\"", 13);
        pos+=13;
        pos = ftoa2(entry->revenue_2, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"revenue_3\":\"", 13);
        pos+=13;
        pos = ftoa2(entry->revenue_3, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"convs\":\"", 9);
        pos+=9;
        pos = itoa2(entry->convs, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"convs_2\":\"", 11);
        pos+=11;
        pos = itoa2(entry->convs_2, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        memcpy(pos, "\"convs_3\":\"", 11);
        pos+=11;
        pos = itoa2(entry->convs_3, pos);
        (*pos++) = '\"'; (*pos++) = ',';

        (*pos++) = '}'; (*pos++) = ',';
        pos+=3;
    }
    pos--;

    (*pos++) = ']';
    (*pos++) = '\0';

    if (params->getVerbose()) cout << string(result, pos - result);
}
Ziezi
  • 6,375
  • 3
  • 39
  • 49
Yind
  • 335
  • 3
  • 17
  • maybe multithreading could be an option? – Yind Sep 21 '15 at 21:59
  • 1
    If you're concatenating a lot of strings, consider using a rope data structure. It avoids copying during concatenation, but access to individual characters is slower. – Brian Bi Sep 21 '15 at 21:59
  • I'm surprised that these `itoa2` and `ftoa2` functions helped speeding things up. Are you sure you've leveraged all compiler and library optimizations already? – 5gon12eder Sep 21 '15 at 22:02
  • @5gon12eder they helped because they operate directly into the buffer avoiding extra copies. – Yind Sep 21 '15 at 22:04
  • **measure** the code. and don't do silly tings like allocating a GB buffer. – Cheers and hth. - Alf Sep 21 '15 at 22:06
  • @Brian but the insertions are always at the end of the file that's why I thought of a char buffer and a pointer to know the current position, I don't figure out how the rope structure would help to speed it up but I will look it up and do some tests – Yind Sep 21 '15 at 22:06
  • 2
    You have a lot of pointers and indirection. That can really slow down your code as it trashes the cache. I would try to get everything you need to process into some sort of flat array like a vector and then traverse that as it is very cache friendly. – NathanOliver Sep 21 '15 at 22:08
  • @Cheersandhth.-Alf as you can see in the code I have to make subbuffers for that but as my testings are over the same report which output of string is about 960mb I set it fixed just to focus on the optimization, later I will improve the buffering part – Yind Sep 21 '15 at 22:12
  • 1
    You have `new` in your code, that slows things down dramatically. If you store the JSON in a file, I would construct smaller pieces on the stack and store and discard them. – alain Sep 21 '15 at 22:12
  • 2
    The most efficient method to concatenate strings is to remember the endpoint of the string. Finding the end of the string takes up a lot of execution time. – Thomas Matthews Sep 21 '15 at 22:14
  • @ThomasMatthews yes I do that with the pointer 'pos' so I add string into the buffer and move the pos pointer as many chars I added, when I have to add small string I do it like (*pos++) = ']' – Yind Sep 21 '15 at 22:16
  • Profile your code and find where the hotspots still are. My first thought is that buffering everything in memory is a bottleneck. – Jason Sep 21 '15 at 22:40
  • 1
    I found this summer time ago, make sure to read the whole answer, the code at the end helps a lot. http://stackoverflow.com/a/18899027/1116364 – Daniel Jour Sep 21 '15 at 23:02
  • Memory leak 1GB + some in lower scale , ++ / -- mix of operations ... I feel such code cannot be safe (for example proper JSON character quoting, unsafe mixed assigment fkey.data allocated c++ string and C char* ) – Jacek Cz Sep 22 '15 at 00:12
  • Get rid of the `string tmp`. Rewrite the variant's `toString` to directly write to the buffer - like `itoa2` does. – Paul Groke Sep 22 '15 at 02:30

1 Answers1

1

In the same method, Formatter::jsonFormat2(), you have

char * result = new char[1024 * 1024 * 1024];

at the top, and

if (params->getVerbose()) cout << string(result, pos - result);

at the bottom.

Which means, you not only have a 1GB memory leak every time you call this method, you also do a lot of work for nothing if getVerbose() happens to return false (which you could have checked right at the beginning).

Further, if you are going to write everything to std::cout (or to any stream for that matter), consider writing the various fragments directly, using the stream insertion syntax (the << operator).

arayq2
  • 2,502
  • 17
  • 21
  • And if you care about performance with `std::cout`, there are things you can do to greatly speed performance over the obvious; e.g. turning off synchronization with stdio, and not flushing until you want to guarantee output has been written. Probably other things too; would be worth searching for on its own or asking as a separate question. –  Sep 22 '15 at 02:24
  • if (params->getVerbose()) cout << string(result, pos - result); is just for debuging issues.. the real thing will be save in a file – Yind Sep 22 '15 at 08:42