4

I'm writing a program to my college classes. It's an implementation of dynamic programming algorithm for simple version of scheduling tasks on 2 processors. Cause it's a memory wasteful method, I thought of some improvements to it. For example, one don't have to store whole S x n rectangular array, where S is sum of times of all tasks and n is number of tasks. Because in first iterations of algorithm data will be stored only in small index values of n axis, I thought I can make my array a triangle, i.e. each next sub-array is a certain amount of elements longer.

Then I looked in Task Manager for memory usage and I was shocked. Version with rectangular array took 980KBs. Version with triangle array (so the smaller one) took almost 15MBs! Maybe I don't know something about ways of memory allocation used by system, or I have delusions. Or I made some stupid mistake in my code. But I bet that I don't know something. Can someone enlight me?

Here is my code:

#include <iostream>
#include <fstream> 
#include <conio.h>
#include <stack>

using namespace std;

void readTasks(char* filename, int*& outTaskArray, int& outTaskCount)
{
    ifstream input = ifstream();
    input.open(filename, ios::in);

    input >> outTaskCount;
    outTaskArray = new int[outTaskCount];
    for (int i = 0; i < outTaskCount; ++i)
    {
        input >> outTaskArray[i];
    }

    input.close();
}

void coutTasks(int* taskArray, int taskCount)
{
    cout << taskCount << " tasks:\n";
    for (int i = 0; i < taskCount; ++i)
    {
        cout << i << ": " << taskArray[i] << endl;
    }
}

void Scheduling2(int* taskArray, int taskCount, int memorySaving, 
    stack<int>*& outP1Stack, stack<int>*& outP2Stack)
{
    bool** scheduleArray = new bool*[taskCount];
    int sum;

    // I know that construction below is ugly cause of code repetition.
    // But I'm rather looking for performance, so I try to avoid e.g. 
    // checking the same condition too many times.
    if (memorySaving == 0)
    {   
        sum = 0;
        for (int i = 0; i < taskCount; ++i)
        {
            sum += taskArray[i];
        }

        scheduleArray[0] = new bool[sum + 1];
        for (int j = 0; j < sum + 1; ++j)
        {
            scheduleArray[0][j] = j == 0 || j == taskArray[0];
        }
        for (int i = 1; i < taskCount; ++i)
        {
            scheduleArray[i] = new bool[sum + 1];
            for (int j = 0; j < sum + 1; ++j)
            {
                scheduleArray[i][j] = scheduleArray[i - 1][j] || 
                    j >=  taskArray[i] && 
                    scheduleArray[i - 1][j - taskArray[i]];
            }
        }

        getch(); // I'm reading memory usage from Task Manager when program stops here

        int halfSum = sum >> 1;
        while (!scheduleArray[taskCount - 1][halfSum]) --halfSum;

        for (int i = taskCount - 1; i > 0; --i)
        {
            if (scheduleArray[i - 1][halfSum])
                outP1Stack->push(i);
            else if (scheduleArray[i - 1][halfSum - taskArray[i]])
            {
                outP2Stack->push(i);
                halfSum -= taskArray[i];
            }
        }
        if (halfSum) outP2Stack->push(0);
            else outP1Stack->push(0);
    }
    else if (memorySaving == 1)
    {   
        sum = 0;
        for (int i = 0; i < taskCount; ++i)
        {
            sum += taskArray[i];

            scheduleArray[0] = new bool[sum + 1];
            for (int j = 0; j < sum + 1; ++j)
            {
                scheduleArray[0][j] = j == 0 || j == taskArray[0];
            }
            for (int i = 1; i < taskCount; ++i)
            {
                scheduleArray[i] = new bool[sum + 1];
                        for (int j = 0; j < sum + 1; ++j)
                {
                    scheduleArray[i][j] = scheduleArray[i - 1][j] || 
                        j >= taskArray[i] && 
                        scheduleArray[i - 1][j - taskArray[i]];
                }
            }
        }

        getch(); // I'm reading memory usage from Task Manager when program stops here

        int halfSum = sum >> 1;
        while (!scheduleArray[taskCount - 1][halfSum]) --halfSum;

        for (int i = taskCount - 1; i > 0; --i)
        {
            if (scheduleArray[i - 1][halfSum])
                outP1Stack->push(i);
            else if (scheduleArray[i - 1][halfSum - taskArray[i]])
            {
                outP2Stack->push(i);
                halfSum -= taskArray[i];
            }
        }
        if (halfSum) outP2Stack->push(0);
        else outP1Stack->push(0);
    }

    for (int i = 0; i < taskCount; ++i)
    {
        delete[] scheduleArray[i];
    }
    delete[] scheduleArray;
}

int main()
{
    char* filename = "input2.txt";
    int memorySaving = 0; //changing to 1 in code when testing memory usage

    int* taskArray; // each number in array equals time taken by task
    int taskCount;
    readTasks(filename, taskArray, taskCount);

    coutTasks(taskArray, taskCount);

    stack<int>* p1Stack = new stack<int>();
    stack<int>* p2Stack = new stack<int>();

    Scheduling2(taskArray, taskCount, memorySaving, p1Stack, p2Stack);

    cout << "\np1: ";
    while (p1Stack->size())
    {
        cout << p1Stack->top() << ", ";
        p1Stack->pop();
    }
    cout << "\np2: ";
    while (p2Stack->size())
    {
        cout << p2Stack->top() << ", ";
        p2Stack->pop();
    }

    delete p1Stack;
    delete p2Stack;

    delete[] taskArray;
    return 0;
}
Sushi271
  • 544
  • 2
  • 8
  • 22
  • Could you shorten the way you're creating the arrays into a few lines? That's all that matters. – Pubby Nov 29 '12 at 04:33
  • 1
    Sidenotes: why would you make memorySaving an int and test against 0 instead of making it a bool and just checking `if (memorySaving)` or `if (not memorySaving)`? Why would you express division by two through a bit shift operation? Why are you allocating raw arrays instead of using `std::vector`? Why are you `new`-ing your stacks as pointers rather than declaring them normally and passing them by reference? Why use char* instead of `std::string`? – HostileFork says dont trust SE Nov 29 '12 at 04:44
  • @HostileFork: memorySaving is going to have more values than just two. Isn't a bit shift operation faster than division by two? Has std::vector O(1) access time to every element by index, as array has? As for pointers to stacks - isn't dynamic memory allocation generally better, as I can decide when to release memory myself? Why should I use std::string, if I don't use any functions it supports, and char* is as good as std::string here? – Sushi271 Nov 29 '12 at 10:55
  • @Pubby: do you suggest I should create my whole scheduleArray first and than fill it later? I was just lazy-creating each next row of it, thinking that I'll save time that way. Wouldn't allocating whole array at the beginning be slower (because I'm iterating through each row twice)? – Sushi271 Nov 29 '12 at 11:14
  • @HostileFork: OK, now I digged and found info that complexity of std::vector::operator[] is indeed O(1). But why using vector would help me in this case? I don't need to use any of its functions. – Sushi271 Nov 29 '12 at 11:28

2 Answers2

2

Damn it, I'm blind. I have a damn big memory leak and I didn't see that. I just looked into the part executed, when memorySaving == 1 and noticed that I'm allocating (god knows why) every row of my array taskCount times... It's completely not what I meant, when I was writing this. Well. It was late night.

Sorry for bothering you all. Question should be closed.

Sushi271
  • 544
  • 2
  • 8
  • 22
1

Since my suggestions would have fixed your problem had you taken them (as I suspected), I'll make them an answer!

But why using vector would help me in this case? I don't need to use any of its functions.

Yes, you did! You needed one of its most important "functions"...the automatic management of the array memory block. Note that if your declaration was vector< vector<bool> > scheduleArray, you couldn't have had a leak. There wouldn't have been any new or delete in your code...how would it be possible?

Other benefits of using vector:

  • You can't accidentally do a delete instead of delete[] on the pointer

  • It can do bounds checking (if you enable it, which you should in your debug builds...try a test with just vector<int> v; v[0] = 1; to make sure you have this turned on.)

  • Vector knows how many elements it is holding, so you don't run into situations where you have to pass parameters like taskCount. This eliminates one more place where you have the opportunity to make a mistake in your bookkeeping. (e.g. what if you remove an element from the vector and forget to reflect that in the count variable?)

Answers to your comments:

Isn't a bit shift operation faster than division by two?

No.

If you're coding in raw assembly, then sometimes it might be, on certain architectures. But for the most part integer division and bit shift costs a whole cycle either way. And I'm sure some wacky architecture exists out there that can divide faster than it can shift.

Remember that this is C++, not assembly. It's best to keep your code clear and trust the optimizer to do the right thing. For instance what if SHIFT and DIV both take one instruction cycle, but you can get more speed by alternating which you use if you're in a tight loop due to something about the pipeline?

memorySaving is going to have more values than just two.

Then use an enumerated type.

Has std::vector O(1) access time to every element by index, as array has?

Yes, as you discovered. There is a small amount of per-vector overhead in terms of storage (which varies by compiler). But as I mentioned above this is a small price to pay. Also, you were probably usually tracking the length in some variable outside your array in the first place.

As for pointers to stacks - isn't dynamic memory allocation generally better, as I can decide when to release memory myself?

It's generally not better, precisely for that reason. If you're responsible for deciding when to release memory yourself, then you can drop the ball on managing that responsibility.

So wherever possible, you should let C++'s scoping handle the lifetime of your objects. Sometimes making a dynamic object that survives outside of the scope of its creation simply can't be avoided, but that's why Modern C++ has smart pointers. Use 'em!

http://en.wikipedia.org/wiki/Smart_pointer

For instance, your readTasks would be cleaner and safer if it returned a shared_ptr< vector<int> >.

Why should I use std::string, if I don't use any functions it supports, and char* is as good as std::string here?

To get into the habit of not using it for reasons that parallel the above arguments for vector. Bounds-checking, for instance. Also, quiz question: what do you think will happen if you wanted to capitalize "input2.txt" and said filename[0] = 'I';?

When you're done with implementing all my suggestions, then you can go look at boost::dynamic_bitset. :-)

Community
  • 1
  • 1
  • I always wondered how `vector` is organized inside. Because elements of `vector` can be pushed or popped in any time, than it has to change it size dynamically. Which comes down to reallocation of whole array inside `vector` - and that costs time. – Sushi271 Nov 29 '12 at 13:39
  • Pointers to stacks - OK, but let's say I can manage memory correctly i.e. without leaks. Than I can release some resources a long before C++ would do this, cause I, and only I, personally know that I won't need them anymore. Isn't that true? As for `filename[0] = 'I'` - I expect that program will get a memory cell pointed by `filename` and put a 0x49 number (which is an ASCII code for 'I'). Isn't that what's gonna happen? – Sushi271 Nov 29 '12 at 13:40
  • 1
    @Sushi271 It's [undefined behavior](http://en.wikipedia.org/wiki/Undefined_behavior). The compiler is free from being required to check for this happening (directly or indirectly if that pointer is passed to somewhere else that happens to modify it). Therefore you cannot predict the consequences. You can address this by making it a `char const *`, but it's just another example of not doing oneself any favors by sticking to low-level C-like practices. Easy to forget. http://stackoverflow.com/questions/10001202/is-modification-of-string-literals-undefined-behaviour-according-to-the-c89-stan – HostileFork says dont trust SE Nov 29 '12 at 13:40
  • 1
    @Sushi271 Vectors have both a "size" and a "capacity". If you are dealing with a case where you know the size ahead of time then you can [tell vector the capacity you want](http://www.cplusplus.com/reference/vector/vector/capacity/) if you or so inclined. Or you really want to tie your hands so that you can never add or remove elements even if you decide you want to, then you can use `std::array`: http://stackoverflow.com/questions/4424579/c-stdvector-vs-stdarray – HostileFork says dont trust SE Nov 29 '12 at 13:44
  • Hmm. I've been though that when I write `char* arr` and `char arr[]` it's the same thing, and it only looks other way to programmer. I also thought that when I do `filename[i]`, in fact I do `*(filename + i)`. Why would that be undefined behavior? I don't get it. Address is valid. We are just referencing to a memory cell of certain address. Edit: OK, I misread that link from SO. So it's undefined behavior simply because strings are **designed** to be unmodifiable? Which was done for performance reasons? – Sushi271 Nov 29 '12 at 13:48
  • 1
    @Sushi271 `char*` vs `char[]` [is covered elsewhere](http://stackoverflow.com/questions/1880573/c-difference-between-char-var-and-char-var), and yes--UB gives compilers wiggle room to optimize. Regarding your desire to throw data away earlier than the end of a scope, don't use dynamic allocation. Try to make your design efficient without it by letting the scopes work for you, and if that ultimately fails then find those particular cases and use [boost::optional< stack >](http://www.boost.org/libs/optional/). Pass by reference to writers, and when it's not needed assign `none` to it. – HostileFork says dont trust SE Nov 29 '12 at 13:58
  • Now I remember that once, when I tried to perform `delete[]` on some `char*` variable defined, let's say: `char* c = "xxx";` program crashed. At the same time when I wrote: `char* c = new char[4]; c[0] = 'x'; c[1] = 'x'; c[2] = 'x'; c[3] = '\0';` than deletion worked fine. That brings me now to conclusion that there two objects: string (as char array) and string literal, and they are pretty different things. Am I right? – Sushi271 Nov 29 '12 at 14:09
  • As for `dynamic_bitset` - I planned to do something like this myself to save even more memory, but it was too complex to create in short time I had. It's good to know that such a structure already exists. And I'm reading about `boost::optional` now. Why nobody showed me these things when they thaught me C++?... [rethorical question] ;) I'm pretty sure there are much more useful things already wrote & ready to use which I don't know about. – Sushi271 Nov 29 '12 at 14:12
  • 1
    *"Why nobody showed me these things when they thaught me C++?"* -> William Kamkwamba similarly asked ["Where was Google all this time?"](http://blog.metaprinter.com/2009/10/where-was-google-all-this-time-great-story-about-information-dissemination/) But don't worry, you're in better hands now. :-) The story on `char*`, `char[]` and `"xxx"` is covered here if you search a bit, lots of nuance. With `char const * c1 = "xxx"; char const * c2 = "xxx";` you could see `c1 == c2`, or `c1 != c2`...it's up to the compiler. But why don't you accept this answer and you can research + ask new questions. – HostileFork says dont trust SE Nov 29 '12 at 14:18
  • To google in Google you must first know **what** to google... you cannot expect to look for sun from someone, who whole life was living deep in cave. – Sushi271 Nov 29 '12 at 14:24
  • I was speaking by analogy. He built a windmill generator from scratch using very little information, but googling "windmill generator" would have gotten him a lot of information he needed. In this case, StackOverflow is your "Google" because you don't need to know the terms...you just say what you're looking for and people who know the answer from all over the world tell you the answer at any time of day or night. :-) – HostileFork says dont trust SE Nov 29 '12 at 14:26
  • 1
    Yeah right. :) I wrote that comment before I watched video. Well, from the one point of view, this guy is amazing for how he managed to do such thing basing only on book with pictures of windmill and EM things, but looking from the other side it's kinda sad how Internet-dependent we have become. If one day Internet was shut down, we would be lost. And he wouldn't. – Sushi271 Nov 29 '12 at 14:45