1

I'm testing functional style programming with C++. Having the compiler to optimize recursion to a loop when possible is necessary to avoid stack overflows. The below program crashes due to stack overflow when BlendingTable::print() is called. I don't know why it doesn't crash in the constructor of BlendingTable, which does a same form of recursion as print. I need to allocate 1GB of stack to avoid stack overflow, and this is not acceptable.

Do I need to pass some special options to hint gcc to optimize the recursion? How do functional programming languages handle the problem of deep recursion? Or is it simply an unsolvable problem with current compiler technology?


EDIT1

I kind of solved the problem by placing __attribute__((always_inline)) at Stdout::print. Then gcc does remove the recursion in BlendingTable::print.

#include <cstdlib>
#include <iostream>
#include <memory>
#include <array>
#include <limits>

class Stdout {
public:
    template<typename T, typename... Ts>
    static void print(const T& t, const Ts&... ts) {
        std::cout << t << std::flush;
        print(ts...);
    }

    static void print() {}

    template<typename... Ts>
    static void printLine(const Ts&... ts) {
        print(ts..., '\n');
    }
};

class Error {
public:
    template<typename... Ts>
    static void assert(bool b, const Ts&... ts) {
        if (!b) {
            Stdout::printLine(ts...);
            throw;
        }
    }
};

template<typename T, int dimension = 1>
class Array {
private:
    std::unique_ptr<T[]> pointer;
    std::array<int, dimension> sizes;
    int realSize;

public:
    template<typename... Ns>
    Array(Ns... ns):
    realSize(1) {
        checkArguments(ns...);
        create(1, ns...);
    }

    Array() {}

private:
    template<typename... Ns>
    static constexpr void checkArguments(Ns...) {
        static_assert(sizeof...(Ns) == dimension, "dimension mismatch");
    }

    template<typename... Ns>
    void create(int d, int n, Ns... ns) {
        realSize *= n;
        sizes[d - 1] = n;
        create(d + 1, ns...);
    }

    void create(int) {
        pointer = std::unique_ptr<T[]>(new T[realSize]);
    }

    int computeSubSize(int d) const {
        if (d == dimension) {
            return 1;
        }
        return sizes[d] * computeSubSize(d + 1);
    }

    template<typename... Ns>
    int getIndex(int d, int n, Ns... ns) const {
        return n * computeSubSize(d) + getIndex(d + 1, ns...);
    }

    int getIndex(int) const {
        return 0;
    }

public:
    template<typename... Ns>
    T& operator()(Ns... ns) const {
        checkArguments(ns...);
        return pointer[getIndex(1, ns...)];
    }

    int getSize(int d = 1) const {
        return sizes[d - 1];
    }
};

class BlendingTable : public Array<unsigned char, 3> {
private:
    enum {
        SIZE = 0x100,
        FF = SIZE - 1,
    };

public:
    BlendingTable():
    Array<unsigned char, 3>(SIZE, SIZE, SIZE) {
        static_assert(std::numeric_limits<unsigned char>::max() == FF, "unsupported byte format");
        create(FF, FF, FF);
    }

private:
    void create(int dst, int src, int a) {
        (*this)(dst, src, a) = (src * a + dst * (FF - a)) / FF;
        if (a > 0) {
            create(dst, src, a - 1);
        } else if (src > 0) {
            create(dst, src - 1, FF);
        } else if (dst > 0) {
            create(dst - 1, FF, FF);
        } else {
            return;
        }
    }

    void print(int dst, int src, int a) {
        Stdout::print(static_cast<int>((*this)(FF - dst, FF - src, FF - a)), ' ');
        if (a > 0) {
            print(dst, src, a - 1);
        } else if (src > 0) {
            print(dst, src - 1, FF);
        } else if (dst > 0) {
            print(dst - 1, FF, FF);
        } else {
            Stdout::printLine();
            return;
        }
    }

public:
    void print() {
        print(FF, FF, FF);
    }
};

int main() {
    BlendingTable().print();
    return EXIT_SUCCESS;
}
  • 1
    Check this http://stackoverflow.com/questions/15897452/tail-recursion-in-gcc-g – pablochan Jul 07 '15 at 09:27
  • @pablochan `print()` and `create()` already uses the tail recursion mechanism. – nouney Jul 07 '15 at 09:28
  • Yes, but the important part is the `-O2` switch when invoking the compiler – pablochan Jul 07 '15 at 09:36
  • I would welcome such a missed-optimization report in gcc's bugzilla (your code does not compile with clang, you may want to fix that first). Though it seems that it considers the calls to operator<< too heavy to make it worth turning into a loop, and the function needs enough stack that replacing call by jmp is harder. – Marc Glisse Jul 07 '15 at 09:39
  • @pablochan compiled with `-fwhole-program -s -O3` –  Jul 07 '15 at 09:40
  • @MarcGlisse clang complains about the return type of a `constexpr` function being `void` in `Array::checkArguments`. Is it me or clang being wrong? –  Jul 07 '15 at 09:42
  • That probably requires C++14, but then it complains about something else which I don't immediately understand. Anyway that check_arguments should be irrelevant to the present issue, so it is preferable to remove it. – Marc Glisse Jul 07 '15 at 09:47
  • @MarcGlisse Fixed! added a `static`. –  Jul 07 '15 at 09:49

0 Answers0