33

A typical LOG() macro-based logging solution may look something like this:

#define LOG(msg) \
    std::cout << __FILE__ << "(" << __LINE__ << "): " << msg << std::endl 

This allows programmers to create data rich messages using convenient and type-safe streaming operators:

string file = "blah.txt";
int error = 123;
...
LOG("Read failed: " << file << " (" << error << ")");

// Outputs:
// test.cpp(5): Read failed: blah.txt (123)

The problem is that this causes the compiler to inline multiple ostream::operator<< calls. This increases the generated code and therefore function size, which I suspect may hurt instruction cache performance and hinder the compiler's ability to optimize the code.

Here's a "simple" alternative that replaces the inlined code with a call to a variadic template function:

********* SOLUTION #2: VARIADIC TEMPLATE FUNCTION *********

#define LOG(...) LogWrapper(__FILE__, __LINE__, __VA_ARGS__)

// Log_Recursive wrapper that creates the ostringstream
template<typename... Args>
void LogWrapper(const char* file, int line, const Args&... args)
{
    std::ostringstream msg;
    Log_Recursive(file, line, msg, args...);
}

// "Recursive" variadic function
template<typename T, typename... Args>
void Log_Recursive(const char* file, int line, std::ostringstream& msg, 
                   T value, const Args&... args)
{
    msg << value;
    Log_Recursive(file, line, msg, args...);
}

// Terminator
void Log_Recursive(const char* file, int line, std::ostringstream& msg)
{
    std::cout << file << "(" << line << "): " << msg.str() << std::endl;
}

The compiler automatically generates new instantiations of the template function as needed depending on the number, kind, and order of message arguments.

The benefit is there are fewer instructions at each call site. The downside is the user must pass the message parts as function parameters instead of combining them using streaming operators:

LOG("Read failed: ", file, " (", error, ")");

********* SOLUTION #3: EXPRESSION TEMPLATES *********

At @DyP's suggestion I created an alternative solution that uses expression templates:

#define LOG(msg) Log(__FILE__, __LINE__, Part<bool, bool>() << msg)

template<typename T> struct PartTrait { typedef T Type; };

// Workaround GCC 4.7.2 not recognizing noinline attribute
#ifndef NOINLINE_ATTRIBUTE
  #ifdef __ICC
    #define NOINLINE_ATTRIBUTE __attribute__(( noinline ))
  #else
    #define NOINLINE_ATTRIBUTE
  #endif // __ICC
#endif // NOINLINE_ATTRIBUTE

// Mark as noinline since we want to minimize the log-related instructions
// at the call sites
template<typename T>
void Log(const char* file, int line, const T& msg) NOINLINE_ATTRIBUTE
{
    std::cout << file << ":" << line << ": " << msg << std::endl;
}

template<typename TValue, typename TPreviousPart>
struct Part : public PartTrait<Part<TValue, TPreviousPart>>
{
    Part()
        : value(nullptr), prev(nullptr)
    { }

    Part(const Part<TValue, TPreviousPart>&) = default;
    Part<TValue, TPreviousPart> operator=(
                           const Part<TValue, TPreviousPart>&) = delete;

    Part(const TValue& v, const TPreviousPart& p)
        : value(&v), prev(&p)
    { }

    std::ostream& output(std::ostream& os) const
    {
        if (prev)
            os << *prev;
        if (value)
            os << *value;
        return os;
    }

    const TValue* value;
    const TPreviousPart* prev;
};

// Specialization for stream manipulators (eg endl)

typedef std::ostream& (*PfnManipulator)(std::ostream&);

template<typename TPreviousPart>
struct Part<PfnManipulator, TPreviousPart>
    : public PartTrait<Part<PfnManipulator, TPreviousPart>>
{
    Part()
        : pfn(nullptr), prev(nullptr)
    { }

    Part(const Part<PfnManipulator, TPreviousPart>& that) = default;
    Part<PfnManipulator, TPreviousPart> operator=(const Part<PfnManipulator,
                                                  TPreviousPart>&) = delete;

    Part(PfnManipulator pfn_, const TPreviousPart& p)
    : pfn(pfn_), prev(&p)
    { }

    std::ostream& output(std::ostream& os) const
    {
        if (prev)
            os << *prev;
        if (pfn)
            pfn(os);
        return os;
    }

    PfnManipulator pfn;
    const TPreviousPart* prev;
};

template<typename TPreviousPart, typename T>
typename std::enable_if<
    std::is_base_of<PartTrait<TPreviousPart>, TPreviousPart>::value, 
    Part<T, TPreviousPart> >::type
operator<<(const TPreviousPart& prev, const T& value)
{
    return Part<T, TPreviousPart>(value, prev);
}

template<typename TPreviousPart>
typename std::enable_if<
    std::is_base_of<PartTrait<TPreviousPart>, TPreviousPart>::value,
    Part<PfnManipulator, TPreviousPart> >::type
operator<<(const TPreviousPart& prev, PfnManipulator value)
{
    return Part<PfnManipulator, TPreviousPart>(value, prev);
}

template<typename TPart>
typename std::enable_if<
    std::is_base_of<PartTrait<TPart>, TPart>::value,
    std::ostream&>::type
operator<<(std::ostream& os, const TPart& part)
{
    return part.output(os);
}

The expression templates solution allows the programmer to use the familiar convenient and type-safe streaming operators:

LOG("Read failed: " << file << " " << error);

However, when Part<A, B> creation is inlined no operator<< calls are made, giving us the benefit of both worlds: convenient and type-safe streaming operators + fewer instructions. ICC13 with -O3 produces the following assembly code for the above:

movl      $.L_2__STRING.3, %edi
movl      $13, %esi
xorl      %eax, %eax
lea       72(%rsp), %rdx
lea       8(%rsp), %rcx
movq      %rax, 16(%rsp)
lea       88(%rsp), %r8
movq      $.L_2__STRING.4, 24(%rsp)
lea       24(%rsp), %r9
movq      %rcx, 32(%rsp)
lea       40(%rsp), %r10
movq      %r8, 40(%rsp)
lea       56(%rsp), %r11
movq      %r9, 48(%rsp)
movq      $.L_2__STRING.5, 56(%rsp)
movq      %r10, 64(%rsp)
movq      $nErrorCode.9291.0.16, 72(%rsp)
movq      %r11, 80(%rsp)
call      _Z3LogI4PartIiS0_IA2_cS0_ISsS0_IA14_cS0_IbbEEEEEENSt9enable_ifIXsr3std10is_base_ofI9PartTraitIT_ESA_EE5valueEvE4typeEPKciRKSA_

The total is 19 instructions including one function call. It appears each additional argument streamed adds 3 instructions. The compiler creates a different Log() function instantiation depending on the number, kind, and order of message parts, which explains the bizarre function name.

********* SOLUTION #4: CATO'S EXPRESSION TEMPLATES *********

Here is Cato's excellent solution with a tweak to support stream manipulators (eg endl):

#define LOG(msg) (Log(__FILE__, __LINE__, LogData<None>() << msg))

// Workaround GCC 4.7.2 not recognizing noinline attribute
#ifndef NOINLINE_ATTRIBUTE
  #ifdef __ICC
    #define NOINLINE_ATTRIBUTE __attribute__(( noinline ))
  #else
    #define NOINLINE_ATTRIBUTE
  #endif // __ICC
#endif // NOINLINE_ATTRIBUTE

template<typename List>
void Log(const char* file, int line, 
         LogData<List>&& data) NOINLINE_ATTRIBUTE
{
    std::cout << file << ":" << line << ": ";
    output(std::cout, std::move(data.list));
    std::cout << std::endl;
}

struct None { };

template<typename List>
struct LogData {
    List list;
};

template<typename Begin, typename Value>
constexpr LogData<std::pair<Begin&&, Value&&>> operator<<(LogData<Begin>&& begin, 
                                                          Value&& value) noexcept
{
    return {{ std::forward<Begin>(begin.list), std::forward<Value>(value) }};
}

template<typename Begin, size_t n>
constexpr LogData<std::pair<Begin&&, const char*>> operator<<(LogData<Begin>&& begin, 
                                                              const char (&value)[n]) noexcept
{
    return {{ std::forward<Begin>(begin.list), value }};
}

typedef std::ostream& (*PfnManipulator)(std::ostream&);

template<typename Begin>
constexpr LogData<std::pair<Begin&&, PfnManipulator>> operator<<(LogData<Begin>&& begin, 
                                                                 PfnManipulator value) noexcept
{
    return {{ std::forward<Begin>(begin.list), value }};
}

template <typename Begin, typename Last>
void output(std::ostream& os, std::pair<Begin, Last>&& data)
{
    output(os, std::move(data.first));
    os << data.second;
}

inline void output(std::ostream& os, None)
{ }

As Cato points out, the benefit over the last solution is that it results in fewer function instantiations since the const char* specialization handles all string literals. It also causes fewer instructions to be generated at the call site:

movb  $0, (%rsp)
movl  $.L_2__STRING.4, %ecx
movl  $.L_2__STRING.3, %edi
movl  $20, %esi
lea   212(%rsp), %r9
call  void Log<pair<pair<pair<pair<None, char const*>, string const&>, char const*>, int const&> >(char const*, int, LogData<pair<pair<pair<pair<None, char const*>, string const&>, char const*>, int const&> > const&)

Please let me know if you can think of any way to improve the performance or usability of this solution.

Marc Eaddy
  • 1,762
  • 3
  • 16
  • 23
  • 1
    I don't quite understand why you use a temporary stringstream. Why wouldn't `#define LOG(msg) std::cout << __FILE__ << '(' << __LINE__ << '(' << msg << std::endl` work? What's the reason for the `flush` (conversion to lvalue shouldn't be necessary)? And why a `static_cast` instead of a `std::move`? – dyp Oct 17 '13 at 00:01
  • 1
    If you want to keep the `<<` operators but for some reason call a function instead of outputting them directly, you could use expression templates to accumulate the arguments: `#define LOG(msg) log_impl( my_expression_templ_start() << msg )` (where `my_expression_templ_start` is a struct with an overload `operator<<`). – dyp Oct 17 '13 at 00:12
  • Thanks DyP. I simplified the problem using cout as you described. Expression templates is a brilliant idea! – Marc Eaddy Oct 17 '13 at 00:20
  • 1
    Since an `ostringstream` is an `ostream`, I don't see how recursively applying `<<` to the `ostringstream` reduces the code complexity for you. – jxh Oct 17 '13 at 00:24
  • I'm trying to reduce the instruction count at the call site. The first LOG() macro causes multiple operator<< call instructions to be inserted at the call site. The second macro replaces those instructions with a single call instruction to a LogWrapper instantiation. – Marc Eaddy Oct 17 '13 at 00:37
  • 1
    I don't think you will get a very useful `__LINE__` from `Log_Recursive`. See [this answer for a hint on how to mix variadic macros with variadic templates](http://stackoverflow.com/a/19107736/1484212); you will need a macro to expand `__LINE__` at the correct time. – DanielKO Oct 17 '13 at 06:30
  • Have you considered using an [ordinary function and an array](http://ideone.com/oS7EXc)? There will be only one function not several so you won't thrash the icache when you call it with a different number of arguments. – Simple Oct 17 '13 at 08:08
  • @DanielKO: Opps, this was a copy-paste bug. I fixed it. Simple: This won't work because an array requires all elements to be the same type. I'm mixing different types. Perhaps a variadic array would work? – Marc Eaddy Oct 17 '13 at 10:49
  • What's wrong with a `printf` based solution ? Format strings are often easier to read, because you don't mix text and variables. – Alexandre C. Oct 18 '13 at 18:59
  • 2
    I agree that printf is often easier to read, but unlike streaming, printf is not type safe and requires converting user-defined types to intermediary strings. – Marc Eaddy Oct 18 '13 at 22:00
  • I made some fixes: 1) use perfect forwarding to store Value to support non-const& (needed for op<<(ostream&, T&)) and allow compiler to use move semantics so it can keep variables in registers instead of requiring a stack address, 2) use PF to store Begin to avoid uninitialized memory read when copying "empty" (really 4 uninitialized bytes) None struct, 3) make op<< functions constexpr and noexcept to facilitate compiler opts, 4) move LogData into Log() function using && to avoid copying it. – Marc Eaddy Apr 08 '14 at 23:06
  • I like the approach and I thought I understood the principle of the expression templates, but run into something which I do not grasp. I wanted to add an option for a loglevel without using macro's. In case the condition for the loglevel does not match that the expressions in the log statement are not evaluated. I expected a simple if() inside the Log( ) function would do the trick, but that does not work. What do I overlook here and any suggestion how to accomplish this ? – Waldorf Jan 31 '15 at 20:08
  • A loglevel check could be done in the Log() template function, right before the first statement. If by "expressions" you mean expression templates, those are evaluated at compile-time. If you meant stream operator expressions, those are evaluated at run-time and skipable with a loglevel check. – Marc Eaddy Feb 01 '16 at 22:54
  • Do you realize that using system functions instead of standard library would give you magnitudes better performance? And I don't see the point to have 1ns faster logger if it's slowest animal in the zoo anyways. It just doesn't make sense – mip Aug 14 '16 at 19:16
  • @doc what about portability? – pappix Feb 23 '18 at 22:01
  • @pappix if you really want to have a performance boost, then you should refer to a solution that gives you real performance boost. You could use abstraction layer to gain portability. But if you want to have portability out of the box, then standard library is O.K., but you have to be aware of its legendary slowliness. Tuning standard library for performance is is like turning 30 year old pickup into Ferrari. – mip Feb 26 '18 at 11:44
  • @doc sure my point is that in some situation you might give priority to other constraints (i.e.: portability) but still want to keep as much as possible a low overhead within what other constraints allow. To me a solution that has performance only as constraint is less interesting since most real world scenario will impose you different constraints alongside performances. – pappix Feb 27 '18 at 12:36
  • 1
    @pappix yes I understand your point. I have different conclusions however, so let's agree to disagree. – mip Feb 27 '18 at 12:56
  • I like the first one – Erik Aronesty Sep 27 '18 at 19:00

2 Answers2

21

Here is another expression template which seems to be even more efficient based on some tests that I've run. In particular, it avoids creating multiple functions for strings with different lengths by specializing operator<< to use a char * member in the resulting structure. It should also be easy to add other specializations of this form.

struct None { };

template <typename First,typename Second>
struct Pair {
  First first;
  Second second;
};

template <typename List>
struct LogData {
  List list;
};

template <typename Begin,typename Value>
LogData<Pair<Begin,const Value &>>
  operator<<(LogData<Begin> begin,const Value &value)
{
  return {{begin.list,value}};
}

template <typename Begin,size_t n>
LogData<Pair<Begin,const char *>>
  operator<<(LogData<Begin> begin,const char (&value)[n])
{
  return {{begin.list,value}};
}

inline void printList(std::ostream &os,None)
{
}


template <typename Begin,typename Last>
void printList(std::ostream &os,const Pair<Begin,Last> &data)
{
  printList(os,data.first);
  os << data.second;
}

template <typename List>
void log(const char *file,int line,const LogData<List> &data)
{
  std::cout << file << " (" << line << "): ";
  printList(std::cout,data.list);
  std::cout << "\n";
}

#define LOG(x) (log(__FILE__,__LINE__,LogData<None>() << x))

With G++ 4.7.2, with -O2 optimization, this creates a very compact instruction sequence, equivalent to filling a struct with the parameters using a char * for string literals.

Vaughn Cato
  • 63,448
  • 5
  • 82
  • 132
  • 1
    This is much simpler than my original solution, results in fewer function instantiations, and causes fewer instructions to be generated at the call site. I added it to end of my original question and modified it to support stream manipulators. – Marc Eaddy Oct 18 '13 at 22:29
1

I've gone through exactly the same thing. And I ended up with the same solution you've outlined, which just requires the client API to use comma instead of the insertion operator. It keeps things fairly simple, and works well enough. Highly recommended.

John Zwinck
  • 239,568
  • 38
  • 324
  • 436
  • I added an alternative expression template solution that allows you to use the streaming operator with 0 cost. Feel free to try it out! – Marc Eaddy Oct 17 '13 at 19:11