I have been trying to understand the practical uses of template metaprogramming (TMP). I see a lot of code along the following lines:
#ifndef LOOP2_HPP
#define LOOP2_HPP
// Dot product of two DIM-element arrays, expressed as a chain of template
// instantiations instead of a run-time loop.
//
// primary template: multiplies the leading pair of elements and adds the
// result of DotProduct<DIM-1,T> applied to the remaining elements.
// DIM must be non-negative; a negative DIM never reaches one of the
// terminating specializations below.
template <int DIM, typename T>
class DotProduct {
public:
    // a, b: each must point to at least DIM readable elements of type T.
    static T result (T* a, T* b) {
        return *a * *b + DotProduct<DIM-1,T>::result(a+1,b+1);
    }
};
// partial specialization as end criteria: a single remaining element pair.
// Kept separate from the DIM == 0 case so that results for DIM >= 1 are
// computed without an extra "+ T()" term.
template <typename T>
class DotProduct<1,T> {
public:
    static T result (T* a, T* b) {
        return *a * *b;
    }
};
// partial specialization for empty arrays: the sum over zero terms is a
// value-initialized T. Without it, DotProduct<0,T> would select the primary
// template and recurse through negative dimensions until the compiler's
// instantiation depth limit is hit. The pointers are never dereferenced.
template <typename T>
class DotProduct<0,T> {
public:
    static T result (T*, T*) {
        return T();
    }
};
// convenience function
template <int DIM, typename T>
inline T dot_product (T* a, T* b)
{
return DotProduct<DIM,T>::result(a,b);
}
Is it good practice to always explicitly mark such heavily recursive functions as `inline`?
EDIT:
For a more concrete example take the following code:
// Writes the integers 0..N to std::cout in ascending order, one per line.
// f<N> first calls f<N-1> and then prints its own N, so the chain of calls
// is fixed by template instantiation. Each f<K> is still a distinct
// function; whether the nested calls are actually inlined is decided by the
// optimizer, not by the `inline` keyword.
// N must be non-negative: only the f<0> specialization ends the chain.
template <int N>
inline void f() {
    f<N-1>();
    std::cout << N << "\n";
}

// Base case that terminates the instantiation chain.
// Declared `inline` explicitly: an explicit specialization does not inherit
// `inline` from the primary template, so without it this definition would
// violate the one-definition rule when included in more than one
// translation unit.
template <>
inline void f<0>() {
    std::cout << 0 << "\n";
}
// Program entry point: instantiates and runs f<1>, the smallest case that
// exercises both the generic template and its f<0> base case.
int main()
{
    f<1>();
    // Falling off the end of main is equivalent to `return 0;`.
}
I just want to use the function `f` as a way to unroll, at compile time, a bunch of `cout` statements that I don't want to write out by hand. Following is the assembly generated by gcc-8.3, all optimizations enabled:
void f<0>():
push rbp
mov rbp, rsp
mov esi, 0
mov edi, OFFSET FLAT:_ZSt4cout
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
mov esi, OFFSET FLAT:.LC0
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)
nop
pop rbp
ret
main:
push rbp
mov rbp, rsp
call void f<1>()
mov eax, 0
pop rbp
ret
void f<1>():
push rbp
mov rbp, rsp
call void f<0>()
mov esi, 1
mov edi, OFFSET FLAT:_ZSt4cout
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
mov esi, OFFSET FLAT:.LC0
mov rdi, rax
call std::basic_ostream<char, std::char_traits<char> >& std::operator<< <std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*)
nop
pop rbp
ret
It seems that each unrolling step results in a runtime `call` instruction. It is this cost I want to avoid. I just want the final generated code to be a concatenation of multiple `cout` statements.