I wrote some test cases on the server and found that Eigen::Matrix is much slower than std::vector, and I do not know why.
The server's configuration is listed below:
cat /proc/cpuinfo
Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 ida arat
The compile command:
g++ -DEIGEN_FFTW_DEFAULT -isystem /toolchain/library/gtest/1.10.0/include -isystem /toolchain/library/glog/0.4.0/include -isystem /toolchain/library/eigen/3.3.7/include/eigen3 -isystem /toolchain/library/eigen/3.3.7/include/eigen3/unsupported -isystem /toolchain/library/boost/1.72.0/include -isystem /toolchain/library/fftw/3.3.8/include -isystem /toolchain/library/opencv/2.4.13.6/include -isystem /toolchain/library/nlopt/2.6.2/include -Wno-unused-local-typedefs -Werror -Wall -std=c++0x -fPIC -march=native -Ofast -DNDEBUG -std=gnu++11 ......
The test cases are listed below:
// Benchmark for Matrix * complex_value: times a single scalar multiplication
// of a randomly filled, row-major complex<float> Eigen matrix.
TEST(ComplexMul, MatrixFloat2) {
  using Scalar = float;
  using ComplexMatrix = Matrix<complex<Scalar>, Dynamic, Dynamic, RowMajor>;

  const complex<Scalar> kC1(3.4, 4.3);
  ComplexMatrix input;
  ComplexMatrix product(kMatRowNum, kMatColNum);  // sized before the timed region
  input = ComplexMatrix::Random(kMatRowNum, kMatColNum);

  // Only the multiplication and its assignment sit between the two clock reads.
  bpt::ptime started = bpt::microsec_clock::local_time();
  product = input * kC1;
  bpt::ptime finished = bpt::microsec_clock::local_time();
  bpt::time_duration elapsed = finished - started;

  // Report the elapsed time in green, then switch back to the default colour.
  ostream_color::Modifier_C green(ostream_color::FG_GREEN);
  ostream_color::Modifier_C normal(ostream_color::FG_DEFAULT);
  cout << green << "ComplexMul.MatrixFloat2 duration: " << elapsed.total_milliseconds() << " ms"
       << normal << endl;

  // Print the top-left 3x3 block of the result.
  cout << product.block(0, 0, 3, 3) << endl;
}
// Benchmark for std::vector * complex_value: copies a randomly filled Eigen
// matrix into a std::vector and times an element-by-element multiplication.
TEST(ComplexMul, VectorFloat) {
  using Scalar = float;
  using ComplexMatrix = Matrix<complex<Scalar>, Dynamic, Dynamic, RowMajor>;

  const complex<Scalar> kC1(3.4, 4.3);
  ComplexMatrix m1 = ComplexMatrix::Random(kMatRowNum, kMatColNum);

  // Flatten the matrix storage into a vector; the output has the same length.
  const auto element_count = m1.rows() * m1.cols();
  std::vector<std::complex<Scalar>> source(m1.data(), m1.data() + element_count);
  std::vector<std::complex<Scalar>> scaled(element_count);

  // Only the multiplication loop sits between the two clock reads.
  bpt::ptime started = bpt::microsec_clock::local_time();
  for (size_t idx = 0; idx < source.size(); idx++) {
    scaled[idx] = source[idx] * kC1;
  }
  bpt::ptime finished = bpt::microsec_clock::local_time();
  bpt::time_duration elapsed = finished - started;

  // Report the elapsed time in green, then switch back to the default colour.
  ostream_color::Modifier_C green(ostream_color::FG_GREEN);
  ostream_color::Modifier_C normal(ostream_color::FG_DEFAULT);
  cout << green << "ComplexMul.VectorFloat duration: " << elapsed.total_milliseconds() << " ms"
       << normal << endl;

  // Print the first element of the result.
  cout << scaled[0] << endl;
}
The test results are listed below: [image missing — placeholder text: "enter image description here"]