I have the following function, which is called many times and makes my program run slowly. Is there any way to optimize it, for example by using SIMD instructions or other techniques? The getRay() function retrieves a 3-vector for a given 2-vector query from a pre-computed look-up table. The code is compiled with Visual Studio 2013 and the target configuration is x64.
By the way, the for-loop which calls this function many times is already optimized by using OpenMP.
Thank you very much.
/// Fills `patch1` with a `patchSize` x `patchSize` patch sampled from `image2`.
///
/// For every patch cell (v, u) the function
///   1. forms the pixel `patchCenter + (u - patchSize / 2, v - patchSize / 2)`
///      and requires it to lie inside `image1`,
///   2. fetches the ray for that pixel via `cam1.getRay(row, col, ray)`,
///   3. maps the ray with `H_camera2_camera1` and projects it with
///      `cam2.project`,
///   4. requires the projection to lie in `[0, cols - 1) x [0, rows - 1)` of
///      `image2` so that the 2x2 interpolation neighbourhood is readable,
///   5. writes the interpolated value to `patch1(v, u)`.
///
/// @param H_camera2_camera1 3x3 matrix applied to each ray of camera 1.
/// @param image1            Only its dimensions are used (bounds check).
/// @param image2            Image that is sampled; must be single-channel.
/// @param cam1              Camera providing `getRay`.
/// @param cam2              Camera providing `project`.
/// @param patchCenter       Patch centre in `image1`, as (x, y).
/// @param patchSize         Side length of the patch; at most 7.
/// @param patch1            Output patch; cells outside the top-left
///                          `patchSize` x `patchSize` corner are not touched.
/// @return true when every cell was written; false as soon as any check
///         fails, in which case `patch1` may be partially written.
bool warpPlanarHomography(
    const Eigen::Matrix3d& H_camera2_camera1,
    const cv::Mat& image1,
    const cv::Mat& image2,
    FisheyeCameraUnified& cam1,
    FisheyeCameraUnified& cam2,
    const Eigen::Vector2i& patchCenter,
    const int patchSize,
    Eigen::Matrix<unsigned char, 7, 7>& patch1)
{
    // patch1 has fixed 7x7 storage; a larger patchSize would write past it.
    if (patchSize > patch1.rows() || patchSize > patch1.cols())
    {
        return false;
    }

    // getInterpolatedPixel() writes image2.channels() bytes per call, but a
    // patch cell is a single byte: anything other than one channel would
    // clobber neighbouring cells and overrun the matrix at the last cell.
    if (image2.channels() != 1)
    {
        return false;
    }

    // Derived from patchSize (equals the former hard-coded 3 for a 7x7 patch)
    // so that smaller patches stay centred on patchCenter.
    const int halfSize = patchSize / 2;

    for (int v = 0; v < patchSize; ++v) // row
    {
        for (int u = 0; u < patchSize; ++u) // column
        {
            Eigen::Vector2i p1(patchCenter.x() + u - halfSize,
                               patchCenter.y() + v - halfSize);
            if (p1.x() < 0 || p1.y() < 0 ||
                p1.x() >= image1.cols || p1.y() >= image1.rows)
            {
                return false;
            }

            Eigen::Vector3d ray1;
            cam1.getRay(p1.y(), p1.x(), ray1); // argument order: (y, x)

            Eigen::Vector2d p2;
            if (!cam2.project(H_camera2_camera1 * ray1, p2))
            {
                return false;
            }

            // The upper bounds use "- 1" because interpolation also reads the
            // pixel to the right of and below the truncated coordinate.
            if (p2.x() < 0.0 || p2.x() >= image2.cols - 1 ||
                p2.y() < 0.0 || p2.y() >= image2.rows - 1)
            {
                return false;
            }

            getInterpolatedPixel(image2, p2, &patch1(v, u));
        }
    }
    return true;
}
The project() function looks like this:
/// Projects a 3D ray to pixel coordinates.
///
/// The ray is divided by `ray.z + m_xi * |ray|` to obtain normalized-plane
/// coordinates, which are then scaled and shifted by the focal lengths
/// (`m_K(0,0)`, `m_K(1,1)`) and principal point (`m_K(0,2)`, `m_K(1,2)`).
///
/// @param ray Ray to project; it does not need to be normalized.
/// @param pt  Receives the pixel coordinates (x, y). Left unmodified when the
///            function returns false.
/// @return false when the divisor `ray.z + m_xi * |ray|` is not strictly
///         positive (this includes a zero-length ray and NaN input);
///         true otherwise.
bool FisheyeCameraUnified::project(const Eigen::Vector3d& ray, Eigen::Vector2d& pt)
{
    const double fx = m_K(0, 0);
    const double fy = m_K(1, 1);
    const double cx = m_K(0, 2);
    const double cy = m_K(1, 2);
    const double xi = m_xi;

    const double d = ray.norm();
    const double denom = ray(2) + xi * d;

    // Reject rays that cannot be projected. A zero divisor would yield
    // inf/NaN coordinates, and NaN slips through "<" / ">=" range checks in
    // callers. Written as !(denom > 0) so that a NaN divisor is rejected too.
    if (!(denom > 0.0))
    {
        return false;
    }
    const double rz = 1.0 / denom;

    // Project the scene point to the normalized plane.
    const double mx = ray(0) * rz;
    const double my = ray(1) * rz;

    // Apply the projection matrix.
    pt(0) = fx * mx + cx;
    pt(1) = fy * my + cy;
    return true;
}
and the getInterpolatedPixel() function is as follows:
/// Bilinearly interpolates `image` at the sub-pixel position `coords`.
///
/// The coordinates are truncated to an integer pixel, and the four pixels at
/// (col, row), (col + 1, row), (col, row + 1) and (col + 1, row + 1) are
/// blended with weights derived from the fractional parts.
///
/// No bounds checking is done here: the caller must make sure that all four
/// pixels exist. Pixel addresses are computed with one byte per channel
/// (NOTE(review): this presumably expects an 8-bit image — confirm).
///
/// @param image  Image to sample.
/// @param coords Sample position as (x, y).
/// @param pixel  Output buffer; receives `image.channels()` bytes.
void getInterpolatedPixel(const cv::Mat& image, const Eigen::Vector2d& coords, unsigned char* pixel)
{
    const double x = coords.x();
    const double y = coords.y();

    // Integer pixel and fractional offsets inside it.
    const int col = static_cast<int>(x);
    const int row = static_cast<int>(y);
    const double fracX = x - col;
    const double fracY = y - row;
    const double fracXY = fracX * fracY;

    // Bilinear weights in expanded form; the four of them sum to 1.
    const double weightTopLeft = 1.0 - fracX - fracY + fracXY;
    const double weightTopRight = fracX - fracXY;
    const double weightBottomLeft = fracY - fracXY;
    const double weightBottomRight = fracXY;

    const int channelCount = image.channels();
    const auto rowStride = image.step.p[0]; // bytes per image row

    const unsigned char* topLeft = image.data + row * rowStride + col * channelCount;
    const unsigned char* topRight = topLeft + channelCount;
    const unsigned char* bottomLeft = topLeft + rowStride;
    const unsigned char* bottomRight = bottomLeft + channelCount;

    for (int c = 0; c < channelCount; ++c)
    {
        const double blended = weightBottomRight * bottomRight[c]
                             + weightBottomLeft * bottomLeft[c]
                             + weightTopRight * topRight[c]
                             + weightTopLeft * topLeft[c];
        pixel[c] = cv::saturate_cast<unsigned char>(blended);
    }
}