
I am using OpenCV to implement camera motion compensation for an application. I know I need to calculate the optical flow and then find the fundamental matrix between two frames to transform the image.

Here is what I have done so far:

void VideoStabilization::stabilize(Image *image) {
    if (image->getWidth() != width || image->getHeight() != height)
        reset(image->getWidth(), image->getHeight());

    IplImage *currImage = toCVImage(image);
    IplImage *currImageGray = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 1);

    cvCvtColor(currImage, currImageGray, CV_BGRA2GRAY);

    if (baseImage) {
        CvPoint2D32f currFeatures[MAX_CORNERS];
        char featuresFound[MAX_CORNERS];

        opticalFlow(currImageGray, currFeatures, featuresFound);

        IplImage *result = transformImage(currImage, currFeatures, featuresFound);
        if (result) {
            updateImage(image, result);
            cvReleaseImage(&result);
        }
    }

    cvReleaseImage(&currImage);

    if (baseImage) cvReleaseImage(&baseImage);
    baseImage = currImageGray;

    updateGoodFeatures();
}

void VideoStabilization::updateGoodFeatures() {
    const double QUALITY_LEVEL = 0.05;
    const double MIN_DISTANCE = 5.0;

    baseFeaturesCount = MAX_CORNERS;

    cvGoodFeaturesToTrack(baseImage, eigImage,
                          tempImage, baseFeatures, &baseFeaturesCount, QUALITY_LEVEL, MIN_DISTANCE);

    cvFindCornerSubPix(baseImage, baseFeatures, baseFeaturesCount,
                       cvSize(10, 10), cvSize(-1,-1), TERM_CRITERIA);
}

void VideoStabilization::opticalFlow(IplImage *currImage, CvPoint2D32f *currFeatures, char *featuresFound) {
    const unsigned int WIN_SIZE = 15;
    const unsigned int PYR_LEVEL = 5;

    cvCalcOpticalFlowPyrLK(baseImage, currImage,
                           NULL, NULL,
                           baseFeatures,
                           currFeatures,
                           baseFeaturesCount,
                           cvSize(WIN_SIZE, WIN_SIZE),
                           PYR_LEVEL,
                           featuresFound,
                           NULL,
                           TERM_CRITERIA,
                           0);
}

IplImage *VideoStabilization::transformImage(IplImage *image, CvPoint2D32f *features, char *featuresFound) const {
    // featuresFound is only filled for the first baseFeaturesCount entries,
    // so don't scan all MAX_CORNERS slots (the rest are uninitialized).
    unsigned int featuresFoundCount = 0;
    for (int i = 0; i < baseFeaturesCount; ++i) {
        if (featuresFound[i]) ++featuresFoundCount;
    }

    if (featuresFoundCount < 8) {
        std::cout << "Not enough features found." << std::endl;
        return NULL;
    }

    CvMat *points1 = cvCreateMat(2, featuresFoundCount, CV_32F);
    CvMat *points2 = cvCreateMat(2, featuresFoundCount, CV_32F);

    CvMat *fundamentalMatrix = cvCreateMat(3, 3, CV_32F);

    unsigned int pos = 0;
    for (unsigned int i = 0; i < featuresFoundCount; ++i) {
        while (!featuresFound[pos]) ++pos;

        cvSetReal2D(points1, 0, i, baseFeatures[pos].x);
        cvSetReal2D(points1, 1, i, baseFeatures[pos].y);
        cvSetReal2D(points2, 0, i, features[pos].x);
        cvSetReal2D(points2, 1, i, features[pos].y);
        ++pos;
    }

    int fmCount = cvFindFundamentalMat(points1, points2, fundamentalMatrix, CV_FM_RANSAC, 1.0, 0.99);
    if (fmCount < 1) {
        std::cout << "Fundamental matrix not found." << std::endl;
        cvReleaseMat(&points1);
        cvReleaseMat(&points2);
        cvReleaseMat(&fundamentalMatrix);
        return NULL;
    }

    std::cout << fundamentalMatrix->data.fl[0] << " " << fundamentalMatrix->data.fl[1] << " " << fundamentalMatrix->data.fl[2] << "\n";
    std::cout << fundamentalMatrix->data.fl[3] << " " << fundamentalMatrix->data.fl[4] << " " << fundamentalMatrix->data.fl[5] << "\n";
    std::cout << fundamentalMatrix->data.fl[6] << " " << fundamentalMatrix->data.fl[7] << " " << fundamentalMatrix->data.fl[8] << "\n";

    cvReleaseMat(&points1);
    cvReleaseMat(&points2);

    IplImage *result = transformImage(image, *fundamentalMatrix);

    cvReleaseMat(&fundamentalMatrix);

    return result;
}

MAX_CORNERS is 100, and it usually finds around 70-90 features.

With this code, I get a weird fundamental matrix, like:

-0.000190809 -0.00114947 1.2487
0.00127824 6.57727e-05 0.326055
-1.22443 -0.338243 1

Since I just held the camera in my hand and tried not to shake it (and there weren't any moving objects), I expected the matrix to be close to the identity. What am I doing wrong?

Also, I'm not sure what to use to transform the image: cvWarpAffine needs a 2x3 matrix, so should I discard the last row or use another function?

fbafelipe

1 Answer


What you're looking for is not the fundamental matrix but rather an affine or perspective transform.

The fundamental matrix describes the relation between two cameras with significantly different viewpoints. It is computed so that if you have two points x (on one image) and x' (on the other) that are projections of the same point in space, then the product x'ᵀ F x is zero. If x and x' are nearly identical, as they are between two frames from a camera that barely moves, the estimation degenerates: the only solution is to make F nearly zero (and practically useless). That's why you've got what you have.

The matrix that should indeed be near the identity is a transformation A that maps the points x into x' = A x (the old image into the new one). Depending on which kinds of transformation you want to allow (affine or perspective), you could (theoretically) use the functions cvGetAffineTransform or cvGetPerspectiveTransform to calculate it; they need exactly 3 or 4 point pairs, respectively.
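
For instance, a minimal sketch of the four-point variant; the coordinates below are invented for illustration, and the function and variable names are just local choices:

#include <opencv/cv.h>

// Sketch only: compute a 3x3 perspective transform from exactly four
// point pairs. The coordinates are made up for illustration.
void perspectiveFromFourPairs() {
    // Four points in the base frame and the same points as seen
    // (slightly shifted) in the current frame.
    CvPoint2D32f src[4] = { cvPoint2D32f(0, 0),    cvPoint2D32f(100, 0),
                            cvPoint2D32f(100, 100), cvPoint2D32f(0, 100) };
    CvPoint2D32f dst[4] = { cvPoint2D32f(2, 1),    cvPoint2D32f(102, 1),
                            cvPoint2D32f(101, 99),  cvPoint2D32f(1, 101) };

    CvMat *mapMatrix = cvCreateMat(3, 3, CV_32F);
    cvGetPerspectiveTransform(src, dst, mapMatrix);  // fills the 3x3 matrix

    // ... mapMatrix would then go to cvWarpPerspective ...
    cvReleaseMat(&mapMatrix);
}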

However, the best choice (I think) is cvFindHomography. It estimates a perspective transform from all of the available point pairs, filtering outliers along the way (with RANSAC, for example), and gives you a 3x3 matrix.

Then you can use cvWarpPerspective to transform the images themselves.
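
To make that concrete, here is a sketch of how the fundamental-matrix block in transformImage could be replaced, reusing the 2xN CV_32F point matrices already built there (points1 holds the base-frame features, points2 the current ones); the function name and the 1-pixel RANSAC threshold are my own choices, not anything from your code:

#include <opencv/cv.h>

// Sketch only: estimate a homography that maps the current frame back
// onto the base frame and warp the current image with it.
IplImage *warpWithHomography(IplImage *image, CvMat *points1, CvMat *points2) {
    CvMat *homography = cvCreateMat(3, 3, CV_64F);

    // Map current-frame points (points2) onto base-frame points (points1);
    // RANSAC with a 1-pixel reprojection threshold rejects bad tracks.
    if (!cvFindHomography(points2, points1, homography, CV_RANSAC, 1.0)) {
        cvReleaseMat(&homography);
        return NULL;  // estimation failed
    }

    IplImage *result = cvCreateImage(cvGetSize(image), image->depth, image->nChannels);
    cvWarpPerspective(image, result, homography,
                      CV_INTER_LINEAR + CV_WARP_FILL_OUTLIERS, cvScalarAll(0));

    cvReleaseMat(&homography);
    return result;
}

This also answers the cvWarpAffine question: you keep the full 3x3 matrix and use cvWarpPerspective; cvWarpAffine and its 2x3 matrix would only apply if you restricted the model to an affine transform.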

Latanius
  • I still need some work to fix the shaking, but it seems to be on the right track now, thanks. Note that the function is named cvFindHomography. – fbafelipe Mar 04 '11 at 17:06