1

I have a std::vector<DOUBLEPOINT> I'm making an application with bezier curves and the results are shown in real time. I convert bezier points into a bunch of short lines. I store the coordinates for the small lines in the vector above. Here is my issue:. When the size of my vector exceeds a cache line, things get very slow very fast. I was wondering if it would be better to have a lot of std::vector<DOUBLEPOINT> and basically every 100 points, it switches to another one. Would this solve my cache problem? Otherwise, how else could I allow the user to create as many points as needed without it becoming very very slow? all my other algorithms are lightening fast (such as polygon filling) so these are not my issues. What really slows it down is the std::vector.

Thanks

    struct SHAPECONTOUR{

        std::vector<USERFPOINT> UserPoints;
        std::vector<DOUBLEPOINT> DrawingPoints;

        SHAPEOUTLINE Outline;

    };

I call UpdateShape() every time a point is added but I assure you my other algorithms are fast...


void OGLSHAPE::UpdateShape()
{
    if(Contour.size() == 0)
    {
        return;
    }
    for(int i = 0; i < Contour.size(); ++i)
    {
        Contour[i].DrawingPoints.clear();

     if(Contour[i].UserPoints.size() < 2)
     {
         break;
     }


    Contour[i].DrawingPoints.clear();
Contour[i].DrawingPoints.reserve(1000);
     for(unsigned int x = 0; x < Contour[i].UserPoints.size() - 1; ++x)
         SetCubicBezier(
             Contour[i].UserPoints[x],
             Contour[i].UserPoints[x + 1],
             i);




     //Remove Duplicates
     for(int j = 0; j < 2; ++j)
     {
         if(Contour[i].DrawingPoints.size() > 2)
             for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
             {
                 if(Contour[i].DrawingPoints[x].point[0] ==
                     Contour[i].DrawingPoints[x + 1].point[0] &&
                     Contour[i].DrawingPoints[x].point[1] ==
                     Contour[i].DrawingPoints[x + 1].point[1] 

                 )
                     Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);
             }
     }


     GenerateLinePoly(Contour[i].DrawingPoints,Contour[i].Outline.OutlineWidth);
     Contour[i].Outline.OutlineSize = OutlineVec.size()  / 2;
     glBindBufferARB(GL_ARRAY_BUFFER_ARB,Contour[i].Outline.OutlineVBO);
     glBufferDataARB(GL_ARRAY_BUFFER_ARB,sizeof(GLfloat) * OutlineVec.size(),&OutlineVec[0],GL_STATIC_COPY);


    }

    gluTessNormal(PolygonTesselator.tobj, 0, 0, 1);
    PolygonTesselator.Set_Winding_Rule(WindingRule); 

    //PolygonTesselator.SetDimensions(layer[currentlayer].Shapes[i].Dimensions,layer[currentlayer].Shapes[i].minima);

    PolygonTesselator.Begin_Polygon(); 
    for(unsigned int c = 0; c < Contour.size(); ++c)
    {
            PolygonTesselator.Begin_Contour();

            for(unsigned int j = 0; j < Contour[c].DrawingPoints.size(); ++j)
            {
                gluTessVertex(PolygonTesselator.tobj,&Contour[c].DrawingPoints[j].point[0],
                    &Contour[c].DrawingPoints[j].point[0]);
            }

            PolygonTesselator.End_Contour();

    }
    PolygonTesselator.End_Polygon();



    PolygonTesselator.TransferVerticies(
        ObjectVBOInt,
        TextureCoordsVBOInt,
        ObjectVBOCount,
        TextureCoordsVBOCount);
}

void OGLSHAPE::SetCubicBezier(USERFPOINT &a,USERFPOINT &b, int &currentcontour )
{





        double dx1 = a.RightHandle.x - a.UserPoint.x;
        double dy1 = a.RightHandle.y - a.UserPoint.y;
        double dx2 = b.LeftHandle.x - a.RightHandle.x;
        double dy2 = b.LeftHandle.y - a.RightHandle.y;
        double dx3 = b.UserPoint.x - b.LeftHandle.x;
        double dy3 = b.UserPoint.y - b.LeftHandle.y;

        float len = sqrt(dx1 * dx1 + dy1 * dy1) + 
            sqrt(dx2 * dx2 + dy2 * dy2) + 
            sqrt(dx3 * dx3 + dy3 * dy3);




        int NUM_STEPS =  int(len * 0.049);

        if(NUM_STEPS > 55)
        {
            NUM_STEPS = 55;
        }
        double subdiv_step  = 1.0 / (NUM_STEPS + 1);
        double subdiv_step2 = subdiv_step*subdiv_step;
        double subdiv_step3 = subdiv_step*subdiv_step*subdiv_step;

        double pre1 = 3.0 * subdiv_step;
        double pre2 = 3.0 * subdiv_step2;
        double pre4 = 6.0 * subdiv_step2;
        double pre5 = 6.0 * subdiv_step3;



        double tmp1x = a.UserPoint.x - a.RightHandle.x * 2.0 + b.LeftHandle.x;
        double tmp1y = a.UserPoint.y - a.RightHandle.y  * 2.0 + b.LeftHandle.y;

        double tmp2x = (a.RightHandle.x - b.LeftHandle.x)*3.0 - a.UserPoint.x + b.UserPoint.x;
        double tmp2y = (a.RightHandle.y - b.LeftHandle.y)*3.0 - a.UserPoint.y + b.UserPoint.y;

        temp.point[0] = a.UserPoint.x;
        temp.point[1] = a.UserPoint.y;

        //a user
        //a right
        //b left
        //b user

        double dfx = (a.RightHandle.x - a.UserPoint.x)*pre1 + tmp1x*pre2 + tmp2x*subdiv_step3;
        double dfy = (a.RightHandle.y - a.UserPoint.y)*pre1 + tmp1y*pre2 + tmp2y*subdiv_step3;

        double ddfx = tmp1x*pre4 + tmp2x*pre5;
        double ddfy = tmp1y*pre4 + tmp2y*pre5;

        double dddfx = tmp2x*pre5;
        double dddfy = tmp2y*pre5;

        int step = NUM_STEPS;

        // Suppose, we have some abstract object Polygon which
        // has method AddVertex(x, y), similar to LineTo in
        // many graphical APIs.
        // Note, that the loop has only operation add!

        while(step--)
        {


            temp.point[0]  += dfx;
            temp.point[1]  += dfy;
            dfx  += ddfx;
            dfy  += ddfy;
            ddfx += dddfx;
            ddfy += dddfy;

            Contour[currentcontour].DrawingPoints.push_back(temp);
        }


        temp.point[0] = (GLdouble)b.UserPoint.x;
        temp.point[1] = (GLdouble)b.UserPoint.y;
        Contour[currentcontour].DrawingPoints.push_back(temp);


}

void OGLSHAPE::GenerateLinePoly(const std::vector<DOUBLEPOINT> &input, int width)
{
    OutlineVec.clear();
    if(input.size() < 2)
    {
        return;
    }



    float w = width / 2.0f;

    //glBegin(GL_TRIANGLES);
    for( size_t i = 0; i < input.size()-1; ++i )
    {
        POINTFLOAT cur;
        cur.x = input[i].point[0];
        cur.y = input[i].point[1];


        POINTFLOAT nxt;
        nxt.x = input[i+1].point[0];
        nxt.y = input[i+1].point[1];

        POINTFLOAT b;
        b.x = nxt.x - cur.x;
        b.y = nxt.y - cur.y;

        b = normalize(b);



        POINTFLOAT b_perp;
        b_perp.x = -b.y;
        b_perp.y = b.x;


        POINTFLOAT p0;
        POINTFLOAT p1;
        POINTFLOAT p2;
        POINTFLOAT p3;

        p0.x = cur.x + b_perp.x * w;
        p0.y = cur.y + b_perp.y * w;

        p1.x = cur.x - b_perp.x * w;
        p1.y = cur.y - b_perp.y * w;

        p2.x = nxt.x + b_perp.x * w;
        p2.y = nxt.y + b_perp.y * w;

        p3.x = nxt.x - b_perp.x * w;
        p3.y = nxt.y - b_perp.y * w;

        OutlineVec.push_back(p0.x);
        OutlineVec.push_back(p0.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);

        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p3.x);
        OutlineVec.push_back(p3.y);



        // only do joins when we have a prv
        if( i == 0 ) continue;

        POINTFLOAT prv;
        prv.x = input[i-1].point[0];
        prv.y = input[i-1].point[1];

        POINTFLOAT a;
        a.x = prv.x - cur.x;
        a.y = prv.y - cur.y;

        a = normalize(a);

        POINTFLOAT a_perp;
        a_perp.x = a.y;
        a_perp.y = -a.x;

        float det = a.x * b.y  - b.x * a.y;
        if( det > 0 )
        {
            a_perp.x = -a_perp.x;
            a_perp.y = -a_perp.y;

            b_perp.x = -b_perp.x;
            b_perp.y = -b_perp.y;
        }

        // TODO: do inner miter calculation

        // flip around normals and calculate round join points
        a_perp.x = -a_perp.x;
        a_perp.y = -a_perp.y;

        b_perp.x = -b_perp.x;
        b_perp.y = -b_perp.y;

        size_t num_pts = 4;

        std::vector< POINTFLOAT> round( 1 + num_pts + 1 );
        POINTFLOAT nc;
        nc.x = cur.x + (a_perp.x * w);
        nc.y = cur.y + (a_perp.y * w);

        round.front() = nc;

        nc.x = cur.x + (b_perp.x * w);
        nc.y = cur.y + (b_perp.y * w);

        round.back() = nc;

        for( size_t j = 1; j < num_pts+1; ++j )
        {
            float t = (float)j/(float)(num_pts+1);
            if( det > 0 )
         {
             POINTFLOAT nin;
             nin = slerp2d( b_perp, a_perp, 1.0f-t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         }
            else
         {
             POINTFLOAT nin;
             nin = slerp2d( a_perp, b_perp, t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         }
        }

        for( size_t j = 0; j < round.size()-1; ++j )
        {

            OutlineVec.push_back(cur.x);
            OutlineVec.push_back(cur.y);


            if( det > 0 )
         {
             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);
             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);
         }
            else
         {

             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);

             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);
         }
        }
    }

}
jmasterx
  • 52,639
  • 96
  • 311
  • 557
  • Does performance degrade when rendering, or only when adding new points? – fingerprint211b Jul 12 '10 at 01:17
  • Only when adding new points, 257 is fine, but 258 points goes from 60fps to like 0.5fps. – jmasterx Jul 12 '10 at 01:20
  • in Debug mode, it slows down after 59 – jmasterx Jul 12 '10 at 01:21
  • How did you come to the conclusion that you have a "cache problem?" – James McNellis Jul 12 '10 at 01:22
  • 2
    Could we see some code? I'm thinking it's not the vector's fault, 258 elements isn't really much (unless you have giant classes that are copied all the time, but it doesn't sound like it). – fingerprint211b Jul 12 '10 at 01:25
  • 1
    But you aren't constantly adding new points, do you? Then slow down happens during iteration over already allocated vector. It brings suspection that it's not std::vector fault at all. – mip Jul 12 '10 at 01:32
  • @Jex: If you are going to blame the standard library or your CPU's cache for your performance problems, you need to reduce the code to a small snippet that demonstrates the problem. As it is now, with the large number of calls into other libraries (namely OpenGL calls), anything could be causing your performance problems. – James McNellis Jul 12 '10 at 01:34
  • The thing is, when I just scroll (redraw the shapes) it's lightening fast – jmasterx Jul 12 '10 at 01:41
  • Also, when I switched from std::vector> to std::vector I already got a huge speed boost. – jmasterx Jul 12 '10 at 01:42

2 Answers2

0

I don't know anything about caching, but it might help to expand your vector manually, instead of letting it expand itself one value at a time as they are added.

According to this reference, you can call reserve to increase the maximum size of your vector. I would suggest starting your vector with a length of say, 100, and once the 100th slot is filled, request 100 more slots.

This answer might not address the question, as I do not know how caching relates to std::vectors but it is possible you are facing the problem I describe.

31eee384
  • 2,748
  • 2
  • 18
  • 27
0

This probably isn't the root of your issue. But you may want to try using iterators instead of indexing everywhere. It may help the compiler make better optimization decisions. std::for_each looks like a possible candidate for this, you can just defer to another function, for example:

void OGLSHAPE::real_update_shap(SHAPECONTOUR &contour) {
    if(contour.UserPoints.size() < 2) {
        return;
    }
    // do your thing!
}

void OGLSHAPE::UpdateShape() {
    // no need to explicitly test if empty, for_each won't do anything if the vector
    // has no elements
    std::for_each(Contour.begin(), Contour.end(), std::bind1st(std::mem_fun(real_update_shape), this));
}

Or at the very least, use some references to help the compiler out. For example, convert this:

 //Remove Duplicates
 for(int j = 0; j < 2; ++j)
 {
     if(Contour[i].DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
         {
             if(Contour[i].DrawingPoints[x].point[0] ==
                 Contour[i].DrawingPoints[x + 1].point[0] &&
                 Contour[i].DrawingPoints[x].point[1] ==
                 Contour[i].DrawingPoints[x + 1].point[1] 

             )
                 Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);
         }
 }

To something like this:

 //Remove Duplicates
 for(int j = 0; j < 2; ++j)
 {
     // reference to the one we care about, this may be allowed to be const
     // but I am not sure since it depends on your specific use cases, if it can be
     // it is better to shoot for const correctness as much as possible.
     // PS: types in all caps is very ugly, people will think it's a macro!
     SHAPECONTOUR &current_contour = Contour[i];
     if(current_contour.DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < current_contour.DrawingPoints.size() - 1; ++x)
         {
             if( current_contour.DrawingPoints[x    ].point[0] ==
                 current_contour.DrawingPoints[x + 1].point[0] &&
                 current_contour.DrawingPoints[x    ].point[1] ==
                 current_contour.DrawingPoints[x + 1].point[1])

             current_contour.DrawingPoints.erase(current_contour.DrawingPoints.begin() + x);
         }
 }

It's one of those things that can possible help, and definitely doesn't hurt. So it's worth a try.

Evan Teran
  • 87,561
  • 32
  • 179
  • 238
  • Unfortunately I profiled my code and realized it was my polygon tessellation that was killing my speed. I don't know how to speed this up... – jmasterx Jul 12 '10 at 05:59