2

Ive written a program that reads in a ascii file with cell vertice coordinates and then outputs a file with cell centres. Is there any way I can speed up the c++ code, to at least match my matlab performance. Im using push_back vectors in my c++ code but I don't know how I would achieve similar with arrays??

Im trying to use sleepy for profiling but how do you get sleepy to start profiling before you set of the program it can only program runs that are already running, also the outputs are very confusing and dont go to my lines of code but lines like std::_Vector_cal >.... etc ect. Whats the difference between exclusive and inclusive, and the highest % exclusive name is operator delete and second is operator new????

My c++ source:

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
#include <cstdlib>

std::vector<double> GetValues_n(const std::vector<std::string>& src, int start, int end)
{
    std::vector<double> ret;
    for(int i = start; i <= end; ++i)
    {
        ret.push_back(std::strtod(src[i].c_str(), nullptr));
    }
    return ret;
}

std::vector<int> GetValues_c(const std::vector<std::string>& src, int start, int end)
{
    std::vector<int> ret;
    for(int i = start; i <= end; ++i)
    {
        ret.push_back(std::atoi(src[i].c_str()));
    }
    return ret;
}

std::vector<double> polycentre(const std::vector<double>&  x,const std::vector<double>&  y,size_t ID)
{
    std::vector<double> C(3, 0);
    std::vector<double> x1(x.size(),0);
    std::vector<double> y1(y.size(),0);
    int sizx = x.size();
    int sizy = y.size();
    if(sizy != sizx)
    {
        std::cerr << "polycentre inputs not equal length";
    }
    double x0 = x[0];
    double y0 = y[0];
    for(int aa = 1; aa < sizx; ++aa)
    {
        if(x[aa] < x0)
        {
            x0 = x[aa];
        }
        if(y[aa] < y0)
        {
            y0 = y[aa];
        }
    }
    double A = 0.0;
    double B1 = 0.0;
    double B2 = 0.0;
    for(int aa = 0; aa < sizx; ++aa)
    {
        x1[aa] = x[aa] - x0;
        y1[aa] = y[aa] - y0;
    }
    for(int aa = 0; aa < sizx; ++aa)
    {
        if(aa != sizx-1)
        {
            A = A + (x1[aa]*y1[aa+1] - x1[aa+1]*y1[aa]);
            B1 = B1 + ((x1[aa]+x1[aa+1])*(x1[aa]*y1[aa+1]-x1[aa+1]*y1[aa]));
            B2 = B2 + ((y1[aa]+y1[aa+1])*(x1[aa]*y1[aa+1]-x1[aa+1]*y1[aa]));
        }
        else if(aa == sizx-1)
        {
            A = A + (x1[aa]*y1[0] - x1[0]*y1[aa]);
            B1 = B1 + ((x1[aa]+x1[0])*(x1[aa]*y1[0]-x1[0]*y1[aa]));
            B2 = B2 + ((y1[aa]+y1[0])*(x1[aa]*y1[0]-x1[0]*y1[aa]));
        }
    }
    A = A*0.5;
    C[0] = ID;
    C[1] = (((1/6.0)/A)*B1) + x0;
    C[2] = (((1/6.0)/A)*B2) + y0;
    return C;
}

template <typename T>

void PrintValues(const std::string& title, std::vector<std::vector<T>>& v, std::ofstream& outfil)
{
    if(outfil.is_open())
    {
        outfil << "ID,X,Y,Z \n";
        std::cout << title << std::endl;
        for(size_t line = 0; line < v.size(); ++line)
        {
            for(size_t val = 0; val < v[line].size(); ++val)
            {
                //std::cout << v[line][val] << " ";
                outfil.precision(10); 
                outfil << v[line][val] << ",";
            }
            outfil << "\n";
            //std::cout << std::endl;
        }
        //std::cout << std::endl;
    }
}

int main(int argc, char* argv[])
{

    std::ofstream outfil;

    if (argc < 2)
    {
        std::cerr << argv[0] << " needs to get input file (2dm)" << std::endl;
    }

    else if (argc == 3)
    {
        outfil.open(argv[2]);
    }

    else
    {
        outfil.open(std::string(argv[1]) + ".csv");
    }

    std::vector<std::vector<std::string>> values;
    std::ifstream fin(argv[1]);

    for (std::string line; std::getline(fin, line); )
    {
        std::istringstream in(line);
        values.push_back(
                         std::vector<std::string>(std::istream_iterator<std::string>(in),
                                                  std::istream_iterator<std::string>()));
    }

    std::vector<std::vector<int>> cells;
    std::vector<std::vector<double>> nodes;

    for (size_t i = 0; i < values.size(); ++i)
    {
        if(values[i][0] == "E3T")
        {
            cells.push_back(GetValues_c(values[i], 1, 5));
        }
        else if(values[i][0] == "E4Q")
        {
            cells.push_back(GetValues_c(values[i], 1, 6));
        }
        else if(values[i][0] == "ND")
        {
            nodes.push_back(GetValues_n(values[i], 1, 4));
        }
    }

    std::vector<std::vector<double>> cell_centres;

    for (size_t aa = 0; aa < cells.size(); ++aa)
    {
        if(cells[aa].size() == 5)
        {
            std::vector<double> xs;
            xs.push_back(nodes[cells[aa][1] - 1][1]);
            xs.push_back(nodes[cells[aa][2] - 1][1]);
            xs.push_back(nodes[cells[aa][3] - 1][1]);
            std::vector<double> ys;
            ys.push_back(nodes[cells[aa][1] - 1][2]);
            ys.push_back(nodes[cells[aa][2] - 1][2]);
            ys.push_back(nodes[cells[aa][3] - 1][2]);
            cell_centres.push_back(polycentre(xs,ys,aa+1));
        }
        else if(cells[aa].size() == 6)
        {
            std::vector<double> xs;
            xs.push_back(nodes[cells[aa][1] - 1][1]);
            xs.push_back(nodes[cells[aa][2] - 1][1]);
            xs.push_back(nodes[cells[aa][3] - 1][1]);
            xs.push_back(nodes[cells[aa][4] - 1][1]);
            std::vector<double> ys;
            ys.push_back(nodes[cells[aa][1] - 1][2]);
            ys.push_back(nodes[cells[aa][2] - 1][2]);
            ys.push_back(nodes[cells[aa][3] - 1][2]);
            ys.push_back(nodes[cells[aa][4] - 1][2]);
            cell_centres.push_back(polycentre(xs,ys,aa+1));
        }
    }

    PrintValues("Cell Centres", cell_centres, outfil);
    return 0;
}
Alex Byasse
  • 322
  • 2
  • 5
  • 16
  • 2
    Have you profiled your C++ code to see where the hot spots are? – Timo Geusch Sep 27 '13 at 00:40
  • visual c++ express has no profiler and NETBEANS has no c++ profiler... – Alex Byasse Sep 27 '13 at 00:42
  • Without profiling these sort of questions are nigh-impossible to answer. It could be the way you are reading/parsing the numbers, it could be your vector, or it could be something else. If you're using VC++ then you're probably on windows, no? I hear there are free profilers for windows: http://stackoverflow.com/questions/67554/whats-the-best-free-c-profiler-for-windows-if-there-are – Calvin Sep 27 '13 at 00:45
  • Just a wild guess, could MTLAB be using SSE2,3,4... SSE can compute 4 (8x with SSE4) numbers simultaneously! Or even your GPU to do all the math? A good GPU can compute using 1,500 units meaning that they can do 1,500 add, mul, div, etc. all at once... – Alexis Wilke Sep 27 '13 at 01:02
  • @AlexByasse http://www.codersnotes.com/sleepy is a simplistic profiler that can be used with VC++ Express. There's also the tried and true method of just pausing your program randomly. The majority of your program is in the "slow" steps, so most of the time you'll pause in one. – Sam Cristall Sep 27 '13 at 01:14
  • Im trying to use sleepy but how do you get sleepy to start profiling before you set of the program it can only program runs that are already running, also the outputs are very confusing and dont go to my lines of code but lines like std::_Vector_cal >.... etc ect. Whats the difference between exclusive and inclusive, and the highest % exclusive name is operator delete and second is operator new???? – Alex Byasse Sep 27 '13 at 01:55
  • You're allocating multiple `std::vector`s within every function. Dynamic allocations are slow and can be a bottleneck. push_back means dynamic memory reallocation and a lot of copying. That's most likely your problem. As for profiler, you should be able to get AQTime trial, plus, I THINK mingw should have gprof. If it doesn't you can install linux onto virtual machine and use gprof there. – SigTerm Sep 27 '13 at 03:15

2 Answers2

5

The most obvious thing here is to reserve the storage before push_back calls. That way the vector is only resized once:

std::vector<double> GetValues_n(const std::vector<std::string>& src, int start, int end)
{
    std::vector<double> ret;
    ret.reserve( end - start + 1 );

    for(int i = start; i <= end; ++i)
    {
        ret.push_back(std::strtod(src[i].c_str(), nullptr));
    }
    return ret;
}
paddy
  • 60,864
  • 6
  • 61
  • 103
0

In addition to what @Paddy said, Keep this in mind too : Copy cTor's are involved when you are doing a vector.push_back. You are using "c_str()". Do investigate if it is efficient enough.

One more thing: GetValues_n and GetValues_c are returning vectors by value. You have taken the vector argument by reference. Which is good thing efficiency wise. Can you also try to return the vectors by reference?

NotAgain
  • 1,927
  • 3
  • 26
  • 42
  • 1
    copy constructor for a double is extremely cheap. The returned vector should automatically be moved by the compiler, if using a new one. Otherwise, calling `swap` can get the same benefit. – Ben Voigt Sep 27 '13 at 04:08
  • "Can you also try to return the vectors by reference?" !!! Never !!! return a reference to an object that lives on that function's stack! – Ludwig Schulze Dec 28 '16 at 11:08