I am trying to perform a parallel graph traversal. The entire code has only one shared variable (writable) - i would like to know why the memory consumption is too much for this. As per my understanding the memory should be
size of local variables in bfs * no of threads + ~some overheads
But i get the values much more than this.
Am i doing something wrong ?
Compiled using gcc/9.2.0
g++ demo.cpp -I . -o d -std=c++11 -O2 -fopenmp
#include <iostream>
#include <queue>
#include <vector>
#include <set>
#include <algorithm>
#include <climits>
#include <cmath>
#include <iomanip>
#include <map>
#include <map>
#include <tuple>
#include <sstream>
#include <unordered_set>
#include "omp.h"
using namespace std;
static map<int,std::vector<int>> g_adjLst;
static map<int,std::vector<int>> g_pairLst;
static int g_vertex;
void setSize(int nodesLen) ;
void addEdge(const int src, const int dst);
void print();
int findShortestPath(int src,int dst);
string bfs(int src, const std::vector<int>& , int thread) ;
int getSize() ;
int getPairSize() ;
void clearPair () ;
void storePairForPathFinding(int src, int dst) ;
string runBFS();
void setSize(int nodesLen) {
g_vertex = nodesLen;
}
string bfs( int src, const std::vector<int>& dst , int perc) {
std::stringstream msg;
if (g_adjLst.find(src) == g_adjLst.end()) {
return "";
}
std::vector<int> dist (g_vertex,INT_MAX);
std::vector<bool> visited(g_vertex,false);
std::queue <int> q;
q.push(src);
visited.at(src) = true;
dist.at(src)= 0;
while (!q.empty()) {
int size = q.size();
while (size--) {
int curr = q.front();
q.pop();
if (g_adjLst.find(curr) == g_adjLst.end()) {
return "";
}
for (vector<int> ::const_iterator it = g_adjLst[curr].begin(); it != g_adjLst[curr].end(); ++it) {
if (visited.at(*it)) {continue;}
if (dist.at(*it)> dist.at(curr) +1) {
dist.at(*it)= dist.at(curr) + 1;
q.push(*it);
}
visited.at(*it) = 1;
}
}
}
stringstream s;
for (std::vector<int> ::const_iterator it = dst.begin() ; it != dst.end(); ++it) {
s << " {" << src << "," << *it << "} " << dist[*it] ;
}
return s.str();
}
//
void storePairForPathFinding (int src, int dst) {
g_pairLst[src].push_back(dst);
}
//
//
string runBFS() {
int i,tid=0,nthreads;
vector<int> ::iterator ip ;
for (std::map<int,std::vector<int>> ::iterator it = g_pairLst.begin(); it != g_pairLst.end() ;++it) {
ip = std::unique(it->second.begin() , it->second.end());
it->second.resize(std::distance(it->second.begin(),ip));
}
std::string totalStr = "" ,partialStr = "";
int total = g_pairLst.size();
#pragma omp parallel for private(tid,nthreads,partialStr) shared (totalStr)
for (i = 0 ; i < g_pairLst.size(); i++) {
auto it = g_pairLst.begin();
advance(it,i);
tid = omp_get_thread_num();
partialStr = bfs(it->first,it->second,int(i*100/total));
// Create thread safe region.
#pragma omp critical
{
//add each threads partial sum to the total sum
totalStr += partialStr;
}
}
return totalStr;
}
//
//
void addEdge(int src, int dst) {
g_adjLst[src].push_back(dst);
}
int main () {
setSize(60000);
for(int i = 0; i < 60000;i++) {
for (int j=0; j< 60000; j++) {
addEdge(i,j);
if (i==j) {
storePairForPathFinding(0,i);
}
}
}
string s = runBFS();
cout << "Ans1 = " << s << endl;
string e = runBFS();
cout << "Ans2 = " << e << endl;
}
runBFS
calls the pair of bfs
needs to be done - foreach src and destinationLst (stored in g_pairLst
) bfs is done separately/parallely.
Then all the paths are concatenated in a string from each thread (partialStr)
and then finally concatenated at the end (totalStr)
- like summing up array .