-2

Below is the code that I have written in C++ and it is printing the wrong result for the 2nd and 3rd output line. I am not able to figure it out why it is happening.

Below is the code which I have written and it is a completely functional code on visual studio. This code expects the one input file named urlMgr.txt whose content should be URLs. Below is the sample URLs which I am using it.

  web.whatsapp.com 
  web.whatsapp.com 
  cplusplus.com/reference/algorithm/find_if 
  stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop 
  mail.google.com/mail/u/0/#inbox 
  http://stackoverflow.com/questions/18085331/recursive-lambda-functions-in-c14
  mail.google.com/mail/u/0/#inbox 
  en.cppreference.com/w/cpp/language/lambda 
  https://www.google.co.in/?ion=1&espv=2#q=invariant%20meaning
  mail.google.com/mail/u/0/#inbox
  http://stackoverflow.com/questions/11699083/where-can-i-find-all-the-exception-guarantees-for-the-standard-containers-and-al
  https://www.google.co.in/?ion=1&espv=2#q=array+of+references:quora&start=10
  mail.google.com/mail/u/0/#inbox 
  web.whatsapp.com 
  quora.com/Whats-the-purpose-of-load-factor-in-hash-tables 
  https://www.quora.com/Whats-the-difference-between-the-rehash-and-reserve- methods-of-the-C++-unordered_map      cplusplus.com/reference/unordered_map/unordered_map/load_factor
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 

Code is also pasted below.

#include <iostream>
#include <string>
#include <unordered_set>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <functional>
#include <unordered_map>
#include <queue>
using namespace std;

class urlInfo
{
public:
    urlInfo(string &url):urlName(url),hitCount(1)
    {
    }

    int getHitCount() const
    {
        return hitCount;
    }

    string getURL()
    {
        return urlName;
    }

    string getURL() const
    {
        return urlName;
    }

    void updateHitCount()
    {
        hitCount++;
    }

    void setHitCount(int count)
    {
        hitCount = count;
    }

private:
    string urlName;
    int hitCount;
};

class urlInfoMaxHeap
{
public:
    bool operator() (urlInfo *url1, urlInfo *url2) const
    {
        if(url2->getHitCount() > url1->getHitCount())
            return true;
        else
            return false;
    }
};


bool operator==(const urlInfo &ui1,const urlInfo& ui2)
{
    //return (ui1.getURL().compare(ui2.getURL()) == 0) ? 1:0;

    return (ui1.getURL() == ui2.getURL());
}

namespace std
{
    template <> struct hash<urlInfo>
    {
        size_t operator()(urlInfo const & ui)
        {
            return hash<string>()(ui.getURL());
        }
    };
}

class urlMgr
{
public:
    urlMgr(string &fileName)
    {
        ifstream rdStr;
        string str;
        rdStr.open(fileName.c_str(),ios::in);
        if(rdStr.is_open())
        {
            int len;
            rdStr.seekg(0,ios::end);
            len = rdStr.tellg();
            rdStr.seekg(0,ios::beg);
            str.reserve(len+1);
            char *buff = new char[len +1];
            memset(buff,0,len+1);
            rdStr.read(buff,len);
            rdStr.close();
            str.assign(buff);
            delete [] buff;
        }
        stringstream ss(str);
        string token;

        while(getline(ss,token,'\n'))
        {
            //cout<<endl<<token;
            addUrl(token);
        }

    }


    void addUrl(string &url)
    {
        unordered_map<string,urlInfo*>::iterator itr;
        itr = urls.find(url);
        if(itr == urls.end())
        {
            urlInfo *u = new urlInfo(url);
            urls[url] = u;
            maxHeap.push_back(u);
        }
        else
        {
            itr->second->updateHitCount();
            urlInfo* u = itr->second;
            vector<urlInfo*>::iterator vItr;
            vItr = find(maxHeap.begin(),maxHeap.end(),u);
            if(vItr!=maxHeap.end())
            {
                maxHeap.erase(vItr);
                maxHeap.push_back(u);
            }
        }

        make_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());
    }

    void releaseResources()
    {
        for_each(urls.begin(),urls.end(),[](pair<string,urlInfo*> p){
            urlInfo* u = p.second;
            delete u;
        });
    }

    void printHeap()
    {
        for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
            cout<<endl<<u->getHitCount()<<"  "<<u->getURL();
        });
    }
private:
    unordered_map<string,urlInfo*> urls;
    vector<urlInfo*> maxHeap;
};


int main()
{
    string fileName("urlMgr.txt");
    urlMgr um(fileName);
    um.printHeap();
    um.releaseResources();
    cout<<endl<<"Successfully inserted the data"<<endl;
}

The output i am getting is

   8 cplusplus.com/max_load_factor
   3 web.whatsapp.com
   4 mail.google.com/mail/u/0/#inbox
   1 en.cppreference.com/w/cpp/language/lambda
   1 other url's and so on. //all other url's show count as 1.

What i expect is

   8 cplusplus.com/max_load_factor   
   4 mail.google.com/mail/u/0/#inbox
   3 web.whatsapp.com
   1 en.cppreference.com/w/cpp/language/lambda
   1 other url's and so on. //all other url's show count as 1.
Gaurav Sehgal
  • 7,422
  • 2
  • 18
  • 34
  • use the below pasted URLs in the urlMgr.txt file:- https://web.whatsapp.com/ https://web.whatsapp.com/ http://www.cplusplus.com/reference/algorithm/find_if/ http://stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop https://mail.google.com/mail/u/0/#inbox http://stackoverflow.com/questions/18085331/recursive-lambda-functions-in-c14 https://mail.google.com/mail/u/0/#inbox http://en.cppreference.com/w/cpp/language/lambda https://www.google.co.in/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8#q=invariant%20meaning https://mail.google.com/mail/u/0/#inbox – Kapil Satija Apr 19 '16 at 19:25
  • http://stackoverflow.com/questions/11699083/where-can-i-find-all-the-exception-guarantees-for-the-standard-containers-and-al https://www.google.co.in/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8#q=array+of+references:quora&start=10 https://mail.google.com/mail/u/0/#inbox https://web.whatsapp.com/ https://www.quora.com/Whats-the-purpose-of-load-factor-in-hash-tables https://www.quora.com/Whats-the-difference-between-the-rehash-and-reserve-methods-of-the-C++-unordered_map http://www.cplusplus.com/reference/unordered_map/unordered_map/load_factor/ – Kapil Satija Apr 19 '16 at 19:26
  • http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ http://www.cplusplus.com/max_load_factor/ – Kapil Satija Apr 19 '16 at 19:26
  • Use all the above url in the input file and each URL must be in a separate line. Press enter after every line (i.e \n should be there) – Kapil Satija Apr 19 '16 at 19:27
  • 1
    You should put your comments in your question as a test input. – Biruk Abebe Apr 19 '16 at 19:28
  • I didnt find the 2nd and 3rd output line and to be honest I am too lazy to search for it. Anyhow you missed to say what output you expect to get and what is wrong with the output you get. – 463035818_is_not_an_ai Apr 19 '16 at 19:28
  • 1
    afaik adding stuff to `namespace std` is forbidden by the standard and causes undefined behavior – 463035818_is_not_an_ai Apr 19 '16 at 19:30
  • @tobi303 In certain cases it is allowed. specializing `std::hash` is on of those cases: http://stackoverflow.com/a/8157967/4342498 – NathanOliver Apr 19 '16 at 19:33
  • @NathanOliver whenever I think I understood something there is some exception. Learning C++ never stops being fun ;). Thanks for clarifying – 463035818_is_not_an_ai Apr 19 '16 at 19:41
  • @tobi303 No problem. I have been using it for years and I learn something new/different way to accomplish a task almost daily on this site. – NathanOliver Apr 19 '16 at 19:43
  • @KapilSatija I am getting this output.Please tell what were you expecting.http://imgur.com/rndKOTr – Gaurav Sehgal Apr 19 '16 at 19:44
  • It was not allowing to enter the URL that's why I have added it separately in the comments. – Kapil Satija Apr 20 '16 at 02:14
  • @Gaurav Sehgal:- Your output is differing slightly I think your input file has got some discripency. In my case the first URL in the input has the hit count of 8 and 3 URL has the hitcount of value 4. Although I am creating the max heap based upon the hitcount on every insertion but still the URL whose hitcount is 4 is placed at 3rd position instead of second one.That's my problem.I want to know what mistake I am doing which is resulting in this problem. – Kapil Satija Apr 20 '16 at 02:26
  • @tobi : Thanks for sharing the information.I will also add the output file the better understanding of problem. – Kapil Satija Apr 20 '16 at 02:32
  • I am pasting the output of my screen as I am not getting any option of uploading the snapshot. OUTPUT SAMPLE:- 8 http://www.cplusplus.com/max_load_factor/ 3 https://web.whatsapp.com/ 4 https://mail.google.com/mail/u/0/#inbox 1 http://en.cppreference.com/w/cpp/language/lambda 1 https://www.google.co.in/webhp?sourceid=chrome-instant&ion=1&espv=2&ie=UTF-8# q=invariant%20meaning 1 http://stackoverflow.com/questions/11699083/where-can-i-find-all-the-exceptio n-guarantees-for-the-standard-containers-and-al 1 http://www.cplusplus.com/reference/algorithm/find_if/ – Kapil Satija Apr 20 '16 at 04:32
  • @KapilSatija .Yes .I guess there were some extra spaces at the end of some url's and were being treated separately. – Gaurav Sehgal Apr 20 '16 at 05:09
  • @KapilSatija I have edited the question for you.Please see if this is your actual problem. – Gaurav Sehgal Apr 20 '16 at 06:14
  • @Gaurav Sehgal:- Thanks a lot Gaurav. You have understood my problem now. I am expecting that it will be sorted because on each insertion of the element I am applying make_heap . Have you identified the reason also for the same. – Kapil Satija Apr 20 '16 at 14:01

1 Answers1

0

Finally after some debugging i have found the problem.The problem is in the way you are interpreting how max_heap() works.

Consider this.

url1 occurs 8 times
url2 occurs 4 times
url3 occurs 3 times

After a call to max_heap() what you will get is

maxHeap[0]=8                     8
maxHeap[1]=4                   4   3
maxHeap[2]=3

Or you can also get

maxHeap[0]=8                     8
maxHeap[1]=3                  3     4
maxHeap[2]=4

Both of the above are maxHeaps but you are considering that only the first heap can occur and so in the below code you are just printing maxHeap content without realizing that you may be printing second heap.

  void printHeap()
{
    for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
        cout<<endl<<u->getHitCount()<<"  "<<u->getURL();
    });
}

To resolve this.One way is to after picking up maxHeap[0] .You delete first element and again call max_heap before picking up maxHeap[0] again.Or you can also have something like below.

 while(maxHeap.size()>0){
    cout<<(*maxHeap.begin())->getHitCount()<<" "<<(*maxHeap.begin())->getURL()<<endl;
    std::pop_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());maxHeap.pop_back();}

In the above code pop_heap() will move the topmost element(which has the highest priority according to your implementation of compare function which you pass to make_heap()) to the end and heapify again. You can then delete the last element then.

Also i did not find the use of the below in your code

 vector<urlInfo*>::iterator vItr;
        vItr = find(maxHeap.begin(),maxHeap.end(),u);
        if(vItr!=maxHeap.end())
        {
            maxHeap.erase(vItr);
            maxHeap.push_back(u);
        }
Gaurav Sehgal
  • 7,422
  • 2
  • 18
  • 34
  • Thanks a lot for the detailed explanation. I was also thinking the same but was not sure before your explanation.Two Reason why I have added the code for erasing and again inserting. I am thinking. 1) max_heap will not update the heap since no insertion has happened int the container if i remove the code snippet you have pointed(so forced the insertion after deletion). 2) I was also not sure whether automatic heapify will happen after the updation of hitcount (without the call to max_heap in the end). – Kapil Satija Apr 20 '16 at 17:26
  • You might want to have a look at `push_heap` http://www.cplusplus.com/reference/algorithm/push_heap/ and if the solution worked for you, you could accept this as an answer. – Gaurav Sehgal Apr 20 '16 at 17:37
  • If on updating the hitcount automatic heapify can work then I can directly switch to the priority_queue if there is any way by which either I can retrigger heapify on the priority_queue container. Any idea about this? – Kapil Satija Apr 20 '16 at 18:07
  • @KapilSatija AFAIK updating a member of priority_queue will not update its ordering until you remove that element and insert it again.Also you cannot update any member of priority_queue you want.I don't see `priority_queue` working here – Gaurav Sehgal Apr 20 '16 at 18:14
  • @KapilSatija Think about this how will you check whether a member exists in priority queue or not. – Gaurav Sehgal Apr 20 '16 at 18:19
  • :- That was the reason I did not used the priority queue here. Earlier I was trying to use that but could not able to use it because of the challenge that you mentioned. Thanks a lot for clarifying all my doubts. – Kapil Satija Apr 21 '16 at 04:48
  • @KapilSatija Your welcome.If this answer helped you, please accept this as answer. http://meta.stackexchange.com/questions/23138/how-to-accept-the-answer-on-stack-overflow – Gaurav Sehgal Apr 21 '16 at 09:34