1

I am in trouble handling duplicated elements in arrays. For example, in the problem of finding all pairs of integers within an array which sum to a specified value, here is my implementation:

vector<pair<int, int>> find_all_pairs_with_sum(int data[], int length, int sum)
{
    assert(data && length>1);

    vector<pair<int, int>> res;

    sort(data, data+length);

    int first = 0; 
    int last = length - 1; 
    while (first < last) 
    { 
        int s = data[first] + data[last]; 
        if (s == sum) 
        {
            res.push_back(make_pair(data[first], data[last]));

            ++first; 
            --last; 
        } 
        else 
        { 
            if (s < sum)
                ++first; 
            else
                --last; 
        } 
    } 

    return res;
}

The problem will occur when the array contains duplicated elements. For example, when

int data[] = {3, 4, 3, 4};
int sum = 7;

The program will only give two pairs (3,4) (3,4). However, in this case, the correct answer should be four pairs (3,4) (3,4) (3,4) (3,4) (as 4=2x2). How can I modify the code in order to correctly handle such cases (hopefully still in O(n logn))? It seems the change should be made in if (s==sum) scope while updating first and last, but I just cannot make it right.

Note that: I know another way that can handle this correctly by using a hash table to record occurrences of each elements. Please suggest how to work through this problem without using a hash table.

herohuyongtao
  • 49,413
  • 29
  • 133
  • 174
  • An interesting post: [STL - most efficient way to erae duplicates](http://stackoverflow.com/questions/1041620/most-efficient-way-to-erase-duplicates-and-sort-a-c-vector) – FoggyDay Jan 10 '14 at 08:27

1 Answers1

2

Your array gets sorted as

  Index: 0 1 2 3
Element: 3 3 4 4

When you find a sum, you increment first and decrement last, so each pair is only added once, not twice. Additionally, steping inward at any rate will always prevent you from getting both 1-3 and 0-2 (by index). You could make a preliminary pass to find duplicates, and use that information to add the pairs correctly:

vector<pair<int, int>> find_all_pairs_with_sum(int data[], int length, int sum)
{
    assert(data && length>1);

    vector<pair<int, int>> res;
    int i;

    sort(data, data+length);

    // there is more than one way to skin this cat...
    vector<pair<int, int>> vettedData;
    for(i = 0; i < length; i++) {
        if(i == 0 || vettedData[vettedData.size() - 1].first != data[i])
            vettedData.push_back(make_pair(data[i], 1));
        else
            vettedData[vettedData.size() - 1].second++;
    }

    int first = 0; 
    int last = vettedData.size() - 1; 
    while (first < last) 
    { 
        int s = vettedData[first].first + vettedData[last].first; 
        if (s == sum) 
        {
            int iterations = vettedData[first].second * vettedData[last].second;
            for(i = 0; i < iterations; i++)
                res.push_back(make_pair(vettedData[first].first, vettedData[last].first));

            ++first; 
            --last; 
        } 
        else 
        { 
            if (s < sum)
                ++first; 
            else
                --last; 
        } 
    } 

    return res;
}
herohuyongtao
  • 49,413
  • 29
  • 133
  • 174
Mad Physicist
  • 107,652
  • 25
  • 181
  • 264