0
#ifndef _LIBCPP_HASH
#define _LIBCPP_HASH

#include <iostream>
#include <string>
#include <string.h>
#include <iomanip>
#define PERTERB_SHIFT 5
using namespace std;

int FAIL = 0;

template <typename myType>
class HashMap
{
  public:

    HashMap()
    {
      Size = 8;
      Resize = 2;
      Keys = new int[8];
      Values = new myType[8];
      typesize = sizeof(myType);
      Fill = 0;
      memset(Keys, -1, 32);
      memset(Values, 0, 8 * typesize);
    }

    ~HashMap()
    {
      delete[] Keys;
      delete[] Values;
    }

    HashMap(const HashMap &table)
    {
      Size = table.Size;
      Keys = table.Keys;
      Values = table.Values;
    }

    int GetSize() { return Size; }
    // can't return get(key) because no binding of reference int to temporary int, so has the same body as get..
    myType &operator[] (int hash)
    {
      register size_t perterb = hash;
      register unsigned long j = 0;
      register int temp = 0;
      register int index = 0;
      while ( Keys[index] != hash && Keys[index] != -1)
      {
        j = 5 * j + 1 + perterb;
        perterb >>= PERTERB_SHIFT;
        index = j % (2 << Resize);
        //cout << "j : " << j << " temp: " << temp << " hash: " << hash << " index: " << index << endl;
      }

      if (Fill == (int)(Size * 2 / 3))
      {
        cout << "here" << endl;
        Resize <<= 1;
        int* tempkey = new int[Size*2];
        myType* tempvalue = new myType[Size*2];
        memset(tempkey, -1, Size*8);
        memset(tempvalue, 0, Size*2*typesize);
        copy(Keys, Keys + Size, tempkey);
        copy(Values, Values + Size, tempvalue);
        delete[] Values; delete[] Keys;
        Keys = tempkey; Values = tempvalue;
        Size <<= 1;
      }
      if (Keys[index] != hash) Fill ++;
      Keys[index] = hash;
      return Values[index];

    }

    bool insert(int key, myType value)
    {
      int index = key % Size;
      short temp = 0;
      while ((Keys[index] != key) && (Keys[index] != -1) && temp < Size)
      {
        index ++;
        temp ++;
      }
      if (temp == Size)
      {
        int s = static_cast<int>(Size*4);
        int* tempkey = new int[s];
        myType* tempvalue = new myType[s];
        for (int i = Size; i < s; i++)
        {
          tempkey[i] = -1;
          tempvalue[i] = 0;
        }
        copy(Keys, Keys + Size, tempkey);
        copy(Values, Values + Size, tempvalue);
        delete[] Values; delete[] Keys;
        Size *= 4;
        Keys = tempkey;
        Values = tempvalue;
      }
      Keys[index] = key;
      Values[index] = value;
      return true;
    }

    myType get(int key)
    {
      int index = key % Size;
      short count = 0;
      while (Keys[index] != key && count < Size)
      {
        index ++;
        count ++;
      }
      if (count == Size)
      {
        try
        {
          throw key;
        }
        catch (int er)
        {
          cout << "KeyError: " << er << endl;
        }
        return FAIL;
      }
      return Values[index];
    }

    myType pop(int key)
    {
      int index = key % Size;
      short count = 0;
      while (Keys[index] != key && count < Size)
      {
        index ++;
        count ++;
      }
      if ( count == Size )
      {
        try
        {
          throw key;
        }
        catch (int er)
        {
          cout << "KeyError: " << er << endl;
        }
        return FAIL;
      }
      myType temp = Values[index];
      Values[index] = 0;
      Keys[index] = -1;
      return temp;
    }

    void Print()
    {
      cout << "index\t" << "key\t" << "value\t" << endl;
      for (int i = 0; i < Size; i++)
      {
        cout << i << "\t" << Keys[i] << "\t" << Values[i] << "\t" << endl;
      }
    }
  private:
    int Size;
    int Resize;
    int* Keys;
    myType* Values;
    short typesize;
    int Fill;
};

#endif

This is my code currently. The hash function is based off of Python dictionaries. The issue arises when trying to reference a hashmap entry after it has been inserted. If there was a collision during insertion, the [] operator will treat the reference as if it is trying to insert the value again and will stop at the first key = -1, instead of iterating until it finds the hash value in the key array. I'm able to address this issue with insert(), pop(), and get(), but I don't know how to deal with it for the [] operator, since it is used for both reference and assignments. It needs to know if it is followed by an assignment operator before it tries to either insert or get the hash.

I spent some time in the Python source code trying to figure out how they dealt with this issue but I couldn't figure it out.

I know this is a strange question, but I would really appreciate any help.

ChrisMM
  • 8,448
  • 13
  • 29
  • 48
Kyle Burns
  • 367
  • 2
  • 10
  • 1
    Python isn't C++. Python has a data model where `dict[x]` gets translated into `dict.__getitem__(x)` and `dict[x] = y` gets translated into `dict.__setitem__(x, y)`. In other words, the `[]` notation is a special part of the language, with inherent support – juanpa.arrivillaga Dec 23 '19 at 19:15
  • @juanpa.arrivillaga My question is how that is done – Kyle Burns Dec 23 '19 at 19:16
  • By writing a compiler/runtime that understands that data model. – juanpa.arrivillaga Dec 23 '19 at 19:17
  • @juanpa.arrivillaga So there is essentially no feasible way for me to do this? – Kyle Burns Dec 23 '19 at 19:18
  • `operator[]` in C++ maps will try to insert the key into the map with a default constructed value. If it already exists, great. It then returns a reference to the object. In Visual Studio, it is defined as `mapped_type& operator[](const key_type& _Keyval) { return _Try_emplace(_Keyval).first->_Myval.second; }` – ChrisMM Dec 23 '19 at 19:18
  • 1
    Your copy constructor isn't right and you didn't provide an assignment operator so you'll use the default one which also isn't right. You should read about [The rule of 3/5/0](https://stackoverflow.com/questions/4172722/what-is-the-rule-of-three). – François Andrieux Dec 23 '19 at 19:19
  • I am not C++ expert. In fact, I know practically nothing about C++. I don't know C++'s data model. Do C++ map types generally use the `map_object[x]` and `map_object[x] = y` notation? – juanpa.arrivillaga Dec 23 '19 at 19:19
  • 1
    Dare I ask, why is your `HashMap::insert` basically linear-searching from inception, whereas your `operator[]` does *not* ? The route taken during an exhausted search to eventually locate where to *add* an item had damn well better be the same route taken during a probe to find an *existing* item. I don't see any actual *hash* operation in your insert method, and that's a huge red flag. – WhozCraig Dec 23 '19 at 19:20
  • 1
    @juanpa.arrivillaga The standard library [`std::map`](https://en.cppreference.com/w/cpp/container/map) does support that notation. It has a meaning distinct from the `find` member (which doesn't default construct missing elements) and the `insert` member (which reports if the element already existed). – François Andrieux Dec 23 '19 at 19:20
  • @WhozCraig Apologies. This is from the last stable save I have of the code. My current version has the same hash function throughout the class. – Kyle Burns Dec 23 '19 at 19:25
  • 1
    I would also point out that resizing a hash system isn't as simple as just making more space, copying keys, and calling it good. The hashing operation is based on the *original* hash modulo table size. When you change the table size, you change that modulo, and therefore change the modulo calculation for *each key*. In short, a table-size change requires you *rehash* the prior content, one by one, into their new homes in the new table, not just copy verbatim into same slots. At least that's the way I've always done it for the last three decades; YMMV. – WhozCraig Dec 23 '19 at 19:27
  • It's not possible to know if the result of `operator[]` will be assigned to or read from. But you can return an object of a type that *could* make the distinction. You would need to write a proxy class that wraps a reference to the actual result of the `operator[]` operation. If that type's `operator=` is called, the result is being assigned to. If `operator T` (the conversion operator) is called, the result if being read from. – François Andrieux Dec 23 '19 at 19:28
  • @WhozCraig You're right about that. I'm doing this as a learning exercise and I definitely implemented the resizing incorrectly. Thank you for mentioning that! – Kyle Burns Dec 23 '19 at 19:29
  • 1
    @kyleburns I think you can eliminate a lot of complexity by using `std::vector` instead of dynamically allocating arrays. It would allow you to focus on the complexities unique to maps instead of the details of memory management. – François Andrieux Dec 23 '19 at 19:31
  • 1
    Assuming the actual hash-operation is implemented correctly, and assuming the resizing is done correctly, etc. containers like `std::map` and `std::unordered_map` both require the value-type to support both copy-construction and default construction when using `operator []`. One way or another, `operator []` will return a reference to an object, be it an object that existed prior, or one that was default-constructed and placed at the key index. If you type doesn't support both constructions, `operator[]` cannot be used (but you can still use `insert`, `find`, etc). – WhozCraig Dec 23 '19 at 19:35
  • This doesn't address the question, but identifiers that begin with an underscore followed by a capital letter (`_LIBCPP_HASH`) and names that contain two consecutive underscores are reserved for use by the implementation. Don't use them in your code. – Pete Becker Dec 23 '19 at 19:53
  • @WhozCraig It turns out the resizing is what was causing my issue in the first place and my overloaded operator works fine as is now that I fixed it. Thanks for your insight! – Kyle Burns Dec 23 '19 at 20:49

1 Answers1

1

Your [] can return a proxy type, which in turn has an operator int()&& for reading and operator =(int const&)&& for writing.

There are issues with this in that it interacts with auto poorly, but it does work.

This is what std vector bool does to support bit compressed data. It was viewed as a mistake, but partly for unrelated reasons.

Yakk - Adam Nevraumont
  • 262,606
  • 27
  • 330
  • 524