0

I have two dictionaries that I am trying to join and save the matching indices in a separate dictionary like so:

/// <summary>
/// Two parallel lists of keys: entry i of <see cref="index1"/> is paired with
/// entry i of <see cref="index2"/> when callers append to both in lockstep.
/// </summary>
public class MatchedPairs
{
    // Both lists start out empty; field initializers run as part of the
    // implicit public parameterless constructor.
    public List<int> index1 = new List<int>();
    public List<int> index2 = new List<int>();
}

// One dictionary per input file, keyed by int with string values.
// The code that populates them is elided below.
Dictionary<int, string> file1Dictionary = new Dictionary<int, string>();
Dictionary<int, string> file2Dictionary = new Dictionary<int, string>();

//Fill dictionaries with data from flat files
//...

// Equi-join the two dictionaries on their values: every result pairs a key from
// file 1 (k1) with a key from file 2 (k2) whose values are equal. Join is lazily
// evaluated, so the work actually happens when the foreach below enumerates it.
var matchedKeys = file1Dictionary.Join(file2Dictionary, x => x.Value, y => y.Value, (x, y) => new { k1 = x.Key, k2 = y.Key });

// NOTE(review): matches is declared as Dictionary<int, MatchedPairs>, but the loop
// below reads index1/index2 directly from it; Dictionary has no such members, so
// this does not compile as written. Presumably a single MatchedPairs instance (or
// a lookup into the dictionary) was intended - confirm with the author.
Dictionary<int, MatchedPairs> matches = new Dictionary<int, MatchedPairs>(); 

// Record both keys of every joined pair.
foreach (var match in matchedKeys)
{
    matches.index1.Add(match.k1);
    matches.index2.Add(match.k2);
}

I receive an OutOfMemoryException when executing this code, because the file1Dictionary and file2Dictionary objects each have millions of entries in them.

Is there anything I can do to match these large objects in memory in C#? My alternative is to load the data into a SQL database and do the joining there. Thanks.

Bert Wagner
  • 851
  • 1
  • 11
  • 23
  • 2
    If you can do it in the database it's the best option. – Tim Schmelter Jun 22 '15 at 11:42
  • Have you run this as a 64-bit app? – Enigmativity Jun 22 '15 at 11:42
  • 1
    Actually, can you load the dictionaries into memory? Is it the join that throws the out-of-memory exception? – Enigmativity Jun 22 '15 at 11:43
  • Here is some info on a similar problem: http://stackoverflow.com/a/20643387/4019425 . Preallocating like so might solve the problem: Dictionary<int, string> file1Dictionary = new Dictionary<int, string>(40000000); (this sets the initial capacity to 40000000 entries; increase it if necessary) – Ignotus Jun 22 '15 at 11:45
  • Run the app as 64-bit as @Enigmativity suggested, and also set gcAllowVeryLargeObjects to true (check this: http://stackoverflow.com/questions/8641888/outofmemoryexception-when-adding-more-items-to-a-very-large-hashsetint32) – George Vovos Jun 22 '15 at 11:51
  • How large are your files? GBs? If your file is in a format to do so, you could open a stream to the files, read the files line by line, and do comparisons that way. Obviously not as performant, but an idea if you cannot load the entire file into memory. SQL is perfect for this though... – mallocation Jun 22 '15 at 11:58

1 Answer

-1

I think your dictionary should be Dictionary < string, MatchedPairs > matches (not integer).

    /// <summary>
    /// Matches the entries of two int-to-string dictionaries by value and collects,
    /// for each shared value, the keys under which it appears in each dictionary.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds a value-to-keys index for each dictionary, then records one
        /// <see cref="MatchedPairs"/> per value that occurs in both of them.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Dictionary<int, string> file1Dictionary = new Dictionary<int, string>();
            Dictionary<int, string> file2Dictionary = new Dictionary<int, string>();

            //Fill dictionaries with data from flat files
            //...

            // Each index is built from its OWN dictionary. Enumerating the keys of
            // file 1 while indexing file 2 throws KeyNotFoundException for keys that
            // exist only in file 1 and silently skips keys that exist only in file 2.
            Dictionary<string, List<int>> reverseDict1 = BuildReverseIndex(file1Dictionary);
            Dictionary<string, List<int>> reverseDict2 = BuildReverseIndex(file2Dictionary);

            Dictionary<string, MatchedPairs> matches = new Dictionary<string, MatchedPairs>();
            foreach (KeyValuePair<string, List<int>> entry in reverseDict1)
            {
                // Only values present in both files are matches; TryGetValue avoids
                // the KeyNotFoundException the indexer raises for a file-1-only value.
                List<int> keysInFile2;
                if (reverseDict2.TryGetValue(entry.Key, out keysInFile2))
                {
                    matches.Add(entry.Key, new MatchedPairs(entry.Value, keysInFile2));
                }
            }
        }

        /// <summary>
        /// Inverts a key-to-value dictionary into a value-to-keys lookup.
        /// </summary>
        /// <param name="source">Dictionary to invert.</param>
        /// <returns>
        /// A dictionary mapping each distinct value of <paramref name="source"/> to
        /// the list of keys that carry it, in enumeration order.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// A value in <paramref name="source"/> is null (dictionary keys cannot be null).
        /// </exception>
        private static Dictionary<string, List<int>> BuildReverseIndex(Dictionary<int, string> source)
        {
            Dictionary<string, List<int>> reverse = new Dictionary<string, List<int>>();

            // Single pass over the pairs: no per-key indexer lookup and no
            // intermediate projection or grouping objects.
            foreach (KeyValuePair<int, string> pair in source)
            {
                List<int> keys;
                if (!reverse.TryGetValue(pair.Value, out keys))
                {
                    keys = new List<int>();
                    reverse.Add(pair.Value, keys);
                }

                keys.Add(pair.Key);
            }

            return reverse;
        }
    }
    /// <summary>
    /// The keys from the first and second source that belong to one match.
    /// </summary>
    public class MatchedPairs
    {
        /// <summary>
        /// Creates a pair holding copies of both key lists, so later changes to
        /// the argument lists do not affect this instance.
        /// </summary>
        /// <param name="l1">Keys to copy into <see cref="index1"/>.</param>
        /// <param name="l2">Keys to copy into <see cref="index2"/>.</param>
        public MatchedPairs(List<int> l1, List<int> l2)
        {
            index1 = new List<int>(l1);
            index2 = new List<int>(l2);
        }

        /// <summary>Keys taken from the first list passed to the constructor.</summary>
        public List<int> index1 { get; set; }

        /// <summary>Keys taken from the second list passed to the constructor.</summary>
        public List<int> index2 { get; set; }
    }
jdweng
  • 33,250
  • 2
  • 15
  • 20