-1

I have to write an implementation of an array that stores its values on the hard drive instead of in RAM (I know how stupid it sounds, but it's intended to teach us how different sorting algorithms behave in RAM and on a hard drive). This is what I've written so far:

/// <summary>
/// Array-like collection of integers backed by a text file that holds one value per line.
/// Every member re-opens the file at <c>filePath</c> and scans it from the beginning.
/// </summary>
class HDDArray : IEnumerable<int>
{
    // Path of the backing text file; assigned in the constructor.
    private string filePath;

    /// <summary>
    /// Gets or sets the value on the line at the given zero-based index.
    /// </summary>
    /// <param name="index">Zero-based line number inside the backing file.</param>
    public int this[int index]
    {
        get
        {
            using (var reader = new StreamReader(filePath))
            {
                // Read line 0 first, then advance one line per iteration so that
                // after the loop `line` holds the text of line number `index`.
                string line = reader.ReadLine();

                for (int i = 0; i < index; i++)
                {
                    line = reader.ReadLine();
                }

                // NOTE(review): if the file has fewer than index + 1 lines, `line` is
                // presumably null here - confirm how Convert.ToInt32 treats that.
                return Convert.ToInt32(line);
            }
        }
        set
        {
            using (var fs = File.Open(filePath, FileMode.OpenOrCreate, FileAccess.ReadWrite))
            {
                // The reader and the writer are both layered over the same stream.
                var reader = new StreamReader(fs);
                var writer = new StreamWriter(fs);

                // Intended to skip the first `index` lines so the write lands on line `index`.
                // NOTE(review): StreamReader presumably reads ahead into its own buffer,
                // so the position of `fs` after these calls is likely not the start of
                // line `index` - verify.
                for (int i = 0; i < index; i++)
                {
                    reader.ReadLine();
                }

                // NOTE(review): this writes a new line at the current stream position;
                // nothing here removes or replaces the existing line, which matches the
                // reported symptom of a line being added instead of edited.
                writer.WriteLine(value);
                writer.Dispose();
            }
        }
    }

    /// <summary>
    /// Gets the number of lines in the backing file, counted by reading the file to its end.
    /// </summary>
    public int Length
    {
        get
        {
            int length = 0;

            using (var reader = new StreamReader(filePath))
            {
                // ReadLine returning null is used as the end-of-file signal.
                while (reader.ReadLine() != null)
                {
                    length++;
                }
            }

            return length;
        }
    }

    /// <summary>
    /// Creates the array over the given file and makes sure the file exists and is empty.
    /// </summary>
    /// <param name="file">Path of the backing text file.</param>
    public HDDArray(string file)
    {
        filePath = file;

        // An existing file is overwritten with empty text; otherwise a new file is
        // created and the stream returned by File.Create is disposed right away.
        if (File.Exists(file))
            File.WriteAllText(file, String.Empty);
        else
            File.Create(file).Dispose();
    }

    /// <summary>
    /// Yields the integer parsed from each line of the backing file, in file order.
    /// </summary>
    public IEnumerator<int> GetEnumerator()
    {
        using (var reader = new StreamReader(filePath))
        {
            string line;
            // Stops when ReadLine returns null.
            while ((line = reader.ReadLine()) != null)
            {
                yield return Convert.ToInt32(line);
            }
        }
    }

    /// <summary>
    /// Non-generic enumerator; forwards to the generic <see cref="GetEnumerator"/>.
    /// </summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}

The problem I'm facing is that when I try to edit a line (in the set portion of the indexer), I end up adding a new line instead of editing the old one (it's pretty obvious why, I just can't figure out how to fix it).

PoVa
  • 995
  • 9
  • 24

3 Answers3

1

Your array is designed to work with integers. Such a class is quite easy to create because every number occupies exactly 4 bytes.

/// <summary>
/// Array-like collection of 32-bit integers stored in a binary file.
/// Each element occupies a fixed 4-byte slot, so element <c>i</c> lives at byte
/// offset <c>i * 4</c> and can be reached by seeking instead of scanning.
/// The file is opened once in the constructor and released by <see cref="Dispose"/>.
/// </summary>
class HDDArray : IEnumerable<int>, IDisposable
{
    // Size in bytes of one stored element.
    private const int ElementSize = sizeof(int);

    private readonly FileStream stream;
    private readonly BinaryWriter writer;
    private readonly BinaryReader reader;

    /// <summary>
    /// Creates (or truncates) the backing file and keeps it open for reading and writing.
    /// </summary>
    /// <param name="file">Path of the backing binary file.</param>
    public HDDArray(string file)
    {
        stream = new FileStream(file, FileMode.Create, FileAccess.ReadWrite);
        writer = new BinaryWriter(stream);
        reader = new BinaryReader(stream);
    }

    /// <summary>
    /// Gets or sets the element at the given zero-based index.
    /// </summary>
    /// <param name="index">Zero-based element index.</param>
    /// <exception cref="ArgumentOutOfRangeException">The index is negative.</exception>
    /// <exception cref="EndOfStreamException">
    /// The getter was asked for an element past the end of the file.
    /// </exception>
    public int this[int index]
    {
        get
        {
            stream.Position = OffsetOf(index);
            return reader.ReadInt32();
        }
        set
        {
            stream.Position = OffsetOf(index);
            writer.Write(value);
        }
    }

    /// <summary>
    /// Gets the number of elements currently stored in the file.
    /// </summary>
    public int Length
    {
        get
        {
            // Divide as long first and only then narrow, so the cast applies to the
            // element count rather than to the byte length.
            return (int)(stream.Length / ElementSize);
        }
    }

    /// <summary>
    /// Yields every stored element in index order.
    /// </summary>
    public IEnumerator<int> GetEnumerator()
    {
        // The enumerator tracks its own offset and re-seeks before every read, so
        // using the indexer inside a foreach does not derail the iteration.
        // End of data is detected by comparing offsets with the stream length:
        // PeekChar would try to decode the raw integer bytes as text.
        for (long offset = 0; offset + ElementSize <= stream.Length; offset += ElementSize)
        {
            stream.Position = offset;
            yield return reader.ReadInt32();
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Flushes pending writes and closes the backing file.
    /// </summary>
    public void Dispose()
    {
        // Writer first so anything it still holds is flushed before the stream closes.
        writer.Dispose();
        reader.Dispose();
        stream.Dispose();
    }

    // Converts an element index to its byte offset using 64-bit arithmetic so that
    // large indices do not overflow int.
    private static long OffsetOf(int index)
    {
        if (index < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(index));
        }

        return (long)index * ElementSize;
    }
}

Since the size of each array element is known, we can jump directly to any element by setting the stream's Position property.

BinaryWriter and BinaryReader make it very convenient to write and read numbers.

Opening a stream is a very expensive operation, so do it once when you create the object. When you are finished, the object needs to clean up after itself, which is why I implemented the IDisposable interface.

Usage:

// The using block releases the file even if something inside it throws.
using (HDDArray arr = new HDDArray("test.dat"))
{
    Console.WriteLine("Length: " + arr.Length);

    for (int i = 0; i < 10; i++)
    {
        arr[i] = i;
    }

    Console.WriteLine("Length: " + arr.Length);

    foreach (var n in arr)
    {
        Console.WriteLine(n);
    }

    // Console.WriteLine(arr[20]); // Exception!
}
Alexander Petrov
  • 13,457
  • 2
  • 20
  • 49
  • I had to add `Encoding.ASCII` to writer and reader for it to not crash. Seems to be working fine, thanks. – PoVa May 30 '17 at 10:28
0

I stand to be corrected, but I don't think there is an easy way to rewrite a specific line in place, so you will probably find it easier to rewrite the whole file with that line modified.

You could change your set code as follows:

  set
  {
    // Load every line, replace the one at index, then write the whole file back.
    var allLinesInFile = File.ReadAllLines(filePath);
    // The lines are strings, so the int being assigned has to be converted first.
    allLinesInFile[index] = value.ToString();
    File.WriteAllLines(filePath, allLinesInFile);
  }

It goes without saying that there should be some safety checks in there to verify that the file exists and that index < allLinesInFile.Length.

Steve Land
  • 4,852
  • 2
  • 17
  • 36
  • Of course, if the reason the data is on disk is because it doesn't fit in memory, this is going to be a bit of a problem. One could stream from one file into a second file, then rename the new one over the old. – Ben Voigt May 29 '17 at 19:05
  • Good point @BenVoigt , writing to a temporary file would be a better solution if the files in question are likely to be very large. – Steve Land May 29 '17 at 19:08
0

I think that for a homework exercise on sorting algorithms you needn't worry about memory size issues.

Of course, please add a check that the file exists before reading it.

Note: Line counting in example starts from 0.

// Load the whole file first: the writer below truncates it when it opens.
string[] originalLines = File.ReadAllLines(filePath);

using (var output = new StreamWriter(filePath))
{
    int position = 0;

    // Copy every line back unchanged, except the one at the target position.
    foreach (string existingLine in originalLines)
    {
        if (position != lineToEditNmb)
        {
            output.WriteLine(existingLine);
        }
        else
        {
            output.WriteLine(lineToWrite);
        }

        position++;
    }
}