Use Reservoir Sampling to solve this in a single pass
If you want to randomly choose one or more items from a list of items where the length of that list is not known in advance, you can use Reservoir Sampling.
We can take advantage of that, along with the File.ReadLines()
method (which avoids buffering all the lines in memory) to write a single-pass algorithm that will read each line just once, without buffering.
The sample code below shows a generalised solution that lets you randomly select any number of lines. For your case, N = 1.
The sample code also includes a test program to prove that the lines are chosen randomly with a uniform distribution.
(To see how this code works, see the Wiki article I linked above.)
using System;
using System.IO;
using System.Collections.Generic;
namespace Demo
{
internal class Program
{
/// <summary>
/// Randomly chooses up to <paramref name="n"/> lines from a text file in a single pass,
/// using reservoir sampling so the number of lines need not be known in advance.
/// </summary>
/// <param name="filename">Path of the text file to sample from.</param>
/// <param name="n">Maximum number of lines to choose; must not be negative.</param>
/// <param name="rng">Random number generator used to decide which lines are kept.</param>
/// <returns>
/// A list of the chosen lines. It holds <paramref name="n"/> lines when the file has at
/// least that many; otherwise it holds every line of the file in its original order.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="rng"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="n"/> is negative.</exception>
public static List<string> RandomlyChooseLinesFromFile(string filename, int n, Random rng)
{
    // Validate up front: otherwise a null rng only fails once the file turns out to
    // have more than n lines, and a negative n is reported against List's "capacity".
    if (rng == null)
    {
        throw new ArgumentNullException(nameof(rng));
    }
    if (n < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(n), n, "The number of lines to choose must not be negative.");
    }

    var result = new List<string>(n);
    int index = 0; // Number of lines read so far.
    foreach (var line in File.ReadLines(filename))
    {
        if (index < n)
        {
            // The first n lines fill the reservoir unconditionally.
            result.Add(line);
        }
        else
        {
            // Line number (index + 1) displaces a random reservoir slot with
            // probability n / (index + 1), which keeps the selection uniform.
            int r = rng.Next(0, index + 1);
            if (r < n)
            {
                result[r] = line;
            }
        }
        ++index;
    }
    return result;
}
// Entry point: empirically checks that RandomlyChooseLinesFromFile() picks lines uniformly.
private static void Main(string[] args)
{
    const int lineCount = 10;
    const int trials = 100000;

    // Create a ten-line test file whose lines are the digits 0 through 9.
    string testfile = "C:\\TEST\\TESTFILE.TXT";
    Directory.CreateDirectory("C:\\TEST");
    File.WriteAllText(testfile, "0\n1\n2\n3\n4\n5\n6\n7\n8\n9");

    // Repeatedly sample a single line and tally how often each digit comes up.
    var rng = new Random();
    int[] counts = new int[lineCount];
    for (int trial = 0; trial < trials; trial++)
    {
        List<string> chosen = RandomlyChooseLinesFromFile(testfile, 1, rng);
        counts[int.Parse(chosen[0])]++;
    }

    // With a uniform selection, every line should win roughly 10% of the time.
    Console.WriteLine("% times each line was chosen:\n");
    int digit = 0;
    foreach (int count in counts)
    {
        double percent = 100 * count / (double)trials;
        Console.WriteLine("{0} = {1}%", digit, percent);
        digit++;
    }
}
}
}