So I ended up creating my own BigStringBuilder function in the end. It's a list where each list element (or page) is a char array (type List<char[]>
).
Providing you're using 64 bit Windows, you can now easily surpass the 2 billion character element limit. I managed to test creating a giant string around 32 gigabytes large (needed to increase virtual memory in the OS first, otherwise I could only get around 7GB on my 8GB RAM PC). I'm sure it handles more than 32GB easily. In theory, it should be able to handle around 1,000,000,000 * 1,000,000,000 chars or one quintillion characters, which should be enough for anyone.
Speed-wise, some quick tests show that it's only around 33% slower than a StringBuilder when appending. I got very similar performance if I went for a 2D jagged char array (char[][]
) instead of List<char[]>
, but Lists are simpler to work with, so I stuck with that.
Hope somebody else finds it useful! There may be bugs, so use with caution. I tested it fairly well though.
// A simplified version specially for StackOverflow
public class BigStringBuilder
{
List<char[]> c = new List<char[]>();
private int pagedepth;
private long pagesize;
private long mpagesize; // https://stackoverflow.com/questions/11040646/faster-modulus-in-c-c
private int currentPage = 0;
private int currentPosInPage = 0;
public BigStringBuilder(int pagedepth = 12) { // pagesize is 2^pagedepth (since must be a power of 2 for a fast indexer)
this.pagedepth = pagedepth;
pagesize = (long)Math.Pow(2, pagedepth);
mpagesize = pagesize - 1;
c.Add(new char[pagesize]);
}
// Indexer for this class, so you can use convenient square bracket indexing to address char elements within the array!!
public char this[long n] {
get { return c[(int)(n >> pagedepth)][n & mpagesize]; }
set { c[(int)(n >> pagedepth)][n & mpagesize] = value; }
}
public string[] returnPagesForTestingPurposes() {
string[] s = new string[currentPage + 1];
for (int i = 0; i < currentPage + 1; i++) s[i] = new string(c[i]);
return s;
}
public void clear() {
c = new List<char[]>();
c.Add(new char[pagesize]);
currentPage = 0;
currentPosInPage = 0;
}
public void fileOpen(string path)
{
clear();
StreamReader sw = new StreamReader(path);
int len = 0;
while ((len = sw.ReadBlock(c[currentPage], 0, (int)pagesize)) != 0) {
if (!sw.EndOfStream) {
currentPage++;
if (currentPage > (c.Count - 1)) c.Add(new char[pagesize]);
}
else {
currentPosInPage = len;
break;
}
}
sw.Close();
}
// See: https://stackoverflow.com/questions/373365/how-do-i-write-out-a-text-file-in-c-sharp-with-a-code-page-other-than-utf-8/373372
public void fileSave(string path) {
StreamWriter sw = File.CreateText(path);
for (int i = 0; i < currentPage; i++) sw.Write(new string(c[i]));
sw.Write(new string(c[currentPage], 0, currentPosInPage));
sw.Close();
}
public long length() {
return (long)currentPage * (long)pagesize + (long)currentPosInPage;
}
public string ToString(long max = 2000000000) {
if (length() < max) return substring(0, length());
else return substring(0, max);
}
public string substring(long x, long y) {
StringBuilder sb = new StringBuilder();
for (long n = x; n < y; n++) sb.Append(c[(int)(n >> pagedepth)][n & mpagesize]); //8s
return sb.ToString();
}
public bool match(string find, long start = 0) {
//if (s.Length > length()) return false;
for (int i = 0; i < find.Length; i++) if (i + start == find.Length || this[start + i] != find[i]) return false;
return true;
}
public void replace(string s, long pos) {
for (int i = 0; i < s.Length; i++) {
c[(int)(pos >> pagedepth)][pos & mpagesize] = s[i];
pos++;
}
}
public void Append(string s)
{
for (int i = 0; i < s.Length; i++)
{
c[currentPage][currentPosInPage] = s[i];
currentPosInPage++;
if (currentPosInPage == pagesize)
{
currentPosInPage = 0;
currentPage++;
if (currentPage == c.Count) c.Add(new char[pagesize]);
}
}
}
}