If it is OK to have them in Unicode order, you could do this:
// Writes every UTF-16 code unit that iso-2022-jp can encode to a text file,
// one character per line, in ascending Unicode order.

// Clone first so the fallback can be replaced (encodings returned by
// GetEncoding are read-only).
Encoding enc = (Encoding)Encoding.GetEncoding("iso-2022-jp").Clone();

// An empty replacement string makes an unencodable character yield zero bytes.
enc.EncoderFallback = new EncoderReplacementFallback("");

char[] single = new char[1];
byte[] scratch = new byte[16];

using (StreamWriter writer = new StreamWriter(@"C:\temp\iso-2022-jp.txt"))
{
    for (int codeUnit = char.MinValue; codeUnit <= char.MaxValue; codeUnit++)
    {
        single[0] = (char)codeUnit;

        // Zero bytes produced: the character is not part of the encoding.
        if (enc.GetBytes(single, 0, single.Length, scratch, 0) == 0)
        {
            continue;
        }

        writer.WriteLine(single[0]);
    }
}
If you want to order them by byte sequence, you could do this:
// Writes every UTF-16 code unit that iso-2022-jp can encode to a text file,
// one character per line, ordered by the encoded byte sequence.

// Clone first so the fallback can be replaced (encodings returned by
// GetEncoding are read-only).
Encoding enc = (Encoding)Encoding.GetEncoding("iso-2022-jp").Clone();

// An empty replacement string makes an unencodable character yield zero bytes.
enc.EncoderFallback = new EncoderReplacementFallback("");

char[] single = new char[1];
byte[] scratch = new byte[16];

// Each entry pairs the encoded bytes (Item1) with the source character (Item2).
var lst = new List<Tuple<byte[], char>>();

for (int codeUnit = char.MinValue; codeUnit <= char.MaxValue; codeUnit++)
{
    single[0] = (char)codeUnit;
    int written = enc.GetBytes(single, 0, single.Length, scratch, 0);

    // Zero bytes produced: the character is not part of the encoding.
    if (written == 0)
    {
        continue;
    }

    // Keep a right-sized private copy; the scratch buffer is reused next iteration.
    byte[] encoded = new byte[written];
    Array.Copy(scratch, 0, encoded, 0, written);
    lst.Add(new Tuple<byte[], char>(encoded, single[0]));
}

// Lexicographic comparison of the byte sequences; when one sequence is a
// prefix of the other, the shorter one sorts first.
lst.Sort((left, right) =>
{
    byte[] a = left.Item1;
    byte[] b = right.Item1;

    int index = 0;
    while (index < a.Length && index < b.Length)
    {
        int byByte = a[index].CompareTo(b[index]);
        if (byByte != 0)
        {
            return byByte;
        }

        index++;
    }

    return a.Length.CompareTo(b.Length);
});

using (StreamWriter writer = new StreamWriter(@"C:\temp\iso-2022-jp.txt"))
{
    foreach (Tuple<byte[], char> entry in lst)
    {
        writer.WriteLine(entry.Item2);

        // Alternative output (requires System.Linq): print the full byte
        // sequence necessary to generate the char. Note that iso-2022-jp uses
        // escape sequences to "activate" subtables and to deactivate them.
        //writer.WriteLine("{0}: {1}", entry.Item2, string.Join(",", entry.Item1.Select(x => x.ToString("x2"))));
    }
}
Or, for a different sort order (length first), replace the `lst.Sort` call with:
// Orders the entries by encoded length first; entries of equal length are
// then ordered byte by byte.
lst.Sort((left, right) =>
{
    byte[] a = left.Item1;
    byte[] b = right.Item1;

    // Shorter byte sequences sort before longer ones.
    int byLength = a.Length.CompareTo(b.Length);
    if (byLength != 0)
    {
        return byLength;
    }

    // Both sequences have the same length here, so either bound works.
    for (int k = 0; k < a.Length; k++)
    {
        int byByte = a[k].CompareTo(b[k]);
        if (byByte != 0)
        {
            return byByte;
        }
    }

    return 0;
});
Note that all the examples above only generate characters of the Basic Multilingual Plane (BMP). I don't think that characters outside the BMP are included in this encoding... If necessary, I can modify the code to support them.
Just out of curiosity, here is the first version of the code extended to handle non-BMP characters (which aren't present in iso-2022-jp):
// Writes every Unicode code point (including those beyond the BMP) that
// iso-2022-jp can encode to a text file, one per line, in ascending order,
// then reports the highest encodable code point on the console.

// Clone first so the fallback can be replaced (encodings returned by
// GetEncoding are read-only).
Encoding enc = (Encoding)Encoding.GetEncoding("iso-2022-jp").Clone();

// An empty replacement string makes an unencodable character yield zero bytes.
enc.EncoderFallback = new EncoderReplacementFallback("");

byte[] scratch = new byte[16];

using (StreamWriter writer = new StreamWriter(@"C:\temp\iso-2022-jp.txt"))
{
    // Stays at -1 if nothing at all could be encoded.
    int highest = -1;

    for (int codePoint = 0; codePoint <= 0x10FFFF; codePoint++)
    {
        // Surrogate code points are not valid scalar values and are
        // rejected by ConvertFromUtf32, so they are skipped.
        bool isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
        if (!isSurrogate)
        {
            // One UTF-16 code unit for BMP code points, a surrogate pair otherwise.
            string text = char.ConvertFromUtf32(codePoint);

            if (enc.GetBytes(text, 0, text.Length, scratch, 0) != 0)
            {
                writer.WriteLine(text);
                highest = codePoint;
            }
        }
    }

    Console.WriteLine("maximum codepoint: {0}", highest);
}