If you start the other way, with X < Y < Z, by incrementing Y and Z up to a limit, you can gain some efficiencies. Once Z^3 > X^3 + Y^3 - 1, you can skip to the next Y value: the cube function is strictly increasing, so every larger Z overshoots as well.
This implementation in C# works pretty fast on a laptop:
// Brute-force search for positive integers x < y < z with x^3 + y^3 - z^3 = 1.
//
// All cubes are held in UInt64, so any z that actually gets cubed must stay below
// 2,642,246 (the cube root of 2^64). The early exits below stop each z scan as soon
// as z^3 exceeds x^3 + y^3 - 1, which is under 2 * ylim^3 = 2e18 for these limits,
// so zlim is only a safety cap and is never reached.

// Shared across worker threads; declared as long so it can be updated atomically
// with Interlocked.Increment (a plain ++ on a captured local loses updates).
long setsFound = 0;
UInt64 xlim = 10000;
UInt64 ylim = 1000000;
UInt64 zlim = 10000000;
Console.WriteLine("The first 23 sets ordered by increasing x.");
// NOTE: with Parallel.For the x values are processed concurrently, so lines may be
// printed out of x order. For strictly ordered output, replace the Parallel.For
// call with a plain sequential loop: for (UInt64 x = 1; x < xlim; x++).
Parallel.For(1, (long)xlim, new ParallelOptions { MaxDegreeOfParallelism = 4 }, i =>
{
    UInt64 x = (UInt64)i;
    UInt64 xCu = x * x * x;
    for (UInt64 y = x + 1; y < ylim; y++)
    {
        // z^3 must equal this value for (x, y, z) to be a solution.
        // x >= 1, so the subtraction cannot underflow.
        UInt64 target = xCu + y * y * y - 1;

        // The smallest admissible z is y + 1. If even that overshoots, then
        // (y+1)^3 - y^3 = 3y^2 + 3y + 1 already exceeds x^3 - 1; that gap only
        // grows with y, so no larger y can work for this x either.
        UInt64 zMin = y + 1;
        if (zMin * zMin * zMin > target)
        {
            break;
        }

        for (UInt64 z = zMin; z < zlim; z++)
        {
            UInt64 zCu = z * z * z;
            if (zCu > target)
            {
                // Cubes are strictly increasing: every later z overshoots too.
                break;
            }

            if (zCu == target)
            {
                // Atomically claim the next 0-based result index.
                long index = System.Threading.Interlocked.Increment(ref setsFound) - 1;
                Console.WriteLine("{0}: {1}^3 + {2}^3 - {3}^3 = 1", index, x, y, z);
                // The next z would overshoot, so this y is finished.
                break;
            }
        }
    }
}
);
Obviously you can take out the parallelization. Also, here are the first 19 elements in that set (computer is still running, I'll try to post the last 4 later):

(source: yfrog.com)