I'm working on a program that reads millions of floating point numbers from a text file. The program runs inside a game I'm designing, so I need it to be fast (I'm loading an OBJ file). So far, loading a relatively small file takes about a minute (without precompilation) because of the slow speed of Convert.ToSingle(). Is there a faster way to do this?

EDIT: Here's the code I use to parse the Obj file

http://pastebin.com/TfgEge9J

using System;
using System.IO;
using System.Collections.Generic;
using OpenTK.Math;
using System.Drawing;
using PlatformLib;

public class ObjMeshLoader
{
    public static StreamReader[] LoadMeshes(string fileName)
    {
        StreamReader mreader = new StreamReader(PlatformLib.Platform.openFile(fileName));
        MemoryStream current = null;
        List<MemoryStream> mstreams = new List<MemoryStream>();
        StreamWriter mwriter = null;

        if (!mreader.ReadLine().Contains("#"))
        {
            mreader.BaseStream.Close();
            throw new Exception("Invalid header");
        }

        while (!mreader.EndOfStream)
        {
            string cmd = mreader.ReadLine();
            string line = cmd;
            line = line.Trim(splitCharacters);
            line = line.Replace("  ", " ");

            string[] parameters = line.Split(splitCharacters);
            if (parameters[0] == "mtllib")
            {
                loadMaterials(parameters[1]);
            }

            if (parameters[0] == "o")
            {
                if (mwriter != null)
                {
                    mwriter.Flush();
                    current.Position = 0;
                }

                current = new MemoryStream();
                mwriter = new StreamWriter(current);
                mwriter.WriteLine(parameters[1]);
                mstreams.Add(current);
            }
            else
            {
                if (mwriter != null)
                {
                    mwriter.WriteLine(cmd);
                    mwriter.Flush();
                }
            }
        }

        mwriter.Flush();
        current.Position = 0;
        List<StreamReader> readers = new List<StreamReader>();

        foreach (MemoryStream e in mstreams)
        {
            e.Position = 0;
            StreamReader sreader = new StreamReader(e);
            readers.Add(sreader);
        }

        return readers.ToArray();
    }

    public static bool Load(ObjMesh mesh, string fileName)
    {
        try
        {
            using (StreamReader streamReader = new StreamReader(Platform.openFile(fileName)))
            {
                Load(mesh, streamReader);
                streamReader.Close();
                return true;
            }
        }
        catch { return false; }
    }

    public static bool Load2(ObjMesh mesh, StreamReader streamReader, ObjMesh prevmesh)
    {
        if (prevmesh != null)
        {
            //mesh.Vertices = prevmesh.Vertices;
        }

        try
        {
            //streamReader.BaseStream.Position = 0;
            Load(mesh, streamReader);
            streamReader.Close();
#if DEBUG
            Console.WriteLine("Loaded "+mesh.Triangles.Length.ToString()+" triangles and"+mesh.Quads.Length.ToString()+" quadrilaterals parsed, with a grand total of "+mesh.Vertices.Length.ToString()+" vertices.");
#endif
            return true;
        }
        catch (Exception er) { Console.WriteLine(er); return false; }
    }

    static char[] splitCharacters = new char[] { ' ' };
    static List<Vector3> vertices;
    static List<Vector3> normals;
    static List<Vector2> texCoords;
    static Dictionary<ObjMesh.ObjVertex, int> objVerticesIndexDictionary;
    static List<ObjMesh.ObjVertex> objVertices;
    static List<ObjMesh.ObjTriangle> objTriangles;
    static List<ObjMesh.ObjQuad> objQuads;
    static Dictionary<string, Bitmap> materials = new Dictionary<string, Bitmap>();

    static void loadMaterials(string path)
    {
        StreamReader mreader = new StreamReader(Platform.openFile(path));
        string current = "";
        bool isfound = false;

        while (!mreader.EndOfStream)
        {
            string line = mreader.ReadLine();
            line = line.Trim(splitCharacters);
            line = line.Replace("  ", " ");

            string[] parameters = line.Split(splitCharacters);

            if (parameters[0] == "newmtl")
            {
                if (materials.ContainsKey(parameters[1]))
                {
                    isfound = true;
                }
                else
                {
                    current = parameters[1];
                }
            }

            if (parameters[0] == "map_Kd")
            {
                if (!isfound)
                {
                    string filename = "";
                    for (int i = 1; i < parameters.Length; i++)
                    {
                        filename += parameters[i];
                    }

                    string searcher = "\\" + "\\";

                    filename.Replace(searcher, "\\");
                    Bitmap mymap = new Bitmap(filename);
                    materials.Add(current, mymap);
                    isfound = false;
                }
            }
        }
    }

    static float parsefloat(string val)
    {
        return Convert.ToSingle(val);
    }

    int remaining = 0;

    static string GetLine(string text, ref int pos)
    {
        string retval = text.Substring(pos, text.IndexOf(Environment.NewLine, pos));
        pos = text.IndexOf(Environment.NewLine, pos);
        return retval;
    }

    static void Load(ObjMesh mesh, StreamReader textReader)
    {
        //try {
        //vertices = null;
        //objVertices = null;
        if (vertices == null)
        {
            vertices = new List<Vector3>();
        }

        if (normals == null)
        {
            normals = new List<Vector3>();
        }

        if (texCoords == null)
        {
            texCoords = new List<Vector2>();
        }

        if (objVerticesIndexDictionary == null)
        {
            objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();
        }

        if (objVertices == null)
        {
            objVertices = new List<ObjMesh.ObjVertex>();
        }

        objTriangles = new List<ObjMesh.ObjTriangle>();
        objQuads = new List<ObjMesh.ObjQuad>();

        mesh.vertexPositionOffset = vertices.Count;

        string line;
        string alltext = textReader.ReadToEnd();
        int pos = 0;

        while ((line = GetLine(alltext, pos)) != null)
        {
            if (line.Length < 2)
            {
                break;
            }

            //line = line.Trim(splitCharacters);
            //line = line.Replace("  ", " ");

            string[] parameters = line.Split(splitCharacters);

            switch (parameters[0])
            {

                case "usemtl":
                    //Material specification
                    try
                    {
                        mesh.Material = materials[parameters[1]];
                    }
                    catch (KeyNotFoundException)
                    {
                        Console.WriteLine("WARNING: Texture parse failure: " + parameters[1]);
                    }

                    break;
                case "p": // Point
                    break;
                case "v": // Vertex
                    float x = parsefloat(parameters[1]);
                    float y = parsefloat(parameters[2]);
                    float z = parsefloat(parameters[3]);
                    vertices.Add(new Vector3(x, y, z));
                    break;
                case "vt": // TexCoord
                    float u = parsefloat(parameters[1]);
                    float v = parsefloat(parameters[2]);
                    texCoords.Add(new Vector2(u, v));
                    break;
                case "vn": // Normal
                    float nx = parsefloat(parameters[1]);
                    float ny = parsefloat(parameters[2]);
                    float nz = parsefloat(parameters[3]);
                    normals.Add(new Vector3(nx, ny, nz));
                    break;
                case "f":
                    switch (parameters.Length)
                    {
                        case 4:
                            ObjMesh.ObjTriangle objTriangle = new ObjMesh.ObjTriangle();
                            objTriangle.Index0 = ParseFaceParameter(parameters[1]);
                            objTriangle.Index1 = ParseFaceParameter(parameters[2]);
                            objTriangle.Index2 = ParseFaceParameter(parameters[3]);
                            objTriangles.Add(objTriangle);
                            break;
                        case 5:
                            ObjMesh.ObjQuad objQuad = new ObjMesh.ObjQuad();
                            objQuad.Index0 = ParseFaceParameter(parameters[1]);
                            objQuad.Index1 = ParseFaceParameter(parameters[2]);
                            objQuad.Index2 = ParseFaceParameter(parameters[3]);
                            objQuad.Index3 = ParseFaceParameter(parameters[4]);
                            objQuads.Add(objQuad);
                            break;
                    }
                    break;
            }
        }
        //}catch(Exception er) {
        //  Console.WriteLine(er);
        //  Console.WriteLine("Successfully recovered. Bounds/Collision checking may fail though");
        //}
        mesh.Vertices = objVertices.ToArray();
        mesh.Triangles = objTriangles.ToArray();
        mesh.Quads = objQuads.ToArray();
        textReader.BaseStream.Close();
    }

    public static void Clear()
    {
        objVerticesIndexDictionary = null;
        vertices = null;
        normals = null;
        texCoords = null;
        objVertices = null;
        objTriangles = null;
        objQuads = null;
    }

    static char[] faceParamaterSplitter = new char[] { '/' };

    static int ParseFaceParameter(string faceParameter)
    {
        Vector3 vertex = new Vector3();
        Vector2 texCoord = new Vector2();
        Vector3 normal = new Vector3();

        string[] parameters = faceParameter.Split(faceParamaterSplitter);

        int vertexIndex = Convert.ToInt32(parameters[0]);

        if (vertexIndex < 0) vertexIndex = vertices.Count + vertexIndex;
        else vertexIndex = vertexIndex - 1;

        //Hmm. This seems to be broken.
        try
        {
            vertex = vertices[vertexIndex];
        }
        catch (Exception)
        {
            throw new Exception("Vertex recognition failure at " + vertexIndex.ToString());
        }

        if (parameters.Length > 1)
        {
            int texCoordIndex = Convert.ToInt32(parameters[1]);

            if (texCoordIndex < 0) texCoordIndex = texCoords.Count + texCoordIndex;
            else texCoordIndex = texCoordIndex - 1;

            try
            {
                texCoord = texCoords[texCoordIndex];
            }
            catch (Exception)
            {
                Console.WriteLine("ERR: Vertex " + vertexIndex + " not found. ");
                throw new DllNotFoundException(vertexIndex.ToString());
            }
        }

        if (parameters.Length > 2)
        {
            int normalIndex = Convert.ToInt32(parameters[2]);

            if (normalIndex < 0) normalIndex = normals.Count + normalIndex;
            else normalIndex = normalIndex - 1;

            normal = normals[normalIndex];
        }

        return FindOrAddObjVertex(ref vertex, ref texCoord, ref normal);
    }

    static int FindOrAddObjVertex(ref Vector3 vertex, ref Vector2 texCoord, ref Vector3 normal)
    {
        ObjMesh.ObjVertex newObjVertex = new ObjMesh.ObjVertex();
        newObjVertex.Vertex = vertex;
        newObjVertex.TexCoord = texCoord;
        newObjVertex.Normal = normal;

        int index;

        if (objVerticesIndexDictionary.TryGetValue(newObjVertex, out index))
        {
            return index;
        }
        else
        {
            objVertices.Add(newObjVertex);
            objVerticesIndexDictionary[newObjVertex] = objVertices.Count - 1;
            return objVertices.Count - 1;
        }
    }
}
  • Why do you use a text file in the first place? Consider a binary file; then you can read the floats directly. – BrokenGlass Apr 23 '11 at 22:35
  • Yes. Neither float.Parse nor Single.Parse works fast enough. – bbosak Apr 23 '11 at 22:35
  • Because Blender exports to text files (obj format). I did make an obj-to-binary converter, but I was wondering if there was a better way to do it. – bbosak Apr 23 '11 at 22:35
  • @Heandel: `float.Parse` is the same as `Single.Parse`, which is what `Convert.ToSingle` calls when given a string. – David Brown Apr 23 '11 at 22:36
  • You won't get any better results from other string parsing methods. Go for the binary format. – Ekin Koc Apr 23 '11 at 22:38
  • I know I can get better performance, because I used a similar text-based OBJ parser for ActionScript, which loaded it almost instantly. – bbosak Apr 23 '11 at 22:42
  • Do you have to have access to **all** floats right away? Otherwise you can read them as you need them. – BrokenGlass Apr 23 '11 at 22:44
  • Yes, I need immediate access to all floats, so I can upload them to the GPU and perform collision checking. I have seen ActionScript programs that do this, but C#'s string-to-float parser is much slower. – bbosak Apr 23 '11 at 22:47
  • @Ekin Koc: Yes, you will get better results from custom parsers, which don't have the flexibility for a million different factors (exponent vs. decimal, locale-dependent formatting, etc.). – André Caron Apr 23 '11 at 23:05
  • @IDWMaster: Just FYI, getting 30 times faster than my version is *physically impossible* (without multithreading). I took out *all* the parsing and just said `return 0;` in my code, and it *still* didn't beat itself by a factor of 30. I think the problem is with how you're **reading** the file, not with the parsing; if you're using something like `StreamReader.ReadLine` (or anything else that allocates a string or an array), that will decrease the performance considerably. Would you mind posting sample code so we see what you're doing? – user541686 Apr 23 '11 at 23:44
  • I, too, doubt that the problem is with your `parseFloat` method. You could test that by just having it return 0. I suspect the problem isn't with the reading or the parsing, but rather with having to re-size your collections as they grow. I think @Paja is on the right track here. – Jim Mischel Apr 24 '11 at 00:02

5 Answers


Based on your description and the code you've posted, I'm going to bet that your problem isn't with the reading, the parsing, or the way you're adding things to your collections. The most likely problem is that your ObjMesh.ObjVertex structure doesn't override GetHashCode. (I'm assuming that you're using code similar to http://www.opentk.com/files/ObjMesh.cs.)

If you're not overriding GetHashCode, then your objVerticesIndexDictionary is going to perform very much like a linear list. That would account for the performance problem that you're experiencing.

I suggest that you look into providing a good GetHashCode method for your ObjMesh.ObjVertex type.

See Why is ValueType.GetHashCode() implemented like it is? for information about the default GetHashCode implementation for value types and why it's not suitable for use in a hash table or dictionary.
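
For illustration, here's a rough sketch of what such an override might look like, assuming ObjVertex is a struct with Vertex, TexCoord and Normal fields (as in the loader code above) and that the OpenTK vector types supply sensible Equals/GetHashCode of their own; adapt the names to your actual type:

public struct ObjVertex : IEquatable<ObjVertex>
{
    public Vector2 TexCoord;
    public Vector3 Normal;
    public Vector3 Vertex;

    public bool Equals(ObjVertex other)
    {
        return Vertex == other.Vertex
            && TexCoord == other.TexCoord
            && Normal == other.Normal;
    }

    public override bool Equals(object obj)
    {
        return obj is ObjVertex && Equals((ObjVertex)obj);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            // Combine the component hashes (arbitrary odd multipliers) so that
            // distinct vertices rarely land in the same dictionary bucket.
            int hash = 17;
            hash = hash * 31 + Vertex.GetHashCode();
            hash = hash * 31 + TexCoord.GetHashCode();
            hash = hash * 31 + Normal.GetHashCode();
            return hash;
        }
    }
}

Implementing IEquatable<ObjVertex> as well avoids boxing the struct every time the dictionary compares keys.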

Jim Mischel

Edit 3: The problem is NOT with the parsing.

It's with how you read the file. Read properly, it would be much faster; as it stands, your reading is unusually slow. My original suspicion was excess allocations, but there seem to be other problems in your code too, since allocations alone don't explain the entire slowdown.

Nevertheless, here's a piece of code I made that avoids allocating any objects inside the parsing loop:

static void Main(string[] args)
{
    long counter = 0;
    var sw = Stopwatch.StartNew();
    var sb = new StringBuilder();
    var text = File.ReadAllText("spacestation.obj");
    for (int i = 0; i < text.Length; i++)
    {
        int start = i;
        while (i < text.Length &&
            (char.IsDigit(text[i]) || text[i] == '-' || text[i] == '.'))
        { i++; }
        if (i > start)
        {
            sb.Append(text, start, i - start); //Copy data to the buffer

            float value = Parse(sb); //Parse the data

            sb.Remove(0, sb.Length); //Clear the buffer
            counter++;
        }
    }
    sw.Stop();
    Console.WriteLine("Parsed {0:N0} numbers in {1:N0} ms", counter, sw.Elapsed.TotalMilliseconds); // only a few ms
}

with this parser:

const int MIN_POW_10 = -16, MAX_POW_10 = 16,
    NUM_POWS_10 = MAX_POW_10 - MIN_POW_10 + 1;
static readonly float[] pow10 = GenerateLookupTable();
static float[] GenerateLookupTable()
{
    var result = new float[NUM_POWS_10 * 10];
    for (int i = 0; i < result.Length; i++)
        result[i] = (float)((i / NUM_POWS_10) *
                Math.Pow(10, i % NUM_POWS_10 + MIN_POW_10));
    return result;
}
static float Parse(StringBuilder str)
{
    float result = 0;
    bool negate = false;
    int len = str.Length;
    int decimalIndex = str.Length;
    for (int i = len - 1; i >= 0; i--)
        if (str[i] == '.')
        { decimalIndex = i; break; }
    int offset = -MIN_POW_10 + decimalIndex;
    for (int i = 0; i < decimalIndex; i++)
        if (i != decimalIndex && str[i] != '-')
            result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i - 1];
        else if (str[i] == '-')
            negate = true;
    for (int i = decimalIndex + 1; i < len; i++)
        if (i != decimalIndex)
            result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i];
    if (negate)
        result = -result;
    return result;
}

Together, they process the whole file in a small fraction of a second.

Of course, this parser is poorly tested and has these current restrictions (and more):

  • Don't try parsing more digits (decimal and whole) than provided for in the array.

  • No error handling whatsoever.

  • Only parses decimals, not exponents! i.e. it can parse 1234.56 but not 1.23456E3.

  • Doesn't care about globalization/localization. An OBJ file uses a single, fixed number format, so there's no point paying for culture-aware parsing.

You won't necessarily need to go this far, but take a look at your code and try to figure out the actual bottleneck; it seems to be neither the reading nor the parsing.

user541686
  • Good job on the parser, but it's still not quite fast enough for my needs. – bbosak Apr 23 '11 at 23:15
  • @IDWMaster: Just curious, how many times faster do you need it to be? – user541686 Apr 23 '11 at 23:17
  • About 30 times faster. It loads pretty slow at the moment (a minute to load a simple OBJ file such as this one http://dl.dropbox.com/u/1854457/spacestation.obj). It's a real-time first person shooter game I'm working on, so it needs to load fast. – bbosak Apr 23 '11 at 23:20
  • @IDWMaster: About how many floats are there in your file? (It's a bit hard to calculate when I'm looking at it right now.) – user541686 Apr 23 '11 at 23:22
  • @IDWMaster: By the way, I think the bottleneck is *reading* the string, not parsing it. You're not creating a new string every single time, are you? If you're using `Substring` then that's a very big mistake. – user541686 Apr 23 '11 at 23:25
  • I posted my code for actually reading the file. The full Obj importer code. See the pastebin link in my answer. – bbosak Apr 24 '11 at 00:03
  • @IDWMaster: See? You use `ReadLine`, which is ridiculously slow, because it creates a new string every time. See my edit. – user541686 Apr 24 '11 at 00:06
  • How should I parse it without using ReadLine or Substring? What is the fastest way to load an OBJ file without those methods? – bbosak Apr 24 '11 at 00:08
  • @IDWMaster: I did it without either; see my edit. You should use a single `StringBuilder` throughout, and never create a new string in a loop. – user541686 Apr 24 '11 at 00:10
  • The problem is not with reading the file. I created a file that has 1 million lines, each with 10 doubles. The resulting file is 170 MB in size. On my machine (2.0 GHz Core 2 processor), it reads and parses the file in 8.2 seconds. It parses 1.2 million doubles per second. The problem is not in the parsing OR the reading. My test uses `double.Parse`. – Jim Mischel Apr 24 '11 at 00:30
  • @Jim: I guess it could be something else, but that was my hunch. I guess you could try figuring out the real bottleneck, but I'm pretty sure using `string.Split` or `StreamReader.ReadLine` *is* a bottleneck compared to the parsing if you don't have other inefficient code (which I guess may not be the case here, but I never ran his code to check). Did I really deserve a downvote though? :( – user541686 Apr 24 '11 at 00:40
  • @Mehrdad: I suppose the downvote was overkill. Oddly, SO is telling me that I can't remove it unless you edit the answer. Weird. In any event, there's no doubt that using readline and split is less than optimum, but it's a very small percentage of the runtime. Optimizing the read and parse isn't going to solve his problem. – Jim Mischel Apr 24 '11 at 01:00
  • @Jim: Haha I'll edit out that part about the read and parse then :) – user541686 Apr 24 '11 at 01:02
  • @Mehrdad: Likely the problem is the lack of a `GetHashCode` method for the vertex type, which causes his dictionary lookups to be linear searches. – Jim Mischel Apr 24 '11 at 05:02

Have you measured that the speed problem is really caused by Convert.ToSingle?

In the code you included, I see you create lists and dictionaries like this:

normals = new List<Vector3>();
texCoords = new List<Vector2>();
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>();

Then, as you read the file, you add items to these collections one at a time. One possible optimization would be to store the total number of normals, texCoords, indexes and so on at the start of the file, and then initialize the collections with those counts. That pre-allocates the buffers used by the collections, so adding items to them avoids repeated resizing.

So the collection creation should look like this:

// These values should be stored at the beginning of the file
int totalNormals = Convert.ToInt32(textReader.ReadLine());
int totalTexCoords = Convert.ToInt32(textReader.ReadLine());
int totalIndexes = Convert.ToInt32(textReader.ReadLine());

normals = new List<Vector3>(totalNormals);
texCoords = new List<Vector2>(totalTexCoords);
objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(totalIndexes);

See List<T> Constructor (Int32) and Dictionary<TKey, TValue> Constructor (Int32).
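
The OBJ format itself doesn't store these counts up front (as the comments below point out), so an alternative is a cheap counting pre-pass over the file. A rough sketch, where CountObjElements is a hypothetical helper and File.ReadLines is available from .NET 4:

static void CountObjElements(string path, out int vertexCount,
    out int texCoordCount, out int normalCount, out int faceCount)
{
    vertexCount = texCoordCount = normalCount = faceCount = 0;

    // One cheap pass over the file: only look at each line's prefix.
    foreach (string line in File.ReadLines(path))
    {
        if (line.StartsWith("v ")) vertexCount++;
        else if (line.StartsWith("vt ")) texCoordCount++;
        else if (line.StartsWith("vn ")) normalCount++;
        else if (line.StartsWith("f ")) faceCount++;
    }
}

The resulting counts (or capacities derived from them) can then be passed to the List<T> and Dictionary<TKey, TValue> constructors exactly as above; even a rough estimate avoids most of the resizing.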

Paya
  • `ReadLine` shouldn't even be used in the first place, I think that's the real issue. – user541686 Apr 23 '11 at 23:31
  • Replaced ReadLine with substring – bbosak Apr 24 '11 at 00:04
  • This would work, assuming I knew the totals at the start of the file. Per the OBJ specification, the file doesn't say up front how many vertices, texcoords, normals, quads, etc. it contains. – bbosak Apr 24 '11 at 00:06
  • @Mehrdad: although reading from a text file isn't going to be as fast as reading from a binary file, there's no way that it would account for taking over a minute to read a 600 KB file. I load gigabyte-sized XML files in less time than that, and parsing XML is way more involved than parsing floats. – Jim Mischel Apr 24 '11 at 00:12
  • Well, do you know roughly the number of items? Is it completely random, or usually like 100 000? You can initialize the collection to some estimated capacity. Or implement your own collection, and make it work similarly to `StringBuffer`. – Paya Apr 24 '11 at 00:12
  • @Jim, @Paja: See my edited version. Notice how I never create new objects inside any loops, and how tremendously the speed increases (i.e. a few milliseconds total). The issue isn't with whether this is a text file or a binary file, the issue is that *it creates too many new objects*, that's all. – user541686 Apr 24 '11 at 00:13
  • But I really recommend measuring just about everything that's going on in your parser and finding where the performance sink is. Try using a profiler. – Paya Apr 24 '11 at 00:14
  • @Paja: Using a profiler is overkill. Just run the program, pause it a couple of times, and see where it pauses. That's the bottleneck. Of course, in this case, there's no need to profile -- just avoid creating a new string for every single float, and it should speed up dramatically. – user541686 Apr 24 '11 at 00:20
  • @Mehrdad: Actually using a profiler is less work than pausing the program several times (at least for me), and is much more reliable. – Paya Apr 24 '11 at 00:26
  • @Mehrdad: I disagree. Using `Readline` and `String.Split`, I can read and parse a file of 1 million lines (10 doubles per line) in 8.2 seconds. It's certain that the problem isn't with reading the file or parsing the numbers. The problem is somewhere else entirely. – Jim Mischel Apr 24 '11 at 00:33
  • @Jim: Maybe it's something else, but personally I'd avoid string.Split like the plague because it allocates so many objects. He's posted his code I guess; I'm not going to try to figure out the bottleneck but that was my guess. Could be somewhere else though I guess. – user541686 Apr 24 '11 at 00:37

This related question is for C++, but is definitely worth a read.

For reading as fast as possible, you're probably going to want to map the file into memory and then parse using some custom floating point parser, especially if you know the numbers are always in a specific format (i.e. you're the one generating the input files in the first place).
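
In C# that might look roughly like the sketch below (a hypothetical, stripped-down example: it memory-maps the file, scans it byte by byte, and parses plain "digits[.digits]" tokens with no exponent or culture handling; spacestation.obj is just the sample file linked in the comments above):

using System;
using System.IO;
using System.IO.MemoryMappedFiles;

class MappedObjScan
{
    static void Main()
    {
        string path = "spacestation.obj"; // example file from the comments above
        long length = new FileInfo(path).Length;
        long count = 0;

        using (var mmf = MemoryMappedFile.CreateFromFile(
            path, FileMode.Open, null, 0, MemoryMappedFileAccess.Read))
        using (var view = mmf.CreateViewAccessor(0, length, MemoryMappedFileAccess.Read))
        {
            long i = 0;
            while (i < length)
            {
                byte b = view.ReadByte(i);
                if ((b >= '0' && b <= '9') || b == '-')
                {
                    ParseFloat(view, length, ref i); // value would go into your vertex list
                    count++;
                }
                else
                {
                    i++;
                }
            }
        }

        Console.WriteLine("Found {0} numeric tokens", count);
    }

    // Minimal "digits[.digits]" parser; no exponents, no locale handling.
    static float ParseFloat(MemoryMappedViewAccessor view, long length, ref long i)
    {
        bool negative = view.ReadByte(i) == (byte)'-';
        if (negative) i++;

        float result = 0f;
        while (i < length && view.ReadByte(i) >= '0' && view.ReadByte(i) <= '9')
            result = result * 10f + (view.ReadByte(i++) - '0');

        if (i < length && view.ReadByte(i) == (byte)'.')
        {
            i++;
            float scale = 0.1f;
            while (i < length && view.ReadByte(i) >= '0' && view.ReadByte(i) <= '9')
            {
                result += (view.ReadByte(i++) - '0') * scale;
                scale *= 0.1f;
            }
        }

        return negative ? -result : result;
    }
}

Note that this counts every digit run as a number, face indices included; a real loader would dispatch on the line keyword ("v", "vt", "vn", "f") first. The point is that the mapped view plus a byte-level scan avoids the per-line string and array allocations discussed in the other answers.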

André Caron

I tested .NET string parsing once, and the fastest function for parsing text was the old VB Val() function. You could pull the relevant parts out of Microsoft.VisualBasic.Conversion.Val(string).

Converting String to numbers

Comparison of relative test times (ms per 100,000 conversions):

Method                                  Double  Single  Integer  Int (w/ decimal point)
Val(str)                                14      13      6        16
Cxx(Val(str)), e.g. CSng(Val(str))      14      14      6        16
Convert.To(str)                         22      21      17       exception
XX.Parse(str), e.g. Single.Parse(str)   23      21      16       exception
Cxx(str)                                30      31      31       32

Val: fastest; part of the VisualBasic DLL; skips non-numeric characters.
Convert.To and Parse: slower; part of the core library; throw an exception on bad format (including a decimal point for integers).
Cxx: slowest (for strings); part of the core; consistent times across formats.
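
For reference, a minimal sketch of calling Val() from C#; it needs a reference to the Microsoft.VisualBasic assembly, and since Val() returns a double you'd cast for float data:

using System;
using Microsoft.VisualBasic; // add a reference to Microsoft.VisualBasic.dll

class ValDemo
{
    static float ParseFloat(string s)
    {
        // Conversion.Val parses leading numeric text, skips what it can't read,
        // and always treats '.' as the decimal separator.
        return (float)Conversion.Val(s);
    }

    static void Main()
    {
        Console.WriteLine(ParseFloat("3.14159")); // 3.14159
        Console.WriteLine(ParseFloat("-0.5"));    // -0.5
    }
}
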
Jon B