0

I'm trying to read some files with ReadLine, but my files contain some line breaks that I need to capture (not all of them), and I don't know how to get them into the same array, or into any other array together with these separators... because ReadLine reads lines and splits on those line breaks, right?

I can't replace them because I need to check them after the process, so I need to get the line breaks AND the content after them. That's the problem. How can I do that?

Here's my code:

public class ReadFile
{
    string extension;
    string filename;
    System.IO.StreamReader sr;

    /// <summary>
    /// Records the extension and base name of <paramref name="arquivo"/> and, when the
    /// extension is ".EXP" (compared case-insensitively), opens the file and parses it
    /// with <see cref="ReadEXP"/>. For any other extension a message box is shown and
    /// the file is not opened at all.
    /// </summary>
    /// <param name="arquivo">Path of the file to read.</param>
    /// <param name="sr">
    /// Ignored. The constructor always opens its own reader on <paramref name="arquivo"/>;
    /// the parameter is kept only so existing callers keep compiling.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="arquivo"/> is null.</exception>
    public ReadFile(string arquivo, System.IO.StreamReader sr)
    {
        if (arquivo == null)
        {
            throw new ArgumentNullException("arquivo");
        }

        string ext = Path.GetExtension(arquivo);

        this.extension = ext;
        this.filename = Path.GetFileNameWithoutExtension(arquivo);

        // Validate before touching the file so an unsupported file is never opened or locked.
        if (!ext.Equals(".EXP", StringComparison.OrdinalIgnoreCase))
        {
            MessageBox.Show("Extensão de arquivo não suportada: " + ext);
            return;
        }

        // The reader is owned by this constructor: dispose it as soon as parsing ends,
        // including when ReadEXP throws. After this point the sr field refers to a
        // disposed reader, so ReadEXP must not be called again on this instance.
        using (StreamReader reader = new StreamReader(arquivo, System.Text.Encoding.Default))
        {
            this.sr = reader;
            ReadEXP(arquivo);
        }
    }

    /// <summary>
    /// Reads the reader held in the <c>sr</c> field line by line. A line containing
    /// "&lt;Segment&gt;", "&lt;Source&gt;" or "&lt;Target&gt;" has every known tag stripped
    /// and the remaining text stored as the current ID, source or target. Each time a
    /// "&lt;Target&gt;" line is seen, the current ID/source/target triple is passed to
    /// <c>DataBase.PopulateTable_EXP</c>.
    /// </summary>
    /// <remarks>
    /// Processing is strictly per line: only the text on the same line as the opening
    /// tag is captured, so element content that continues on following lines is not
    /// included.
    /// </remarks>
    /// <param name="arquivo">Not used by this method; input comes from the <c>sr</c> field.</param>
    public void ReadEXP(string arquivo)
    {
        // The first line is consumed and dropped before the loop starts.
        // NOTE(review): presumably a header/declaration line — confirm against the file format.
        sr.ReadLine();

        string[] Separators = new string[] { "<Segment>", "</Segment>", "<Source>", "</Source>", "<Target>", "</Target>" };

        string ID = null;
        string Source = null;
        string Target = null;

        DataBase db = new DataBase();
        //db.CreateTable_EXP(filename);
        db.CreateTable_EXP();

        string line;
        while ((line = sr.ReadLine()) != null)
        {
            try
            {
                int pieceCount;

                if (line.Contains("<Segment>"))
                {
                    ID = StripSeparators(line, Separators, out pieceCount);
                    MessageBox.Show("Segment[" + pieceCount + "]: " + ID);
                }
                if (line.Contains("<Source>"))
                {
                    Source = StripSeparators(line, Separators, out pieceCount);
                    MessageBox.Show("Source[" + pieceCount + "]: " + Source);
                }
                if (line.Contains("<Target>"))
                {
                    Target = StripSeparators(line, Separators, out pieceCount);
                    MessageBox.Show("Target[" + pieceCount + "]: " + Target);

                    // A target line completes a record; ID and Source hold whatever was
                    // seen most recently (null if no such line has appeared yet).
                    db.PopulateTable_EXP(ID, Source, Target);

                    MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
                }
            }
            catch (IndexOutOfRangeException e)
            {
                MessageBox.Show(e.Message);
                MessageBox.Show("ID: " + ID + "\nSource: " + Source + "\nTarget: " + Target);
            }
        }
    }

    /// <summary>
    /// Splits <paramref name="line"/> on every entry of <paramref name="separators"/> and
    /// glues the pieces back together, i.e. returns the line with all separators removed.
    /// </summary>
    /// <param name="line">Text to process.</param>
    /// <param name="separators">Substrings to remove.</param>
    /// <param name="pieceCount">Number of pieces the split produced (empty pieces included).</param>
    /// <returns>The concatenation of all pieces, in order.</returns>
    private static string StripSeparators(string line, string[] separators, out int pieceCount)
    {
        string[] pieces = line.Split(separators, StringSplitOptions.None);
        pieceCount = pieces.Length;
        return string.Concat(pieces);
    }
John Saunders
  • 160,644
  • 26
  • 247
  • 397
Aly
  • 55
  • 2
  • 9
  • 12
    If you read XML files, then use a specialized XML reader (e.g. with the `XDocument`). – Nico Schertler Jan 30 '14 at 19:21
  • 1
    Nico's comment times a thousand. Don't try to roll your own XML parsing. – driis Jan 30 '14 at 19:23
  • The hard way: use ReadLine, but put the string into a buffer. Write code to examine the buffer to decide if you have all of the content needed to process the current tag. If not, keep doing ReadLine and adding to the buffer until you find the end tag. – mbeckish Jan 30 '14 at 19:24
  • Please show a sample of your input and the expected places where you want to read more than one line at a time. If it is XML - close the question and use the proper API as @NicoSchertler suggested. – Alexei Levenkov Jan 30 '14 at 19:29
  • Thanks guys, I didn't realize about XML readers... – Aly Jan 31 '14 at 12:21

1 Answers1

1

If you are trying to read XML, try using the built-in libraries. Here is a simple example of loading a section of XML with <TopLevelTag> in it.

// Load the XML file and take the document's child element named "TopLevelTag".
// Element() returns null when no child with that name exists, hence the check below.
var xmlData = XDocument.Load(@"C:\folder\file.xml").Element("TopLevelTag");
if (xmlData == null) throw new Exception("Failed To Load XML");

Here is a tidy way to get content without it throwing an exception if missing from the XML.

// The explicit XElement-to-string conversion yields null for a missing element instead
// of throwing, so the ?? operator can substitute an empty string.
var xmlBit = (string)xmlData.Element("SomeSubTag") ?? "";

If you really have to roll your own, then look at examples for CSV parsers, where ReadBlock can be used to get the raw data including line breaks.

// Reusable buffer that receives the raw characters of each chunk.
private char[] chunkBuffer = new char[4096];
// Open the file for reading while allowing other processes to read and write it.
// NOTE(review): this snippet never disposes the reader; wrap it in a using block in real code.
var fileStream = new System.IO.StreamReader(new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite));
// ReadBlock copies up to chunkBuffer.Length characters into the buffer, line-break
// characters included, and returns how many characters were actually read.
var chunkLength = fileStream.ReadBlock(chunkBuffer, 0, chunkBuffer.Length);
WhoIsRich
  • 4,053
  • 1
  • 33
  • 19
  • Thanks, I guess I'll try ReadBlock, because of this: http://stackoverflow.com/questions/21488555/special-characters-with-xdocument – Aly Feb 03 '14 at 19:01