0

I do not want to read the whole file at any point, I know there are answers on that question, I want t o read the First or Last line.

I know that my code locks the file that it's reading for two reasons 1) The application that writes to the file crashes intermittently when I run my little app with this code but it never crashes when I am not running this code! 2) There are a few articles that will tell you that File.ReadLines locks the file.

There are some similar questions but that answer seems to involve reading the whole file which is slow for large files and therefore not what I want to do. My requirement to only read the last line most of the time is also unique from what I have read about.

I nead to know how to read the first line (Header row) and the last line (latest row). I do not want to read all lines at any point in my code because this file can become huge and reading the entire file will become slow.

I know that

line = File.ReadLines(fullFilename).First().Replace("\"", "");

... is the same as ...

FileStream fs = new FileStream(@fullFilename, FileMode.Open, FileAccess.Read, FileShare.Read);

My question is, how can I repeatedly read the first and last lines of a file which may be being written to by another application without locking it in any way. I have no control over the application that is writting to the file. It is a data log which can be appended to at any time. The reason I am listening in this way is that this log can be appended to for days on end. I want to see the latest data in this log in my own c# programme without waiting for the log to finish being written to.

My code to call the reading / listening function ...

    //Start Listening to the "data log"
    private void btnDeconstructCSVFile_Click(object sender, EventArgs e)
    {
        MySandbox.CopyCSVDataFromLogFile copyCSVDataFromLogFile = new MySandbox.CopyCSVDataFromLogFile();
        copyCSVDataFromLogFile.checkForLogData();
    }

My class which does the listening. For now it simply adds the data to 2 generics lists ...

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using MySandbox.Classes;
using System.IO;

namespace MySandbox
{
    public class CopyCSVDataFromLogFile
    {
        static private List<LogRowData> listMSDataRows = new List<LogRowData>();
        static String fullFilename = string.Empty;
        static LogRowData previousLineLogRowList = new LogRowData();
        static LogRowData logRowList = new LogRowData();
        static LogRowData logHeaderRowList = new LogRowData();
        static Boolean checking = false;

        public void checkForLogData()
        {
            //Initialise
            string[] logHeaderArray = new string[] { };
            string[] badDataRowsArray = new string[] { };

            //Get the latest full filename (file with new data) 
            //Assumption: only 1 file is written to at a time in this directory.
            String directory = "C:\\TestDir\\";
            string pattern = "*.csv";
            var dirInfo = new DirectoryInfo(directory);
            var file = (from f in dirInfo.GetFiles(pattern) orderby f.LastWriteTime descending select f).First();
            fullFilename = directory + file.ToString(); //This is the full filepath and name of the latest file in the directory!

            if (logHeaderArray.Length == 0)
            {
                //Populate the Header Row
                logHeaderRowList = getRow(fullFilename, true);
            }

            LogRowData tempLogRowList = new LogRowData();

            if (!checking)
            {
                //Read the latest data in an asynchronous loop
                callDataProcess();
            }
        }

        private async void callDataProcess()
        {
            checking = true;                        //Begin checking
            await checkForNewDataAndSaveIfFound();
        }

        private static Task checkForNewDataAndSaveIfFound()
        {
            return Task.Run(() =>   //Call the async "Task"
            {
                while (checking)    //Loop (asynchronously)
                {
                    LogRowData tempLogRowList = new LogRowData();

                    if (logHeaderRowList.ValueList.Count == 0)
                    {
                        //Populate the Header row
                        logHeaderRowList = getRow(fullFilename, true);
                    }
                    else
                    {
                        //Populate Data row
                        tempLogRowList = getRow(fullFilename, false);

                        if ((!Enumerable.SequenceEqual(tempLogRowList.ValueList, previousLineLogRowList.ValueList)) &&
                            (!Enumerable.SequenceEqual(tempLogRowList.ValueList, logHeaderRowList.ValueList)))
                        {
                            logRowList = getRow(fullFilename, false);
                            listMSDataRows.Add(logRowList);
                            previousLineLogRowList = logRowList;
                        }
                    }

                    //System.Threading.Thread.Sleep(10);  //Wait for next row.
                }
            });
        }

        private static LogRowData getRow(string fullFilename, bool isHeader)
        {
            string line;
            string[] logDataArray = new string[] { };
            LogRowData logRowListResult = new LogRowData();

            try
            {
                if (isHeader)
                {
                    //Asign first (header) row data.
                    //Works but seems to block writting to the file!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    line = File.ReadLines(fullFilename).First().Replace("\"", "");    
                }
                else
                {
                    //Assign data as last row (default behaviour).
                    line = File.ReadLines(fullFilename).Last().Replace("\"", "");
                }

                logDataArray = line.Split(',');

                //Copy Array to Generics List and remove last value if it's empty.
                for (int i = 0; i < logDataArray.Length; i++)
                {
                    if (i < logDataArray.Length)
                    {
                        if (i < logDataArray.Length - 1)
                        {
                            //Value is not at the end, from observation, these always have a value (even if it's zero) and so we'll store the value.
                            logRowListResult.ValueList.Add(logDataArray[i]);
                        }
                        else
                        {
                            //This is the last value
                            if (logDataArray[i].Replace("\"", "").Trim().Length > 0)
                            {
                                //In this case, the last value is not empty, store it as normal.
                                logRowListResult.ValueList.Add(logDataArray[i]);
                            }
                            else { /*The last value is empty, e.g. "123,456,"; the final comma denotes another field but this field is empty so we will ignore it now. */ }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                if (ex.Message == "Sequence contains no elements")
                { /*Empty file, no problem. The code will safely loop and then will pick up the header when it appears.*/ }
                else
                {
                    //TODO: catch this error properly
                    Int32 problemID = 10;   //Unknown ERROR.
                }
            }

            return logRowListResult;
        }
    }
}
Ewan
  • 541
  • 8
  • 23
  • There are also few articles how to open files in multiple processes using proper sharing mode... – Alexei Levenkov May 06 '15 at 16:00
  • possible duplicate of [File.ReadLines without locking it?](http://stackoverflow.com/questions/5338450/file-readlines-without-locking-it) – musefan May 06 '15 at 16:03
  • musefan: that solution is to read the whole file. I have a file which will be apended to for days or even weeks on end. I don;t want to wait 2 weeks and then read the file. I want to read the Last line only for the duration of the 2 week process. I will never read the whole file during my process as it it might be too large to read. – Ewan May 07 '15 at 07:54
  • Alexei: which ones? This might be my solution if they can read the last line only. – Ewan May 07 '15 at 07:55

1 Answers1

0

I found the answer in a combination of other questions. One answer explaining how to read from the end of a file, which I adapted so that it would read only 1 line from the end of the file. And another explaining how to read the entire file without locking it (I did not want to read the entire file but the not locking part was useful). So now you can read the last line of the file (if it contains end of line characters) without locking it. For other end of line delimeters, just replace my 10 and 13 with your end of line character bytes...

Add the method below to public class CopyCSVDataFromLogFile

private static string Reverse(string str)
{
    char[] arr = new char[str.Length];
    for (int i = 0; i < str.Length; i++)
        arr[i] = str[str.Length - 1 - i];
    return new string(arr);
}

and replace this line ...

line = File.ReadLines(fullFilename).Last().Replace("\"", "");

with this code block ...

Int32 endOfLineCharacterCount = 0;
Int32 previousCharByte = 0;
Int32 currentCharByte = 0;
//Read the file, from the end, for 1 line, allowing other programmes to access it for read and write!
using (FileStream reader = new FileStream(fullFilename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 0x1000, FileOptions.SequentialScan))
{
    int i = 0;
    StringBuilder lineBuffer = new StringBuilder();
    int byteRead;
    while ((-i < reader.Length) /*Belt and braces: if there were no end of line characters, reading beyond the file would give a catastrophic error here (to be avoided thus).*/
        && (endOfLineCharacterCount < 2)/*Exit Condition*/)
    {
        reader.Seek(--i, SeekOrigin.End);
        byteRead = reader.ReadByte();

        currentCharByte = byteRead;

        //Exit condition: the first 2 characters we read (reading backwards remember) were end of line ().
        //So when we read the second end of line, we have read 1 whole line (the last line in the file)
        //and we must exit now.
        if (currentCharByte == 13 && previousCharByte == 10)
        {
            endOfLineCharacterCount++;
        }

        if (byteRead == 10 && lineBuffer.Length > 0)
        {
            line += Reverse(lineBuffer.ToString());
            lineBuffer.Remove(0, lineBuffer.Length);
        }
        lineBuffer.Append((char)byteRead);

        previousCharByte = byteRead;
    }

    reader.Close();
}
codersl
  • 2,222
  • 4
  • 30
  • 33
Ewan
  • 541
  • 8
  • 23