-2

I have a file with 1 million lines. My program checks, line by line, whether each line contains one of the words the user has asked to remove. If a line contains such a word, the line has to be removed, and the remaining lines are shown in a list. Every time I press Start with the full file, the program freezes and nothing shows up in the listbox. However, if I load only about 1k lines, it cleans the file and displays the new contents in the list. What's the best way to handle this?

My code :

    using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.IO;

namespace BoringAssComboTool
{
    /// <summary>
    /// Form that loads a text file, removes every line containing one of the
    /// comma-separated terms typed into <c>textBox1</c>, shows the remaining
    /// lines in <c>listBox1</c> and can save them to a new file.
    /// </summary>
    public partial class Form2 : Form
    {
        // Window-message constants used by WndProc.
        private const int WM_NCHITTEST = 0x84;
        private const int HT_CLIENT = 0x1;
        private const int HT_CAPTION = 0x2;

        // Working copy of the loaded lines; filtered in place by button3_Click.
        List<String> list;
        // Full path of the file chosen in button1_Click.
        string myFile = null;
        // Lines exactly as they were read from the file.
        string[] line = null;
        // Number of lines in the file when it was loaded.
        int linecount = 0;

        public Form2()
        {
            InitializeComponent();
        }

        // The empty handlers below are kept because they are presumably wired up
        // in the designer part of this partial class - TODO confirm before removing.
        private void groupBox1_Enter(object sender, EventArgs e)
        {
        }

        private void label1_Click(object sender, EventArgs e)
        {
        }

        private void textBox1_TextChanged(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Lets the user pick a text file and loads its lines into memory.
        /// </summary>
        public void button1_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.RestoreDirectory = true;
                openFileDialog.Multiselect = false;
                openFileDialog.Filter = "Combo List (*.txt)|*.txt";
                openFileDialog.FilterIndex = 1;

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                string selectedFile = openFileDialog.FileName;
                MessageBox.Show("You selected " + selectedFile);

                string[] loadedLines;
                try
                {
                    // Read the file exactly once; the array and the list below
                    // share the same string instances.
                    loadedLines = File.ReadAllLines(selectedFile);
                }
                catch (IOException ex)
                {
                    MessageBox.Show("Could not read the file: " + ex.Message);
                    return;
                }
                catch (UnauthorizedAccessException ex)
                {
                    MessageBox.Show("Could not read the file: " + ex.Message);
                    return;
                }

                // Only replace the current state once the read has succeeded.
                myFile = selectedFile;
                line = loadedLines;
                linecount = loadedLines.Length;
                list = loadedLines.ToList();
                label3.Text = "Total loaded : " + linecount;
            }
        }

        /// <summary>
        /// Removes every loaded line that contains at least one of the
        /// comma-separated terms from <c>textBox1</c> and shows the result.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (list == null)
            {
                MessageBox.Show("Please load a file first.");
                return;
            }

            // Empty terms are dropped because an empty string is contained in
            // every line and would therefore remove everything. Surrounding
            // whitespace is trimmed so "a.com, b.com" works as expected.
            string[] terms = textBox1.Text
                .Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries)
                .Select(term => term.Trim())
                .Where(term => term.Length > 0)
                .ToArray();

            int removedlines = 0;
            if (terms.Length > 0)
            {
                // One pass over the list: each line is tested against all terms
                // and removed at most once.
                removedlines = list.RemoveAll(entry => ContainsAny(entry, terms));
            }

            label4.Text = "Total Removed = " + removedlines;

            // Update the UI once, after all the work is done. Assigning the same
            // list instance again does not refresh the ListBox, so detach first.
            listBox1.DataSource = null;
            listBox1.DataSource = list;
        }

        /// <summary>
        /// Returns true when <paramref name="text"/> contains at least one of
        /// <paramref name="terms"/> (ordinal, case-sensitive comparison).
        /// </summary>
        private static bool ContainsAny(string text, string[] terms)
        {
            foreach (string term in terms)
            {
                if (text.IndexOf(term, StringComparison.Ordinal) >= 0)
                {
                    return true;
                }
            }

            return false;
        }

        private void pictureBox3_Click(object sender, EventArgs e)
        {
            this.WindowState = FormWindowState.Minimized;
        }

        private void pictureBox2_Click(object sender, EventArgs e)
        {
            Application.Exit();
        }

        private void label3_Click(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Saves the items currently shown in <c>listBox1</c> to a file chosen
        /// by the user, one item per line.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            using (SaveFileDialog save = new SaveFileDialog())
            {
                save.FileName = "Cleaned Combo File.txt";
                save.Filter = "Text File | *.txt";

                if (save.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // The using block closes the file even if a write fails.
                using (StreamWriter writer = new StreamWriter(save.OpenFile()))
                {
                    foreach (object item in listBox1.Items)
                    {
                        writer.WriteLine(item.ToString());
                    }
                }

                MessageBox.Show("Saved succesfully");
            }
        }

        /// <summary>
        /// Reports the client area as the title bar so the window can be
        /// dragged by clicking anywhere on the form background.
        /// </summary>
        protected override void WndProc(ref Message m)
        {
            base.WndProc(ref m);

            // Only remap hits on the client area; any other hit-test result
            // (borders, system buttons) is left as the base class reported it.
            if (m.Msg == WM_NCHITTEST && m.Result == (IntPtr)HT_CLIENT)
            {
                m.Result = (IntPtr)HT_CAPTION;
            }
        }

        private void Form2_Load(object sender, EventArgs e)
        {
        }
    }
}
Denis
  • 31
  • 1
  • 7
  • 1
    You're calling `ReadAllLines` twice...so that's potentially two million lines in memory for a period of time. Why not use [ReadLines](https://msdn.microsoft.com/en-us/library/dd383503(v=vs.110).aspx) instead? – Kenneth K. Feb 28 '18 at 23:08
  • https://stackoverflow.com/questions/2161895/reading-large-text-files-with-streams-in-c-sharp#9643111 The answer in this question might help. – null Feb 28 '18 at 23:08
  • Unless you utilize asynchronous patterns (async/await recommended), your UI will be non-responsive until the operation is completed. – myermian Feb 28 '18 at 23:08
  • When you run a loop over a lot of data on the UI thread, the UI will hang; you have to use `async`/`await` to keep the UI responsive while your file is loading – Sabir Hossain Feb 28 '18 at 23:09
  • @KennethK. It still crashes for some reason – Denis Feb 28 '18 at 23:14
  • found word should be removed from the file? – M.kazem Akhgary Feb 28 '18 at 23:14

1 Answers1

3

You should not really read all the text into memory. Instead change your program to read line-by-line. You can then append each updated/fixed line into a temp file. After the processing is done, if necessary, overwrite the source file with the temp file.

Here is how you can read file line-by-line:

// Stream the file one line at a time instead of loading it all into memory.
using (StreamReader reader = new StreamReader(path))
{
    string currentLine;

    // ReadLine returns null once the end of the file has been reached.
    while ((currentLine = reader.ReadLine()) != null)
    {
        Console.WriteLine(currentLine);
    }
}

Here is documentation for StreamReader/StreamWriter classes.

In addition to that, I'd suggest switching to the async APIs. StreamReader has a ReadLineAsync method for exactly this purpose. Using it lets you avoid blocking the UI thread for long periods.

Artak
  • 2,819
  • 20
  • 31
  • This is cleanest and simplest to understand. OP can also use it to count total number of lines, since OP also did that. It also utilizes a using block to ensure his reader closes and disposes so he doesn't have potentially huge amounts of data sticking around in memory. OP, your ui will still hang until this method finishes, but it will get you desired results without consuming all your memory – user7396598 Feb 28 '18 at 23:14
  • Thanks I'll work with streamreader – Denis Feb 28 '18 at 23:21
  • I'm not sure C# will handle a task as large as this. Why not use R or Python, both of which rip through large files quickly and easily. R: inputFile <- "foo.txt" con <- file(inputFile, open = "r") while (length(oneLine <- readLines(con, n = 1)) > 0) { myLine <- unlist((strsplit(oneLine, ","))) print(myLine) } close(con) ... or ... Python: with open("log.txt") as infile: for line in infile: do_something_with(line) – ASH Mar 02 '18 at 13:19