1

I'm new to C# and can't seem to figure this one out.

In my application I have a code preprocessing module that allows me to manipulate a file (in this case a csv file) before loading it into my application for processing it into my database. One of the files that comes to my application has a column named "Business ID" that I need removed before I move forward in the rest of my preprocessing. There are various files this same code is used to read in and the only difference between them is that this one CSV file has that extra "Business ID" field. It will only need to be removed if the column is present.

Before someone recommends it, the column cannot be removed outside of the program or during the csv generation - it has to be done in this preprocessing.

My code is below - I redacted information in some of the case statements for confidentiality but the remainder of the code is in tact. I appreciate any help y'all can give!

EDIT: Per request here's as much of a snippet as I can provide of the .csv file. This is just the headers. I can't provide an actual record. Hope this is enough for what you wanted to see.

"Transmission Date","Transmission Time","Transaction Code","Client Site","Transaction Date","Site ID","Company","Division","Franchise","Account","Statement Code","House Number","Customer Number","Line Of Business","Download Write Off $","Service $ Write Off","Equipmnt $ Write Off","# of Pieces Rented","Scheme ID","Agency Code","Bill Type Code","Customer Type","Customer Category","VIP Code","First Name","Middle Initial","Last Name","Disconnect Reason","Bill To Name","Billing Addr Line 1","Billing Addr Line 2","Billing Addr Line 3","Billing City","Billing State","Billing Zip Code","Dwnld Cust Stmt Sts","Customer Status","Stop Bill Date","Disconnect Date","Date Last Payment","Last Payment Amt","Subscriber Name","Service Addr Line 1","Service Addr Line 2","Service Addr Line 3","Service City","Service State","Service Zip Code","Dwnld Bal Last Stmt","SS#","DOB","Home Phone","Work Phone","Other Phone","Email","Driver License","Dwnld Disconnect Rsn","Dwnld Disconnct Date","Deposit Date","Total Paid Deposit","Connect Date","Ext Credit Score","Behavioral Score","Management Area","Customer Comment","Cust Business Name","Privacy Code","Business ID"
using System;
using System.IO;
using System.Linq;
using System.Data;
using System.Collections.Generic;
using GSS.Common.Code;
using GSSI.Latitude.Library.DataAccess;

public class CodeProcessor : ICodePreprocessor
{
    public string Execute(string fileContents)
    {
        string resultFile = @"C:\Temp\" + Path.GetFileName(fileContents);
        string fileName = Path.GetFileName(fileContents);
        
        string[] nameParts = fileName.Split('_');
        
        string market = nameParts[0].ToString();
        
        string resiCommFlag = "";
        if (fileName.Contains("Residential")){
            resiCommFlag = "R";
        }
        if (fileName.Contains("Commercial")) {
                resiCommFlag = "C";
            }
        
        using (StreamWriter wr = new StreamWriter(resultFile))
        {
            using (StreamReader sr = new StreamReader(fileContents))
            {
                while (!sr.EndOfStream) 
                {
                    string line = sr.ReadLine();
                    string test1 = line.TrimStart('"');
                    
                    string[] lineArray = line.Split(',');
                    string number = "";
                    string desk = "";
                    string qlevel = "";
                    string current1 = "";
                    string current3 = "";
                    string trxCode = lineArray[2].Replace("\"","");
                    
                        string site = lineArray[5].Replace("\"","");
                        string client = "";
                        
                        switch (market) {
                                case "": 
                                    switch (site) {
                                        case "3":
                                            client = "";
                                            break;
                                        case "5":
                                            client = "";
                                            break;
                                        default:
                                            client = "";
                                            break;
                                    }
                                    break;
                                
                                case "":
                                    switch (site) {
                                        case "1":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                        case "2":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                        case "3":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                        case "4":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                    }
                                    break;
                                case "":
                                    switch (site) {
                                        case "":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                    }
                                    break;
                                case "":
                                    switch (site) {
                                        case "1":
                                            switch (resiCommFlag) {
                                                case "R":
                                                    client = "";
                                                    break;
                                                case "C":
                                                    client = "";
                                                    break;
                                            }
                                            break;
                                    }
                                    break;
                                case "":
                                    client = "";
                                    break;
                                
                                default:
                                    client = "";
                                    break;
                        }
                        
                        bool acctFound = false;
                        string account = lineArray[9].Replace("\"","");
                        string sql = @"select number, desk, qlevel,current1,current3 from master where customer = '" + client +
                            "' and account = '" + account + "' order by qlevel,received desc";
                        DataSet ds = null;
                        try
                        {
                            ds = DataManager.GetDataSet(sql, "master");
                            if (ds.Tables.Count > 0 && ds.Tables[0].Rows.Count > 0) {
                                number = ds.Tables[0].Rows[0][0].ToString();
                                desk = ds.Tables[0].Rows[0][1].ToString();
                                qlevel = ds.Tables[0].Rows[0][2].ToString();
                                current1 = ds.Tables[0].Rows[0][3].ToString();
                                current3 = ds.Tables[0].Rows[0][4].ToString();
                                acctFound = true;
                            } 
                        }
                        catch (Exception ex) { throw ex; }
                        finally {
                            if (ds != null) {
                                ds.Dispose();
                                ds = null;
                            }
                        }
                        bool chgTrx = false;
                        switch (trxCode) {
                            case "":
                                switch (desk) {
                                    case "":
                                        chgTrx = true;
                                        break;
                                    case "":
                                        chgTrx = true;
                                        break;
                                    default:
                                        if (qlevel != "" && qlevel != "") {
                                            chgTrx = false;
                                        } else {
                                            chgTrx = true;
                                        }
                                        break;
                                }
                                if (acctFound == false) {
                                    chgTrx = true;
                                }
                                break;
                            case "":
                                chgTrx = false;
                                if (acctFound == false) {
                                    chgTrx = true;
                                }
                                break;
                            case "":
                                chgTrx = false;
                                if (acctFound == false) {
                                    chgTrx = true;
                                }
                                break;
                            default:
                                chgTrx = true;
                                break;
                        }
                        
                        
                        if (chgTrx == true) {
                            
                            trxCode = ""; 
                            lineArray[2] = "\""+trxCode+"\"";
                        }

                    wr.WriteLine(string.Join(",", lineArray)+","+number+","+desk+","+qlevel+","+current1+","+current3 );
                }
            }
        }
        return resultFile;
    }
    
public void Dispose() { }
}
  • Can you put in your question a fragment of the csv file that you're trying to manipulate, please? – Ramon Dias Jul 24 '20 at 14:54
  • I can only provide the headers in the file. I've updated the post. – TWLaughingMan Jul 24 '20 at 15:37
  • And the problem is that `Business Id` is being written to the result on the `wr.WriteLine` line, and you can't have that? – Joshua Robinson Jul 24 '20 at 15:46
  • Correct. Business ID cannot be one of the fields that's being outputted at the end of this. To add a little more perspective: after preprocessing this file is read into my DB with each field mapped to a certain place. The mapping is expecting each field to be in a certain location in the file output. Business ID pushes all the fields after it (which are being added in this query and do not already exist in the file) down one. This ends up messing up my mapping. – TWLaughingMan Jul 24 '20 at 15:54

3 Answers3

2

I found a bit confusing to understand all of your code, so I will stick to the main problem that you described and how I would solve it. I would play it simple, doing the follow:

  • Read the lines of the file
string[] rows = File.ReadAllLines(fileName);
  • This will result in a string[] with each position of the array having a row of the csv. Now you navigate in the rows getting each column.
//starting r with 1 ignores the header line
for(int r =1; r<rows.Count; r++) 
{
  string[] columns = rows[i].Split(',');
           
  //columns.Count -1 will ignore the last column (BusinessId)
  for(int c= 0; c<columns.Count-1; c++) 
  {
             
    //process each column of each row and do your business
             
   }
}

If there's another column that should be ignored, all you need to do is know it's number, and put a comparison in this last for. For example, if you want to ignore the second row:

//rows.Count -1 will ignore the last column (BusinessId)
for(int c = 0; c<columns.Count-1; c++) 
{
  //Let the iteration proceed, ignoring the second column (position 1).
  if(c == 1) continue; 

  //process each column of each row and do your business
             
}
Ramon Dias
  • 835
  • 2
  • 12
  • 23
  • 1
    Appreciate the recommendation. Unfortunately, these files can be extremely large and I have to preserve processing time as much as possible. If I can't figure out another solution I'll try it out. – TWLaughingMan Jul 24 '20 at 18:16
  • 1
    @TWLaughingMan: The inner loop processes the columns. We can assume that the number of columns is a constant (+/-1). Therefore the inner loop is `O(1)` because it does not depend on the number of rows. The outer loop is `O(n)`. Overall: `O(1) * O(n) --> O(n)`. Note that the big-O notation does not measure the absolute amount of work. It tells us how the time or memory complexity evolves with increasing data volume. Therefore we must replace real number of columns by 1 in this notation. See: https://stackoverflow.com/a/697935/880990 – Olivier Jacot-Descombes Jul 27 '20 at 12:14
  • @Olivier Jacot-Descombes Thanks for the explanation. My apologizes, I messed up with the iteration variables. One `r` must go for the rows and another variable `c`, for the columns, therefore in this case, the inner loop depends on the outer. Isn't it? – Ramon Dias Jul 27 '20 at 15:27
  • All the columns processed by in the inner will be processed once for reach row looped by the outer loop. We have an effective work amount proportional to `number_of_rows * number_of_columns` but we still have `O(n)` with n = number_of_rows, because the number of columns is constant, i.e it does not depend on the size of your file. – Olivier Jacot-Descombes Jul 27 '20 at 15:51
  • Right. Removed the note. – Ramon Dias Jul 27 '20 at 17:34
1

Since "Business ID" is the last column in the CSV and you are appending other stuff when you write it back, you can replace the last column with the first column you would append otherwise:

if (lineArray.Length == 68) { // We have a Business ID column.
    lineArray[67] = number;  // Replace Business ID.
    wr.WriteLine(string.Join(",", lineArray)+","+desk+","+qlevel+","+current1+","+current3 );
} else {
    wr.WriteLine(string.Join(",", lineArray)+","+number+","+desk+","+qlevel+","+current1+","+current3);
}

It can be simplified a bit by using string interpolation (since C#6.0):

if (lineArray.Length == 68) { // We have a Business ID column
    lineArray[67] = number; // Replace Business ID
    wr.WriteLine($"{String.Join(",", lineArray)},{desk},{qlevel},{current1},{current3}");
} else {
    wr.WriteLine($"{String.Join(",", lineArray)},{number},{desk},{qlevel},{current1},{current3}");
}
Olivier Jacot-Descombes
  • 104,806
  • 13
  • 138
  • 188
  • This works if there's no data in the Business ID field on a record. It renamed the column to number and appended in the number (and other new fields) correctly to the end of the lineArray. However, if there is data in the Business ID field for that column on a record then it doesn't work. It renames the column to number still but it puts the Business ID data into the newly named Number column and then cuts off {current3}. – TWLaughingMan Jul 24 '20 at 18:15
  • My solution assumes that there are 67 columns in total if there is no Business ID and 68 columns if there is a Business ID column and that this is the last field. It does not matter whether this field is empty or not. A CSV file has the same number of columns (i.e. the same number of separating commas) in every row. It makes no difference whether the fields are empty or not. Debug your code and see where the things go not as expected. – Olivier Jacot-Descombes Jul 25 '20 at 10:09
  • 1
    This did end up working after all. My apologies but there was an oddity in the CSV file being generated. It had an extra column with no column header that I didn't realize. When I accounted for that, your solution worked beautifully. I really appreciate the help! – TWLaughingMan Jul 28 '20 at 13:23
0

If the "Business Id" column always appears as the last column, and the columns are always identical otherwise, you could just skip it when outputting the result.

Something like...

public string Execute(string fileContents)
{
   int columnsToTake = // Number of columns you want... this would be the number of columns in the file without the Business Id
   
   using (StreamWriter wr = new StreamWriter(resultFile))
   {
      using (StreamReader sr = new StreamReader(fileContents))
      {
         // Excluded most of your code...
         wr.WriteLine(wr.WriteLine(string.Join(",", lineArray.Take(columnsToTake))+","+number+","+desk+","+qlevel+","+current1+","+current3 );
      }
   }
}

So, if your source file contains 22 columns without "Business ID" you would set columnsToTake to 22. Then if the source file does not contain the "Business ID" column, you take all 22 columns. If the source file does contain the "Business ID" then you only take the first 22 columns, leaving the last column out of results.

Again, this will only work if the columns are exactly the same except for "Business ID", and "Business ID" appears as the last column.

If you're using a version of .NET which doesn't have the string.Join(string, IEnumerable<string>) overload, then you can use .ToArray to get the IEnumerable back to an array.

public string Execute(string fileContents)
{
   int columnsToTake = // Number of columns you want... this would be the number of columns in the file without the Business Id
   
   using (StreamWriter wr = new StreamWriter(resultFile))
   {
      using (StreamReader sr = new StreamReader(fileContents))
      {
         // Excluded most of your code...
         wr.WriteLine(wr.WriteLine(string.Join(",", lineArray.Take(columnsToTake).ToArray())+","+number+","+desk+","+qlevel+","+current1+","+current3 );
      }
   }
}
Joshua Robinson
  • 3,399
  • 7
  • 22
  • This ended up generating an error. Line 228: The best overloaded method match for 'string.Join(string,string[])' has some invalid arguments Line 228: Argument '2': cannot convert from 'System.Collections.Generic.IEnumerable' to 'string[]' – TWLaughingMan Jul 24 '20 at 18:17
  • No kidding? Well, you might be able to do `string.Join(",", lineArray.Take(columnsToTake).ToArray())` in that case. – Joshua Robinson Jul 24 '20 at 18:25