I have an excel sheet with some rows. Alongside, I have a db table that has the same definition as the excel sheet in terms of column structure. What I want to do is check if a record exists in the excel sheet but not in the db table then insert it into a new table (an existing empty table) called TableReport
for reporting purposes.
CSV File: PathToFile.csv
Original table to do comparison on: FruitTable
New table created for reporting: TableReport
The following code snippets are what you can use to test my scenario. The code below works as I am using a List<T>
when PathToFile.csv
is relatively quite small- As in like 4 rows. Takes under 30 seconds to complete execution of the program. However my real scenario involves PathToFile.csv
have about 200 000 rows and thus the list collection is not so efficient in terms of performance. Thus, I have considered using a Dictionary<TKey, TValue>
collection but I am stuck as to which parts within the code to tweak because with both collections, I will have to iterate the entire csv to get all the rows and add the to the checkIfFruitsMatch
list in this case. Even if I use a dictionary, I would still need to loop and add them before I even perform the comparision and already that is time consuming. Performance is a very critical requirement in this case.
I tried running the program with my current implementation of the list on a csv with 200 000 rows and it took well over 15 minutes, busy looping through the csv and adding the rows to the list and that didn't even finish before I terminated the program.
How can I achieve this to make the program much more faster. It shouldn't take longer than 10 minutes to execute. I have written this in a Windows Forms Application.
SQL Table Definition:
CREATE TABLE [dbo].[FruitTable](
[Id] [int] IDENTITY(1,1) NOT NULL,
[Apples] [nvarchar](20) NOT NULL,
[Oranges] [nvarchar](20) NOT NULL,
[Pears] [nvarchar](20) NOT NULL,
[Bananas] [nvarchar](20) NOT NULL,
[DateAddedUtc] [nvarchar](50) NULL
) ON [PRIMARY]
GO
Stored Procedure Definition:
CREATE PROC [dbo].[spAddFruitsToDB]
@Apples [nvarchar](20),
@Oranges [nvarchar](20),
@Pears [nvarchar](20),
@Bananas [nvarchar](20),
@DateAddedUtc [nvarchar](50)
AS
BEGIN
INSERT INTO TableReport --Has the same definition as FruitTable
VALUES (@Apples, @Oranges, @Pears, @Bananas, @DateAddedUtc)
END
Code:
public class FruitClass {
private SqlConnection mySQLConnection;
private SqlCommand mySQLCommand;
private SqlDataReader mySQLDataReader;
private string myConnectionString;
private void CheckDataValidity()
{
Microsoft.Office.Interop.Excel.Application Excel_app = new Microsoft.Office.Interop.Excel.Application();
Microsoft.Office.Interop.Excel.Workbooks work_books = Excel_app.Workbooks;
Microsoft.Office.Interop.Excel.Workbook work_book = work_books.Open("C:\\PathToFile.csv");
Microsoft.Office.Interop.Excel.Sheets work_sheets = work_book.Worksheets;
Microsoft.Office.Interop.Excel.Worksheet work_sheet = (Microsoft.Office.Interop.Excel.Worksheet)work_sheets.get_Item(1);
List<FruitClass> checkIfFruitsMatch = new List<FruitClass>();
List<FruitClass> dbFruitsToMatch= new List<FruitClass>();
string fruitTagNumberForApples = "";
for (int j = 2; j < work_sheet.Rows.Count; j++)
{
FruitClass fruitInstance = new FruitClass();
fruitInstance.Apples = CellToString(work_sheet.Cells[j, 3]).Trim();
fruitInstance.Oranges = CellToString(work_sheet.Cells[j, 13]).Trim();
fruitInstance.Pears = CellToString(work_sheet.Cells[j, 14]).Trim();
fruitInstance.Bananas = CellToString(work_sheet.Cells[j, 15]).Trim();
fruitTagNumberForApples = fruitInstance.Apples;
checkIfFruitsMatch.Add(fruitInstance);
if (fruitTagNumberForApples == null || fruitTagNumberForApples == "" || fruitTagNumberForApples == string.Empty)
break;
}
//Get fruits in excel and do a comparison with fruits in database table Fruit
dbFruitsToMatch.Add(ReturnFruitRow());
IEnumerable<FruitClass> listComparer = checkIfFruitsMatch.Except(dbFruitsToMatch);
foreach (FruitClass i in listComparer)
{
using (var db = new DBEntities())
{
int countDBexisting = db.FruitTable.Where(x => x.Apples == i.Apples).Count();
if (countDBexisting > 0)
{
//Fruit has been previously logged. No need to insert a duplicate
}
else
{
LogFruitToDB(i, "spAddFruitsToDB"); //Insert records into a new table called "TableReport"
}
}
}
work_book.Close();
Excel_app.Quit();
}
private void LogFruitToDB(FruitClass fruitInstance, string cmdText)
{
myConnectionString = ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString;
using (mySQLConnection = new SqlConnection(myConnectionString))
{
mySQLCommand = new SqlCommand(cmdText, mySQLConnection);
mySQLCommand.CommandType = CommandType.StoredProcedure;
SqlParameter Apples_Parameter = new SqlParameter
{
ParameterName = "@Apples",
Value = fruitInstance.Apples
};
mySQLCommand.Parameters.Add(Apples_Parameter);
SqlParameter Oranges_Parameter = new SqlParameter
{
ParameterName = "@Oranges",
Value = fruitInstance.Oranges
};
mySQLCommand.Parameters.Add(Oranges_Parameter);
SqlParameter Pears_Parameter = new SqlParameter
{
ParameterName = "@Pears",
Value = fruitInstance.Pears
};
mySQLCommand.Parameters.Add(Pears_Parameter);
SqlParameter Bananas_Parameter = new SqlParameter
{
ParameterName = "@Bananas",
Value = fruitInstance.Bananas
};
mySQLCommand.Parameters.Add(Bananas_Parameter);
SqlParameter DateAddedUtc_Parameter = new SqlParameter
{
ParameterName = "@DateAddedUtc",
Value = DateTime.UtcNow.ToString()
};
mySQLCommand.Parameters.Add(DateAddedUtc_Parameter);
mySQLConnection.Open();
mySQLCommand.ExecuteNonQuery();
}
}
private FruitClass ReturnFruitRow()
{
FruitClass fruitInfo = new FruitClass();
myConnectionString = ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString;
using (mySQLConnection = new SqlConnection(myConnectionString))
{
string procedureName = "select * from dbo.FruitTable";
mySQLCommand = new SqlCommand(procedureName, mySQLConnection);
mySQLCommand.CommandType = CommandType.Text;
mySQLCommand.Connection = mySQLConnection;
mySQLCommand.Connection.Open();
mySQLDataReader = mySQLCommand.ExecuteReader();
if (mySQLDataReader.HasRows)
{
while (mySQLDataReader.Read())
{
fruitInfo.Apples = mySQLDataReader.GetString(1);
fruitInfo.Oranges = mySQLDataReader.GetString(2);
fruitInfo.Pears = mySQLDataReader.GetString(3);
fruitInfo.Bananas = mySQLDataReader.GetInt32(4).ToString();
}
}
mySQLCommand.Connection.Close();
}
return fruitInfo;
}
private string CellToString(object p)
{
try
{
return ((Microsoft.Office.Interop.Excel.Range)p).Value.ToString();
}
catch
{
return "";
}
}
}
public class FruitClass
{
public string Apples;
public string Oranges;
public string Pears;
public string Bananas;
}
Note: The csv
file came in as a normal .xlsx
excel file with columns then got saved as a .csv
.
So, say FruitTable
had a matching record:
Then, Table Report
should look like below when the program is finished:
But now real scenario has about 200 000 records. Also worth mentioning that this application is run once a month.