I have a folder that will receive text files (200–500 MB — not huge, but large text files), and I want to process each file in parallel. The files will contain lines like:
"ComnanyTestIsert", "Firs Comment", "LA 132", "222-33-22", 1
"ComnanyTestIsert1", "Seconds Comment", "LA 132", "222-33-22", 1
For example, I use 2 such files. I don't quite understand when to use BufferedStream with a parallel loop, how to set the number of parallel operations, or how to perform the insert correctly.
static void Main(string[] args)
{
    // Example usage: import two source files into the "test" table
    // of the local SQL Server database.
    string[] files = { "\\Insert.txt", "\\Insert1.txt" };

    const string connectionString =
        "Data Source=(localdb)\\MSSQLLocalDB;Initial Catalog=test;Integrated Security=True;Connect Timeout=30;Encrypt=False;TrustServerCertificate=False;ApplicationIntent=ReadWrite;MultiSubnetFailover=False";

    ProcessFileTaskItem(files, connectionString, "test");
}
/// <summary>
/// Reads an array of input files in parallel and bulk-uploads the rows of
/// each file into the destination database table.
/// </summary>
/// <param name="SourceFiles">Full paths of the files to import.</param>
/// <param name="DatabaseConnectionString">Connection string of the target database.</param>
/// <param name="DestinationTable">Name of the table the rows are written to.</param>
/// <param name="MaxDegreeOfParallelism">
/// Upper bound on how many files are processed concurrently; -1 (the default)
/// lets the scheduler decide. This is how you "set the number of parallel operations".
/// </param>
public static void ProcessFileTaskItem(string[] SourceFiles, string DatabaseConnectionString, string DestinationTable, int MaxDegreeOfParallelism = -1)
{
    // Guard clause: nothing to do without input files.
    if (SourceFiles == null || SourceFiles.Length == 0)
        return;

    var options = new ParallelOptions { MaxDegreeOfParallelism = MaxDegreeOfParallelism };

    // Process the files concurrently. Each iteration owns its own connection
    // and SqlBulkCopy instance — SqlConnection is not thread-safe, so nothing
    // may be shared across iterations.
    Parallel.For(0, SourceFiles.Length, options, x =>
    {
        // Skip silently if the file is missing (best-effort import).
        if (!File.Exists(SourceFiles[x]))
            return;

        using (SqlConnection connectionDest = new SqlConnection(DatabaseConnectionString))
        {
            connectionDest.Open();
            // NOTE(review): TableLock on concurrent bulk loads into the same
            // table can serialize the inserts unless the table qualifies for
            // parallel BU locks (e.g. a heap with no indexes) — confirm against
            // the destination schema.
            using (SqlBulkCopy sbc = new SqlBulkCopy(connectionDest, SqlBulkCopyOptions.TableLock, null))
            {
                sbc.DestinationTableName = DestinationTable;
                sbc.BulkCopyTimeout = 28800; // 8 hours

                // Read, parse and upload the whole file.
                ProcessAllLinesInInputFile(SourceFiles[x], connectionDest, sbc);
            }
            // The using-block disposes (and closes) the connection; an
            // explicit Close() call is redundant.
        }
    });
    // Removed Array.Clear(SourceFiles, ...): it mutated the caller's array,
    // a surprising side effect that serves no cleanup purpose here.
}
/// <summary>
/// Reads every line of the source file, parses the CSV-style fields into a
/// DataTable, and bulk-copies the rows into the destination table in batches.
/// Reading a file line-by-line is inherently sequential, so there is no inner
/// parallel loop here — parallelism belongs at the per-file level.
/// </summary>
/// <param name="SourceFiles">Full path of the single file to import.</param>
/// <param name="connectionDest">Open connection to the destination database.</param>
/// <param name="sbc">Configured bulk-copy instance bound to <paramref name="connectionDest"/>.</param>
private static void ProcessAllLinesInInputFile(string SourceFiles, SqlConnection connectionDest, SqlBulkCopy sbc)
{
    // How many parsed rows to accumulate before each WriteToServer call.
    // Batching keeps memory bounded for 200-500 MB input files.
    const int BatchSize = 10000;

    // Local staging table. The column names must match the destination table.
    DataTable CurrentRecords = new DataTable("test");
    string[] ColumnNames = new string[] { "Name", "Comment", "Address", "Phone", "IsActive" };

    // Build the schema BEFORE reading, so parsed rows can be added directly.
    // (The original built it after the read loop, which had discarded every line.)
    for (int x = 0; x < ColumnNames.Length; x++)
        CurrentRecords.Columns.Add(ColumnNames[x], typeof(string));

    using (FileStream fs = File.Open(SourceFiles, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    using (BufferedStream bs = new BufferedStream(fs))
    using (StreamReader sr = new StreamReader(bs))
    {
        string s;
        while ((s = sr.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(s))
                continue;

            // Parse:  "a", "b", "c", "d", 1  ->  five trimmed, unquoted fields.
            // NOTE(review): a naive Split(',') assumes no field contains an
            // embedded comma — confirm against the real input format.
            string[] fields = s.Split(',');
            if (fields.Length != ColumnNames.Length)
                continue; // skip malformed lines rather than aborting the whole file

            object[] values = new object[fields.Length];
            for (int i = 0; i < fields.Length; i++)
                values[i] = fields[i].Trim().Trim('"');
            CurrentRecords.Rows.Add(values);

            // Flush a full batch to the server and reuse the table.
            if (CurrentRecords.Rows.Count >= BatchSize)
            {
                sbc.WriteToServer(CurrentRecords);
                CurrentRecords.Clear();
            }
        }
    }

    // Upload whatever remains after the final partial batch.
    if (CurrentRecords.Rows.Count > 0)
        sbc.WriteToServer(CurrentRecords);
}