0

I am working on an ssis integration and wrote a script task in c# for automating my imports from csv file to my DBs. It works great but I need help with removing commas from strings in pipes(|aaa,aaa| or |a|,|a|) in the csv. For example "Address, city wide". I want a function that can remove that comma(,). I would paste a snippet of my code and what I have done so far.

#
region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Runtime;
using System.Windows.Forms;
using System.Data.SqlClient;
using System.IO;
//using CsvHelper.Configuration;
#
endregion

namespace ST_7ce5ad6fbc104157b534f4eb484a4417 {
 [Microsoft.SqlServer.Dts.Tasks.ScriptTask.SSISScriptTaskEntryPointAttribute]
 public partial class ScriptMain: Microsoft.SqlServer.Dts.Tasks.ScriptTask.VSTARTScriptObjectModelBase {
  public void Main() {
   string datetime = DateTime.Now.ToString("yyyyMMddHHmmss");
   try {
    //Declare Variables
    string SourceFolderPath = Dts.Variables["User::SourceFolder"].Value.ToString();
    string FileExtension = Dts.Variables["User::FileExtension"].Value.ToString();
    string FileDelimiter = Dts.Variables["User::FileDelimiter"].Value.ToString();
    string ArchiveFolder = Dts.Variables["User::ArchiveFolder"].Value.ToString();
    string ColumnsDataType = Dts.Variables["User::ColumnsDataType"].Value.ToString();
    string SchemaName = Dts.Variables["User::SchemaName"].Value.ToString();
    SqlConnection myADONETConnection = new SqlConnection();
    myADONETConnection = (SqlConnection)
     (Dts.Connections["moviesdb"].AcquireConnection(Dts.Transaction) as SqlConnection);
    //Reading file names one by one
    string[] fileEntries = Directory.GetFiles(SourceFolderPath, "*" + FileExtension);
    foreach(string fileName in fileEntries) {
     //Writing Data of File Into Table
     string TableName = "";
     int counter = 0;
     string line;
     string ColumnList = "";
     //MessageBox.Show(fileName);
     System.IO.StreamReader SourceFile =
      new System.IO.StreamReader(fileName);
     while ((line = SourceFile.ReadLine()) != null) {
      if (counter == 0) {
       ColumnList = "[" + line.Replace("\"", "").Replace(FileDelimiter, "],[") + "]";
       //MessageBox.Show(ColumnList);
       //"[" + line.Replace(FileDelimiter, "],[") + "]";
       TableName = (((fileName.Replace(SourceFolderPath, "")).Replace(FileExtension, "")).Replace("\\", ""));
       string CreateTableStatement = "IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[" + SchemaName + "].";
       CreateTableStatement += "[" + TableName + "]')";
       CreateTableStatement += " AND type in (N'U'))DROP TABLE [" + SchemaName + "].";
       CreateTableStatement += "[" + TableName + "]  Create Table " + SchemaName + ".[" + TableName + "]";
       CreateTableStatement += "([" + line.Replace("\"", "").Replace(FileDelimiter, "] " + ColumnsDataType + ",[") + "] " + ColumnsDataType + ")";
       SqlCommand CreateTableCmd = new SqlCommand(CreateTableStatement, myADONETConnection);
       CreateTableCmd.ExecuteNonQuery();
       //MessageBox.Show(CreateTableStatement);
      } else {
       string query = "Insert into " + SchemaName + ".[" + TableName + "] (" + ColumnList + ") ";
       //query += "VALUES('" + line.Replace(FileDelimiter, "','").Replace("\"", "") + "')";
       //query += "VALUES('" + line.Replace(FileDelimiter, "','").Replace("\"", "").Replace("\"'\"", "") + "')";
       query += "VALUES('" + line.Replace("'", "").Replace(FileDelimiter, "','").Replace("\"", "") + "')";
       // MessageBox.Show(query.ToString());
       SqlCommand myCommand1 = new SqlCommand(query, myADONETConnection);
       myCommand1.ExecuteNonQuery();
      }
      counter++;
     }
     SourceFile.Close();
     //move the file to archive folder after adding datetime to it
     File.Move(fileName, ArchiveFolder + "\\" + (fileName.Replace(SourceFolderPath, "")).Replace(FileExtension, "") + "_" + datetime + FileExtension);
     Dts.TaskResult = (int) ScriptResults.Success;
    }
   } catch (Exception exception) {
    // Create Log File for Errors
    using(StreamWriter sw = File.CreateText(Dts.Variables["User::LogFolder"].Value.ToString() +
     "\\" + "ErrorLog_" + datetime + ".log")) {
     sw.WriteLine(exception.ToString());
     Dts.TaskResult = (int) ScriptResults.Failure;
    }
   }
  }#
  region ScriptResults declaration
  enum ScriptResults {
   Success = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Success,
    Failure = Microsoft.SqlServer.Dts.Runtime.DTSExecResult.Failure
  };#
  endregion
 }
}

It actually does what I want and imports successfully, but I need further assistance in tweaking the code to replace commas between strings in pipes in the csv file.

digital.aaron
  • 5,435
  • 2
  • 24
  • 43
  • Use a CSV parser if you want to make your life easier when trying to parse&process CSV data. There should be plenty of 3rd-party libraries offering such functionality; a library for handling CSV data often suggested here on StackOverflow is _CsvHelper_. –  May 31 '19 at 15:01
  • @tmlewin It stuns me how many developers opt to just use SSIS for running script tasks. Why bother? Just create an application. – J Weezy May 31 '19 at 16:12
  • Can you give us an example row from the .csv that has data that needs this further processing? How does it look in the flat file? How does it look in the DB now? How do you want it to look? – digital.aaron May 31 '19 at 17:20
  • Thank you guys for your suggestions, let me start by addressing your suggestions. For the csv file here is the format |apple|,|carrot,Fish|,|pasta|. Now here is the issue, i need the script to ignore the commas between the piped string , reason is that during imports it detects the comma and creates additional column which leads to truncation. i need the script to ignore the comma between the strings so as to allow the import to work successfully. Meanwhile i would explore csvhelper as suggested. Thanks – Tmlewin May 31 '19 at 18:55
  • @digital.aaron . I am preparing a sample file and also detailed information on how the output structure should look like.Thanks – Tmlewin May 31 '19 at 18:56
  • How many files are you processing? Do they have the same names each time? Do they have the same columns each time? I ask because I see you are dropping and recreating the tables in the DB each time you import a file. There are some very specific use-cases where this is a good idea, and I wonder if your problem actually qualifies as a good reason to do this. – digital.aaron May 31 '19 at 19:09
  • I bring up my previous comment because you could VERY EASILY import your data using a Flat File Connection in a Data Flow Task. It will do exactly what you're looking for, even with minimal configuration of the task. You'd set up your text delimiter as `|`, your column delimiter as `,` (the default), and SSIS will correctly recognize the columns. – digital.aaron May 31 '19 at 19:11
  • @digital.aaron , Thanks for your feedback, i appreciate your help. Sorry for the late reply, i tried importing with ssis and there were issues due to the commas between pipes.That why i want to write a script to ignore the commas between string in pipes(delimeter) .I am processing over 50 csv files , some files are like 200mb large. Now the files don't have the same name each time each csv file would be imported in a table . one table for each csv file imported. – Tmlewin Jun 03 '19 at 14:04
  • If that's truly the case, I'd recommend looking into proven CSV processing solutions in C#. For example, I found this answer that has some simple looking code that might work in your script. You might have to play around with the code a bit (looks like `"` is hard-coded as the text delimiter, for example), but it could put you on the right path. https://stackoverflow.com/a/48356933/1169697 – digital.aaron Jun 03 '19 at 15:29
  • Thanks for the pointer , i also tried the method you suggested by using ssis to import the csv files , it imported just a few lines most of the data in the csv didn't import and i got several errors, i figured a straight forward approach as such didn't work out for me i can give u a template of my package.dtsx file for you to look at if you want. Thanks – Tmlewin Jun 03 '19 at 15:50
  • @digital.aaron , hi i found this video https://www.youtube.com/watch?v=Zg1aZpoS0I8&feature=youtu.be . It extremely close to what i want , but i want is to run through the data and look for commas. Then look prior to and after the comma. If you are surrounded by chars then replace the comma with alternate char like the carat "^" symbol or the tilde "~". Then process the file as normal then go back and replace the alternate char with a comma. – Tmlewin Jun 03 '19 at 16:49
  • @Tmlewin did you find a solution to this issue? – Tshepiso Selepe May 23 '20 at 11:40

0 Answers0