I made an app to reformat CSV files: it doubles the double quotes inside fields and replaces the line breaks inside them with the literal two-character string '\n'.
Once the data is in the database, the '\n' strings can be replaced back with real line breaks.
I needed to do this because the apps I had for processing CSV files do not deal correctly with line breaks inside fields.
Feel free to use and change.
In Python:
import sys
def ProcessCSV(filename):
    r"""Rewrite a quoted CSV file so that every record occupies one line.

    The result is written to ``filename + '.out'``.  Fields are recognised
    by the three-character separator ``","``, so every field is expected to
    be wrapped in double quotes.  For each record:

    * when the last field on a line has no closing quote, the next physical
      line is appended to it, joined by the two-character text ``\n``;
    * every double quote left inside a field is doubled;
    * blank lines are skipped.

    A line that neither starts with a double quote nor contains the
    separator (for example an unquoted header row) is copied as a single
    record instead of being merged with the lines that follow it.

    Args:
        filename: Path of the CSV file to read.

    Raises:
        IOError: If a file cannot be opened, read or written (``OSError``
            on Python 3).
    """
    separator = '","'
    out_name = filename + '.out'

    # ``with`` guarantees both files are closed even if processing fails.
    with open(filename, 'r') as source, open(out_name, 'w') as target:
        print('Reformatting {0} to {1}...'.format(filename, out_name))

        line = source.readline()
        while line:
            line = line.rstrip('\r\n')
            pieces = []
            count = 0
            last = not line  # a blank line yields no fields at all

            while not last:
                field, found, line = line.partition(separator)
                last = not found
                count += 1

                # Every field after the first was preceded by a separator,
                # which already carried its opening quote; every field
                # followed by a separator likewise has its closing quote.
                has_start = count > 1
                has_end = not last
                if count == 1 and field[:1] == '"':
                    field = field[1:]
                    has_start = True

                # The final field of a line is complete only once its
                # closing quote has been seen; until then keep pulling in
                # physical lines.
                while last and not has_end:
                    if field[-1:] == '"':
                        field = field[:-1]
                        has_end = True
                    elif not has_start:
                        # Not a quoted field: there is no closing quote to
                        # wait for, so do not swallow the next lines.
                        break
                    else:
                        line = source.readline()
                        if not line:
                            break  # end of file inside an open field
                        more, found, line = (
                            line.rstrip('\r\n').partition(separator))
                        field = field + '\\n' + more
                        last = not found
                        has_end = not last

                field = field.replace('"', '""')
                pieces.append(
                    (',' if count > 1 else '')
                    + ('"' if has_start else '')
                    + field
                    + ('"' if has_end else ''))

            record = ''.join(pieces)
            if record:
                target.write(record)
                target.write('\n')
            line = source.readline()

    print('Done')
def iif(st, v1, v2):
    """Return ``v1`` when ``st`` is truthy, otherwise ``v2``.

    Both ``v1`` and ``v2`` are ordinary arguments, so the caller evaluates
    them before the call; unlike a conditional expression, nothing is
    short-circuited.
    """
    return v1 if st else v2
if __name__ == '__main__':
    # Script entry point: the input CSV path is the first command-line
    # argument.  The argument count is checked before indexing, because a
    # bare sys.argv[1] raises IndexError when the script is started without
    # arguments and the message below would never be shown.
    filename = sys.argv[1] if len(sys.argv) > 1 else ''
    if len(filename) == 0:
        print('You must specify the input file')
    else:
        ProcessCSV(filename)
In VB.NET:
Module Module1

    ' Text found between two quoted fields: closing quote, comma, opening quote.
    Private Const FieldSeparator As String = ""","""

    ''' <summary>
    ''' Entry point. Uses the command-line argument text as the input file
    ''' name and reformats that file.
    ''' </summary>
    Sub Main()
        Dim FileName As String
        FileName = Command()
        If FileName.Length = 0 Then
            Console.WriteLine("You must specify the input file")
        Else
            ProcessCSV(FileName)
        End If
    End Sub

    ''' <summary>
    ''' Rewrites a quoted CSV file so that every record occupies one line.
    ''' The result is written to FileName with ".out" appended.
    ''' </summary>
    ''' <param name="FileName">Path of the CSV file to read.</param>
    ''' <remarks>
    ''' Fields are recognised by the quote-comma-quote separator, so every
    ''' field is expected to be wrapped in double quotes. When the last field
    ''' of a line has no closing quote, the next line is appended to it,
    ''' joined by the two characters backslash and n. Every double quote left
    ''' inside a field is doubled, and lines that produce no output are skipped.
    ''' A line that neither starts with a quote nor contains the separator is
    ''' copied as a single record instead of being merged with the next lines.
    ''' Errors are reported on the console rather than thrown to the caller.
    ''' </remarks>
    Sub ProcessCSV(ByVal FileName As String)
        Dim File1 As Integer, File2 As Integer
        Dim Line1 As String, Line2 As String
        Dim Field As String, Count As Long
        Dim HasStart As Boolean, HasEnd As Boolean
        Dim FileName2 As String, LastField As Boolean

        Try
            File1 = FreeFile()
            FileOpen(File1, FileName, OpenMode.Input, OpenAccess.Read)
            FileName2 = FileName & ".out"
            File2 = FreeFile()
            FileOpen(File2, FileName2, OpenMode.Output)
            Console.WriteLine("Reformatting {0} to {1}...", FileName, FileName2)

            Do Until EOF(File1)
                Line1 = LineInput(File1)
                Line2 = ""
                Count = 0
                ' An empty line yields no fields at all.
                LastField = (Len(Line1) = 0)

                Do Until LastField
                    LastField = (InStr(Line1, FieldSeparator) = 0)
                    Field = Strip(Line1, FieldSeparator)
                    Count = Count + 1

                    ' Every field after the first was preceded by a separator,
                    ' which already carried its opening quote; every field
                    ' followed by a separator likewise has its closing quote.
                    HasStart = (Count > 1)
                    HasEnd = Not LastField
                    If (Count = 1) And (Left$(Field, 1) = """") Then
                        Field = Mid$(Field, 2)
                        HasStart = True
                    End If

                    ' The final field of a line is complete only once its
                    ' closing quote has been seen; until then keep pulling in
                    ' physical lines.
                    Do While LastField And Not HasEnd
                        If Right$(Field, 1) = """" Then
                            Field = Left$(Field, Len(Field) - 1)
                            HasEnd = True
                        ElseIf Not HasStart OrElse EOF(File1) Then
                            ' Either the field is not quoted, so there is no
                            ' closing quote to wait for, or the input ended
                            ' inside an open field.
                            Exit Do
                        Else
                            Line1 = LineInput(File1)
                            LastField = (InStr(Line1, FieldSeparator) = 0)
                            ' "\n" is the literal backslash + n marker, not a line break.
                            Field = Field & "\n" & Strip(Line1, FieldSeparator)
                            HasEnd = Not LastField
                        End If
                    Loop

                    Field = Replace(Field, """", """""")

                    Line2 = Line2 & IIf(Count > 1, ",", "") & IIf(HasStart, """", "") & Field & IIf(HasEnd, """", "")
                Loop

                If Len(Line2) > 0 Then
                    PrintLine(File2, Line2)
                End If
            Loop

            FileClose(File1, File2)
            Console.WriteLine("Done")
        Catch ex As Exception
            Console.WriteLine("Error: " & ex.Message)
        Finally
            ' Safety net for the error path: FileClose without arguments closes
            ' every file still open through FileOpen, so no handle is leaked
            ' when processing stops early. After a normal run nothing is left
            ' to close.
            Try
                FileClose()
            Catch closeError As Exception
                Console.WriteLine("Error: " & closeError.Message)
            End Try
        End Try
    End Sub

    ''' <summary>
    ''' Splits Text at the first occurrence of Separator.
    ''' </summary>
    ''' <param name="Text">
    ''' Text to split. On return it holds what followed the separator, or an
    ''' empty string when the separator was not found.
    ''' </param>
    ''' <param name="Separator">Text to search for; it is not modified.</param>
    ''' <returns>
    ''' The part of Text before the separator, or all of Text when the
    ''' separator was not found.
    ''' </returns>
    Function Strip(ByRef Text As String, ByRef Separator As String) As String
        ' InStr returns an Integer, and Left$/Mid$ take Integer positions.
        Dim nPos As Integer
        nPos = InStr(Text, Separator)
        If nPos > 0 Then
            Strip = Left$(Text, nPos - 1)
            Text = Mid$(Text, nPos + Len(Separator))
        Else
            Strip = Text
            Text = ""
        End If
    End Function

End Module