$ cat decsv.awk
BEGIN { FPAT = "([^,]*)|(\"[^\"]+\")"; OFS="," }
{
# create strings that cannot exist in the input to map escaped quotes to
gsub(/a/,"aA")
gsub(/\\"/,"aB")
gsub(/""/,"aC")
# prepend previous incomplete record segment if any
$0 = prev $0
numq = gsub(/"/,"&")
if ( numq % 2 ) {
# this is inside double quotes so incomplete record
prev = $0 RT
next
}
prev = ""
for (i=1;i<=NF;i++) {
# map the replacement strings back to their original values
gsub(/aC/,"\"\"",$i)
gsub(/aB/,"\\\"",$i)
gsub(/aA/,"a",$i)
}
printf "Record %d:\n", ++recNr
for (i=0;i<=NF;i++) {
printf "\t$%d=<%s>\n", i, $i
}
print "#######"
.
$ awk -f decsv.awk file
Record 1:
$0=<,"This is some text with a , in it.", #data with commas are enclosed in double quotes>
$1=<>
$2=<"This is some text with a , in it.">
$3=< #data with commas are enclosed in double quotes>
#######
Record 2:
$0=<,"line 1 of data
line 2 of data", #data with a couple of newlines>
$1=<>
$2=<"line 1 of data
line 2 of data">
$3=< #data with a couple of newlines>
#######
Record 3:
$0=<,"Data that may a have , in it and
also be on a newline as well.",>
$1=<>
$2=<"Data that may a have , in it and
also be on a newline as well.">
$3=<>
#######
Record 4:
$0=<,"Data that \"may\" a have ""quote"" in it and
also be on a newline as well.",>
$1=<>
$2=<"Data that \"may\" a have ""quote"" in it and
also be on a newline as well.">
$3=<>
#######
The above uses GNU awk for FPAT and RT. I don't know of any CSV format that would allow you to have a newline in the middle of a field that's not enclosed by quotes (if it did you'd never know where any record ended) so the script doesn't allow for that. The above was run on this input file:
$ cat file
,"This is some text with a , in it.", #data with commas are enclosed in double quotes
,"line 1 of data
line 2 of data", #data with a couple of newlines
,"Data that may a have , in it and
also be on a newline as well.",
,"Data that \"may\" a have ""quote"" in it and
also be on a newline as well.",