0

I have an awk script (tst.awk):

# Builds one output line per id listed in the first file, followed by the
# matching row from each remaining file, or an "NA" placeholder row when a
# file has no row for that id.

# First file only: NR (records read overall) equals FNR (records read in the
# current file) only while the first file is being read.
NR==FNR {
    # Remember the ids in input order, with a comma appended to each one.
    ids[++numIds] = $1","
    next
}
# First record of every later file: count the file. A file with no records
# never reaches this rule and is therefore not counted.
FNR==1 { numFiles++ }
{
    id = $1
    # Remove the leading run of non-whitespace and the whitespace after it,
    # leaving only the values that follow the first field in $0.
    sub(/^[^[:space:]]+[[:space:]]+/,"")
    vals[id,numFiles] = $0
    # Turn every run of characters that is neither whitespace nor a comma
    # into "NA", keeping the separators. This must happen after the real
    # values were saved above because it overwrites $0. The array slot is
    # rewritten on every record, so the last record of a file wins.
    gsub(/[^[:space:],]+/,"NA")
    naVal[numFiles] = $0
}
END {
    for ( idNr=1; idNr<=numIds; idNr++) {
        # id carries the comma appended in the first rule, so a row from a
        # later file matches only if its first field is the id plus a comma.
        id = ids[idNr]
        printf "%s%s", id, OFS
        for (fileNr=1; fileNr<=numFiles; fileNr++) {
            # Use the stored row when present, else the placeholder row.
            val = ((id,fileNr) in vals ? vals[id,fileNr] : naVal[fileNr])
            # OFS between files, ORS after the last file ends the line.
            printf "%s%s", val, (fileNr<numFiles ? OFS : ORS)
        }
    }
}

That is called on the command line with:

awk -f tst.awk master file1 file2 file3 > output.file

(note: there can be a variable number of arguments)

How can I change this script, and command line code, to run it as a bash script?

I have tried (tst_awk.sh):

#!/bin/bash

# NOTE(review): -f takes the next word as the awk program file, so "$1"
# (master in the invocation shown for this script) is used as the program
# and only "$2" "$3" "$4" are passed as input files.
awk -f "$1" "$2" "$3" "$4"

# NOTE(review): the awk command above ends at its newline, so the quoted
# text below is a separate shell command word rather than an argument to
# awk; the redirection to output_file belongs to that second command only.
'NR==FNR {
        ids[++numIds] = $1","
        next
    }
    FNR==1 { numFiles++ }
    {
        id = $1
        sub(/^[^[:space:]]+[[:space:]]+/,"")
        vals[id,numFiles] = $0
        gsub(/[^[:space:],]+/,"NA")
        naVal[numFiles] = $0
    }
    END {
        for ( idNr=1; idNr<=numIds; idNr++) {
            id = ids[idNr]
            printf "%s%s", id, OFS
            for (fileNr=1; fileNr<=numFiles; fileNr++) {
                val = ((id,fileNr) in vals ? vals[id,fileNr] : naVal[fileNr])
                printf "%s%s", val, (fileNr<numFiles ? OFS : ORS)
            }
        }
    }' > output_file

called on command line with:

./tst_awk.sh master file1 file2 file3

I have also tried (tst_awk2.sh):

#!/bin/bash

awk -f master file1 file2 file3

'NR==FNR {
        ids[++numIds] = $1","
        next
    }
    FNR==1 { numFiles++ }



...

        }
    }
}' > output_file

called on command line with:

./tst_awk2.sh

Bot75
  • 179
  • 8
  • Use `awk '...' master file1 file2 file3 > output.file` (replace `...` with awk script inside `tst.awk`) – anubhava Jan 28 '21 at 20:19

2 Answers

2

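`-f` must be followed by the name of the file that contains the awk program. In your script, `-f "$1"` puts the shell script's first argument (`master`) there instead, so awk tries to read `master` as its program and the quoted program text on the following lines is never passed to awk at all.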

You can use "$@" to get all the script arguments, so you're not limited to just 4 arguments.

#!/bin/bash

# Run the awk program stored in tst.awk on every argument given to this
# script. "$@" hands each script argument to awk as its own word, so any
# number of input files can be supplied.
# Standard output is redirected to output_file in the current directory.
# /path/to is a placeholder for the directory that holds tst.awk.
awk -f /path/to/tst.awk "$@" > output_file

Use an absolute path to the awk script so you can run the shell script from any directory.

If you don't want to keep a separate tst.awk file, put the awk program text itself, in single quotes, as the first argument to awk (without `-f`), followed by `"$@"`:

#!/bin/bash

# Runs the inline awk program on all arguments of this script ("$@") and
# writes the result to output_file in the current directory.
#
# The program is passed to awk as one single-quoted word:
#   - NR==FNR rule: runs while the first file is read; stores each first
#     field, with a comma appended, in ids[] in input order.
#   - FNR==1 rule: counts each later file when its first record is read.
#   - unconditional rule: saves the record minus its leading field in
#     vals[id,file], then rewrites the record so every value becomes "NA"
#     and saves that as the placeholder row naVal[file]. The order matters
#     because both sub() and gsub() modify the current record in place.
#   - END rule: for each stored id prints the id, then for every file the
#     saved row or the placeholder row, separated by OFS and ended by ORS.
awk 'NR==FNR {
        ids[++numIds] = $1","
        next
    }
    FNR==1 { numFiles++ }
    {
        id = $1
        sub(/^[^[:space:]]+[[:space:]]+/,"")
        vals[id,numFiles] = $0
        gsub(/[^[:space:],]+/,"NA")
        naVal[numFiles] = $0
    }
    END {
        for ( idNr=1; idNr<=numIds; idNr++) {
            id = ids[idNr]
            printf "%s%s", id, OFS
            for (fileNr=1; fileNr<=numFiles; fileNr++) {
                val = ((id,fileNr) in vals ? vals[id,fileNr] : naVal[fileNr])
                printf "%s%s", val, (fileNr<numFiles ? OFS : ORS)
            }
        }
    }' "$@" > output_file
Barmar
  • 741,623
  • 53
  • 500
  • 612
0

You can also make the awk script itself executable by adding a shebang as its first line:

#! /bin/awk -f

NR==FNR {
   ids[++numIds] = $1","
   next
}...

Don't forget to run `chmod +x tst.awk` first; then run:

$ ./tst.awk  master file1 file2 file3 > outfile
karakfa
  • 66,216
  • 7
  • 41
  • 56