You did say any language, so ... I prefer Perl:
#!/usr/bin/perl
# concatenate files
#
# For every file in the current directory whose name ends in
# "R1_001.fastq.gz" or "R2_001.fastq.gz", builds a "cat" command that joins
# it with its "-2" companion into a "-cat" output file.
#
# Leading command-line words that start with "-" are treated as flags:
#   -go   actually run the commands (the default is a dry run)
#   -v    print the IN1/IN2/OUT names instead of the command line

# hand the command line to the driver, then report success; any failure
# inside master ends the program earlier via die or exit
master(@ARGV);
exit(0);
# master -- master control
#
# arguments:
#   the command-line words (the script passes @ARGV)
#
# Leading words that begin with "-" are consumed as flags: "-name" sets the
# package global $opt_name to 1.  Scanning stops at the first word that does
# not begin with "-"; the remaining words are not used.
#
# Every entry of the current directory is recorded in %tails with the value
# 0, then docat is called for each entry whose name ends in
# "R1_001.fastq.gz" or "R2_001.fastq.gz".  Unless $opt_go was set, a hint
# about -go is printed at the end.
sub master
{
    my(@args) = @_;

    # dynamically scoped on purpose: docat reads and updates %tails and
    # must see this copy
    local(%tails);

    # peel off the leading -flags
    while (@args && defined($args[0]) && $args[0] =~ /^-/) {
        my $name = shift(@args);
        $name =~ s/^-/opt_/;

        # the flag variable is chosen by name at run time
        no strict 'refs';
        ${$name} = 1;
    }

    # everything in the current directory starts out "not yet used"
    my @names = dirload(".");
    @tails{@names} = (0) x @names;

    # each first-half file drives one pairing
    for my $name (grep { /R[12]_001\.fastq\.gz$/ } @names) {
        docat($name);
    }

    # default mode is "dry run" -- tell the user how to make it real
    if (! $opt_go) {
        printf("\n");
        printf("rerun with -go to actually do it\n");
    }
}
# docat -- process a pairing
#
# Builds the shell command that concatenates one input file with its "-2"
# companion into a "-cat" output file, reports it, and (when the global
# $opt_go is set) runs it.
#
# arguments:
#   $base -- name of the first input file; the companion and output names
#            are derived from it by replacing the first ".fastq" with
#            "-2.fastq" and "-cat.fastq" respectively
#
# globals:
#   $opt_v  -- if set, print the IN1/IN2/OUT names instead of the command
#   $opt_go -- if set, execute the command
#   %tails  -- every name used here is marked with 1
#
# dies if any of the three names was already marked by an earlier call, if
# the command cannot be started, or if it is killed by a signal.  Exits the
# whole program with the command's exit code when that code is nonzero.
sub docat
{
    my($base) = @_;
    my($tail);
    my($out);
    my($cmd);
    my($code);

    # all commands are joining just two files
    $tail = $base;
    $tail =~ s/\.fastq/-2.fastq/;

    # to an output file
    $out = $base;
    $out =~ s/\.fastq/-cat.fastq/;

    # the command goes through the shell (for the redirect), so each name is
    # quoted when it needs to be; ordinary names appear exactly as before
    $cmd = sprintf("cat %s %s > %s",
        _docat_shquote($base),
        _docat_shquote($tail),
        _docat_shquote($out));

    if ($opt_v) {
        printf("\n");
        printf("IN1: %s\n",$base);
        printf("IN2: %s\n",$tail);
        printf("OUT: %s\n",$out);
    }
    else {
        printf("%s\n",$cmd);
    }

    # no name may take part in more than one pairing
    die("docat: duplicate IN1\n")
        if ($tails{$base});
    $tails{$base} = 1;

    die("docat: duplicate IN2\n")
        if ($tails{$tail});
    $tails{$tail} = 1;

    die("docat: duplicate OUT\n")
        if ($tails{$out});
    $tails{$out} = 1;

    if ($opt_go) {
        # execute the command and get error code
        system($cmd);
        my $status = $?;

        # -1 means the command could not be started at all
        die("docat: unable to run '$cmd' -- $!\n")
            if ($status == -1);

        # the low 7 bits hold a terminating signal; the exit code alone
        # would read as 0 (success) in that case
        die(sprintf("docat: '%s' killed by signal %d\n",$cmd,$status & 127))
            if ($status & 127);

        $code = $status >> 8;
        exit($code) if ($code);
    }
}

# _docat_shquote -- make one file name safe to embed in a shell command
#
# arguments:
#   $word -- the file name
#
# returns: $word unchanged when it contains only characters the shell does
# not treat specially; otherwise $word wrapped in single quotes.  A name
# starting with "-" gets a "./" prefix so it cannot be taken as an option.
sub _docat_shquote
{
    my($word) = @_;

    $word = "./$word"
        if ($word =~ /\A-/);

    return $word
        if ($word =~ m{\A[A-Za-z0-9_.+:\@%,=/-]+\z});

    # inside single quotes only the single quote itself needs care:
    # close the quote, emit an escaped quote, reopen
    $word =~ s/'/'\\''/g;
    return "'$word'";
}
# dirload -- get list of files in a directory
#
# arguments:
#   $dir -- path of the directory to read
#
# returns: the names of the entries in $dir, without "." and "..", in
# default (string) sort order
#
# dies if the directory cannot be opened
sub dirload
{
    my($dir) = @_;

    opendir(my $dh,$dir) or
        die("dirload: unable to open '$dir' -- $!\n");

    # list context: fetch every entry in one call
    my @entries = readdir($dh);
    closedir($dh);

    # drop the two bookkeeping entries, then order the rest
    my @names = sort(grep { $_ ne "." && $_ ne ".." } @entries);

    return @names;
}
Here's the program output with the -v option:
IN1: N-4-Bmp-1_S20_L001_R1_001.fastq.gz
IN2: N-4-Bmp-1_S20_L001_R1_001-2.fastq.gz
OUT: N-4-Bmp-1_S20_L001_R1_001-cat.fastq.gz
IN1: N-4-Bmp-1_S20_L001_R2_001.fastq.gz
IN2: N-4-Bmp-1_S20_L001_R2_001-2.fastq.gz
OUT: N-4-Bmp-1_S20_L001_R2_001-cat.fastq.gz
IN1: N-4A-2A_S135_L001_R1_001.fastq.gz
IN2: N-4A-2A_S135_L001_R1_001-2.fastq.gz
OUT: N-4A-2A_S135_L001_R1_001-cat.fastq.gz
IN1: N-4A-2A_S135_L001_R2_001.fastq.gz
IN2: N-4A-2A_S135_L001_R2_001-2.fastq.gz
OUT: N-4A-2A_S135_L001_R2_001-cat.fastq.gz
rerun with -go to actually do it