I'm trying to apply some previously written code to some DNA sequencing data that I have collected. I would like to match my sequencing reads to a document containing specific barcodes and output the number of hits for each match. I am having some very basic trouble with calling the two files to compare. The code compiles, however, when I try to call the file "MOBYtest" in line 77, the output is 0, which indicates that the file is not being called appropriately. Does anyone have any insight into how I can incorporate the two files into the following code?
####################################################################################################
#### Supplementary File 2, barcodebin.c ############################################################
####
#### Barcode assigned to a gene using standard binary search
####
#### Can Alkan
#### 2010
####################################################################################################
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
typedef struct spair{
char *name;
char *seq;
int count;
}_spair;
int binSearch(char *seq, int s, int e);
void revcomp(char *s, char *r);
static int maxfunc(const void *p1, const void *p2);
struct spair **seqs;
int seqlen;
int main(int argc, char **argv){
FILE *fp;
int i;
char fname[100];
int nseq;
char sequence[100]; char name[100];
char sname[100];
char revseq[100];
int retCode=100000;
int s, e; int olds;
int found;
int crop;
int len;
int longest_barcode=0;
int fastq=0;
char line[1000];
int loop_done=0;
int crop_lim;
int min_barcode = 5;
int verbose = 0;
fname[0]=0; nseq = 0; sname[0]=0;
for (i=1;i<argc;i++){
if (!strcmp(argv[i], "-i"))
strcpy(fname, argv[i+1]);
else if (!strcmp(argv[i], "-s"))
strcpy(sname, argv[i+1]);
else if (!strcmp(argv[i], "-n"))
nseq = atoi(argv[i+1]);
else if (!strcmp(argv[i], "-m"))
min_barcode = atoi(argv[i+1]);
else if (!strcmp(argv[i], "-v"))
verbose = 1;
}
if (fname[0]==0 || nseq==0 || sname[0]==0) return 0;
fp = fopen(fname, "MOBYtest");
if ( !fp ) {
fprintf(stderr, "Could not open file %s\n", fname);
return 255;
}
i=0;
seqs = (struct spair ** ) malloc (sizeof (struct spair *) * nseq);
if (verbose) {
fprintf(stderr, "Loading %d sequences ..", nseq);
}
while (fscanf(fp, ">%s\n%s\n", name, sequence) > 0 ){
seqlen=strlen(sequence);
if (seqlen > longest_barcode) longest_barcode = seqlen;
if (seqlen>=3){
seqs[i] = (struct spair * ) malloc (sizeof (struct spair));
seqs[i]->name = (char *) malloc (sizeof (char) * (strlen (name)+1));
seqs[i]->seq = (char *) malloc (sizeof (char) * (2*seqlen+1));
seqs[i]->count = 0;
strcpy(seqs[i]->seq, sequence);
strcpy(seqs[i]->name, name);
i++;
}
}
nseq = i;
qsort(seqs, nseq, sizeof(struct spair *), maxfunc);
if (verbose) {
fprintf(stderr, " done. %d left. Longest barcode: %d bp\n", nseq, longest_barcode);
}
fclose(fp);
fp = fopen(sname, "r");
if ( !fp ) {
fprintf(stderr, "Could not open file %s\n", sname);
return 255;
}
while (fscanf(fp, "%s\n%s\n", name, sequence) > 0){
if (name[0]=='@'){
fgets(line, 1000, fp); // + line
fgets(line, 1000, fp); // quality line
}
sequence[longest_barcode]=0;
strcpy(revseq, sequence);
s = 0, e = nseq; retCode = 10000; olds=-10;
found=0;
crop = 0;
len = strlen(revseq);
crop_lim = longest_barcode - min_barcode + 1; // ad hoc
while (crop<crop_lim){
revseq[len-crop]=0;
while (s<e && retCode!=0){
retCode=binSearch(revseq, s, e);
if (retCode < 0){e = (s+e)/2;}
else if (retCode > 0){olds=s; s = (s+e)/2;}
else if (retCode == 0){
seqs[(s+e)/2]->count++;
crop = crop_lim+1;
break;
}
if (s>=e || s==olds){
crop++;
s = 0, e = nseq; retCode = 10000; olds=-10;
break;
}
}
}
}
if (verbose) {
fprintf(stderr, " done.\n");
}
fprintf(stdout, "Barcode_Name\tBarcode_Length\tFrequency\n");
for (i=0;i<nseq;i++)
fprintf(stdout, "%s\t%d\t%d\n", seqs[i]->name, strlen(seqs[i]->seq), seqs[i]->count);
}
int binSearch(char *seq, int s, int e){
int med = (s+e)/2;
int ret;
ret = strcmp(seq, seqs[med]->seq);
return ret;
}
void revcomp(char *s, char *r){
int len = strlen(s);
int i;
for (i=0;i<len;i++){
switch(s[i]){
case 'A': r[len-i-1] = 'T'; break;
case 'T': r[len-i-1] = 'A'; break;
case 'C': r[len-i-1] = 'G'; break;
case 'G': r[len-i-1] = 'C'; break;
default: r[len-i-1] = s[i]; break;
}
}
r[len]=0;
}
static int maxfunc(const void *p1, const void *p2){
struct spair *a, *b;
int ret;
a = *((struct spair **)p1);
b = *((struct spair **)p2);
ret = strcmp(a->seq, b->seq);
return ret;
}