I am very new in C++!Therefore I would really appreciate if you will consider it and answer as easy as it is possible. I need to parse fasta file with >40000 sequences (near 500Mb) and write ID and sequence length into the new file. I found that it is going very slowly in C++ and for this purpose python works much faster. But I need to learn how I can do it in C++. I am wonder are there any ways to fasten this process for C++?
This is my code:
#include <iostream>
#include <fstream>
#include <string>
#include <time.h>
#include <stdio.h>
using namespace std;
int main() {
time_t start, end;
time(&start);
clock_t begin = clock();
ifstream file;
string line;
string id;
string content;
int len = 0;
int i = 0;
ofstream out;
file.open("contigs.fasta", ios::in);
out.open("output.txt", ios::out);
while (getline(file, line)) {
if (line[0] == '>') {
i++;
if (i != 1) {
//cout << id << "\n" << len << "\n" << content << endl;
//out.write(line.c_str(), line.size());
out << id << " : " << len << endl;
}
id = line;
len = 0;
content = "";
}
else
{
len += line.length();
content += line;
}
}
//cout << id << "\n" << len << "\n" << content << endl;
//out << id << " : " << len << endl;
cout << "Total number of sequences :" << i << "\n";
out.close();
time (&end);
double dif = difftime (end,start);
printf ("Elasped time is %.2lf seconds.", dif );
return 0;
}
Thanks in advance!