I have a struct of strings with 3 million lines. I am trying to sort the file like:
aaaaa
aaaab
aaacc
And so on.
I was trying to do bubblesort. I tried it with 10 lines and it worked, but when I tried the whole 3 million lines file it took over 30 minutes and was still processing. I decided to try quicksort. However, I am running into a problem where it says:
expected 'const char **' but argument is of type 'struct lines *'
How can I fix this? Here is what I am doing:
#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>
void swap_str_ptrs(char const **arg1, char const **arg2)
{
const char *tmp = *arg1;
*arg1 = *arg2;
*arg2 = tmp;
}
void quicksort_strs(char const *args[], unsigned int len)
{
unsigned int i, pvt=0;
if (len <= 1)
return;
// swap a randomly selected value to the last node
swap_str_ptrs(args+((unsigned int)rand() % len), args+len-1);
// reset the pivot index to zero, then scan
for (i=0;i<len-1;++i)
{
if (strcmp(args[i], args[len-1]) < 0)
swap_str_ptrs(args+i, args+pvt++);
}
// move the pivot value into its place
swap_str_ptrs(args+pvt, args+len-1);
// and invoke on the subsequences. does NOT include the pivot-slot
quicksort_strs(args, pvt++);
quicksort_strs(args+pvt, len - pvt);
}
void main()
{
FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); //file im reading
typedef struct lines
{
char lines[100]; //size of each line
} lines;
int i = 0;
char buf[256];
static lines myDNA[3354419]; //creates the 3m spots for all lines
while (fgets (buf, sizeof(buf), dnaFile))
{
if (i > 0)
strcpy(myDNA[i].lines, buf); //inserting each line into the struct array
i++;
}
// this is the bubblesort approach, works, but it takes too lon
/**int a;
int total;
char temp[150];
char report[100][150];
for(a=0; a<3354419; a++)
{
for(total=a+1; total<=3354419; total++)
{
if(strcmp(myDNA[a].lines,myDNA[total].lines)>0)
{
strcpy(temp,myDNA[a].lines);
strcpy(myDNA[a].lines,myDNA[total].lines);
strcpy(myDNA[total].lines,temp);
}
}
}*/
quicksort_strs(myDNA, 3354419); //attempt at quicksort, which crashes
}
USING QSORT
#include<stdio.h>
#include<string.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
#include <ctype.h>
int compare_function(const void *a,const void *b) {
return (strcmp((char *)a,(char *)b));
}
void main()
{
FILE *dnaFile=fopen("hs_alt_HuRef_chr2.fa", "r"); //file with 3 million lines
typedef struct lines
{
char lines[100];
} lines;
int i = 0;
char buf[256];
static lines myDNA[3354419]; // array holding the 3 million lines
while (fgets (buf, sizeof(buf), dnaFile))
{
if (i > 0)
strcpy(myDNA[i].lines, buf); //putting each line into array
i++;
}
qsort(myDNA, 1000, 100, compare_function); //qsort works for first 1k lines, after, messed up
int a;
for (a = 0; a < 1000; a++){
printf("%s", myDNA[a].lines); //printing lines
}
}