1

I have to read a csv file and then sort its data basen on the dates in ascending order. The plot twist is that the 90% of the data is sorted correctly but the other 10% is not. The dates in the csv file are written in this form 01/17/2015(month/day/year). I tested the code a bit and i noticed that if i run insertion sort twice then the 10% of the data that were not sorted, are perfectly sorted. My question is why is this happening, is it a problem with my code or with this algorithm. The fact that it runs properly for the most part and if i run it aggain it sorts the file perfectly makes me think that the only problem is with the algorithm and not my code.

#include <stdio.h>

typedef struct
{
    int month;
    int day;
    int year;
    float T_degC;
    float PO4uM;
    float SiO3uM;
    float NO2uM;
    float NO3uM;
    float Salnty;
    float O2ml_L;
} hello;

int insertionSort(hello hellos[], int records);

int main()
{
    FILE* file;

    file = fopen("ocean1.csv", "r");

    if (file == NULL)
    {
        printf("Error opening file. \n");
        return 1;
    }

    hello hellos[1405];

    int read = 0;
    int records = 0; // dhladh struct

    do
    {
        read = fscanf(file,
                      "%d/%d/%d, %f, %f, %f, %f, %f, %f, %f",
                      &hellos[records].month,
                      &hellos[records].day,
                      &hellos[records].year,
                      &hellos[records].T_degC,
                      &hellos[records].PO4uM,
                      &hellos[records].SiO3uM,
                      &hellos[records].NO2uM,
                      &hellos[records].NO3uM,
                      &hellos[records].Salnty,
                      &hellos[records].O2ml_L);

        if (read == 10)
        {
            records++;
        }
        if (read != 10 && feof(file) == 0)
        {
            printf("File format incorrect.\n");
            return 1;
        }
        if (ferror(file))
        {

            printf("Error reading file.\n");
            return 1;
        }

    } while (!feof(file));

    fclose(file);

    printf("\n%d records read.\n\n", records);

    insertionSort(hellos, records);
    // insertionSort(hellos, records);

    //Εκτυπωση των αποτελεσματων.

    for (int i = 0; i < records; i++)
    {
        printf("\n%d, %d, %d, %f, %f, %f, %f, %f, %f, %f",
               hellos[i].month,
               hellos[i].day,
               hellos[i].year,
               hellos[i].T_degC,
               hellos[i].PO4uM,
               hellos[i].SiO3uM,
               hellos[i].NO2uM,
               hellos[i].NO3uM,
               hellos[i].Salnty,
               hellos[i].O2ml_L);

        printf("\n");
    }
    return 0;
}

int insertionSort(hello hellos[], int records)
{
    int i;
    hello key;

    for (int j = 1; j < records; j++)
    {
        key = hellos[j];
        i = j - 1;

        printf("\n\n%d", i);
        // printf("\n%d", key.month);
        // printf("\n%d", hellos[i].month);

        if (key.year < hellos[i].year)
        {
            // printf("\nmpla");
            while (i >= 0 && key.year < hellos[i].year)
            {
                hellos[i + 1] = hellos[i];
                i = i - 1;
            }
            hellos[i + 1] = key;
        }
        else if (key.year == hellos[i].year)
        {
            // printf("\nmpla2");
            if (key.month < hellos[i].month)
            {
                // printf("\nmpla3");
                while (i >= 0 && key.month < hellos[i].month && key.year == hellos[i].year)
                {
                    hellos[i + 1] = hellos[i];
                    i = i - 1;
                }
                hellos[i + 1] = key;
            }
            else if (key.month == hellos[i].month)
            {
                // printf("\nmpla4");
                if (key.day < hellos[i].day)
                {
                    // printf("\nmpla5");
                    while (i >= 0 && key.day < hellos[i].day && key.month == hellos[i].month)
                    {
                        hellos[i + 1] = hellos[i];
                        i = i - 1;
                    }
                    hellos[i + 1] = key;
                }
                else if (key.day == hellos[i].day)
                {
                    // printf("\nmpla6");
                    if (key.T_degC < hellos[i].T_degC)
                    {
                        // printf("\nmpla7");
                        while (i >= 0 && key.day == hellos[i].day && key.month == hellos[i].month &&
                               key.year == hellos[i].year)
                        {
                            hellos[i + 1] = hellos[i];
                            i = i - 1;
                        }
                        hellos[i + 1] = key;
                    }
                }
            }
        }
    }
    return 0;
}

Here are some examples of data from my csv:

02/02/2015,20.37,0.22,0,0,0,33.685,5.5
02/01/2015,17.71,0.28,0.5,0,0,33.676,5.93
01/30/2015,10.85,1.32,16.5,0.05,14.8,33.752,4.19
01/31/2015,10.54,1.85,27.4,0.02,21.5,33.881,2.75
01/29/2015,10.49,1.98,30,0.02,22.6,33.946,2.41
01/28/2015,10.39,2.03,30.7,0.02,23,33.96,2.37
01/27/2015,10.22,2.1,31.8,0.02,23.6,33.982,2.31
01/26/2015,9.75,2.19,34.7,0.01,24.8,34.029,2.39
01/25/2015,18.43,0.11,1,0,0,33.464,5.83
01/24/2015,18.25,0.04,2,0,0,33.452,5.95
01/22/2015,15.6,0.19,3.8,0.04,0.7,33.423,5.91
01/23/2015,12.7,0.41,6,0.1,1.4,33.393,5.88
01/21/2015,10.98,1.09,16.6,0.07,14.1,33.481,4.04
01/20/2015,10.93,1.55,23.8,0.04,19.1,33.531,2.82
01/19/2015,10.74,1.67,26.7,0.04,20.7,33.583,2.55
01/16/2015,10.27,1.66,27.8,0.01,21.2,33.636,2.71
01/15/2015,10.02,1.76,34.4,0.03,24.3,33.747,2.11
01/17/2015,20.22,0.15,1,0,0,33.654,5.34
01/18/2015,20.22,0.15,1,0,0,33.654,5.34
Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
  • 1
    reading from a file and sorting data are separate things. Did you check if the file contents are read correctly? Did you test your sorting with a (small) hardcoded dataset? Did you use a debugger? – 463035818_is_not_an_ai Jul 05 '22 at 11:37
  • 1
    Have you tried to [*debug*](https://ericlippert.com/2014/03/05/how-to-debug-small-programs/) your program? For example, try to minimize the input to the smallest possible set that replicates the problem, then use a [*debugger*](https://stackoverflow.com/questions/25385173/what-is-a-debugger-and-how-can-it-help-me-diagnose-problems) to step through the code statement by statement while monitoring variables and their values, to see if you can find out when and where things go wrong. – Some programmer dude Jul 05 '22 at 11:39
  • 3
    Is this homework? If not why not use std::sort? Your code looks more like "C" then "C++" to me though. – Pepijn Kramer Jul 05 '22 at 11:39
  • 1
    The shown field-by-field logic results in a boatload of duplicated code and logic. This is an invitation to typos and subtle bugs. The whole thing should be scrapped and rewritten from scratch using a standalone, self-contained, comparison function whose correctness is easily proven, and a single instance of an insertion sort that simply calls the comparison function. It would also be an extra bonus if the whole thing was actually rewritten in C++, as is the apparent intention, instead of C. – Sam Varshavchik Jul 05 '22 at 11:44
  • 1
    @Dimitris Efthimakis This is not a C++ code. – Vlad from Moscow Jul 05 '22 at 11:45
  • 1
    If the results are unpredictable it's your fault, not the algorithm's, in 100% of the cases. Your reasoning is completely backwards. – molbdnilo Jul 05 '22 at 11:46
  • It is a c code guys sorry. I missclicked the cpp – Dimitris Efthimakis Jul 05 '22 at 12:29

3 Answers3

3

First of all I would recommend that you think a bit and figure out a way of writing this without repeating the sort algorithm multiple times. HINT: you only need one function to compare two hellos and determine if the first one is smaller than the second, equal to it, or greater than it.

To the question at hand: you have a logical error under // printf("\nmpla5"); section you should be checking for month AND year equal, currently you search only checks for same month which will obviously sort the list incorrectly. EDIT: Actually, that only fixes a bug in your intended code, the code still won't sort properly unless you do the full compare between two entries (year, then month, then day, then temp). Try writing it all again, this time design your implementation with a compare function in mind (which compares two entries).

odyss-jii
  • 2,619
  • 15
  • 21
  • So i checked again and what is happening is that the variable i is not big enough for the while to run properly and sort the first line of data to the end as it should. And after that the key is going to change into the next value and compare it to the same value as before. So it loses one cycle – Dimitris Efthimakis Jul 05 '22 at 12:28
2

For starters it is not a C++ code. It is a C code.

The logical error is hidden in the if-else statements.

Consider for example this code snippet

        if (key.month < hellos[i].month)
        {
            // printf("\nmpla3");
            while (i >= 0 && key.month < hellos[i].month && key.year == hellos[i].year)
            {
                hellos[i + 1] = hellos[i];
                i = i - 1;
            }
            hellos[i + 1] = key;
        }
        else if (key.month == hellos[i].month)
        {
            //...
        }

If key.month is less than hellos[i].month then the else if statement

else if (key.month == hellos[i].month)

will not get the control though there can be objects (after the preceding if statement) with key.month equal to hellos[i].month but with key.day less than hellos[i].day.

I advice to write a separate comparison function similarly to the function used by qsort and call it to compare two objects of the structure type. For example

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}        

Here is a demonstration program of using this function based on calling qsort.

#include <stdio.h>
#include <stdlib.h>

typedef struct
{
    int month;
    int day;
    int year;
} hello;

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}

int main( void )
{
    hello hellos[] =
    {
        { 02, 02, 2015 },
        { 02, 01, 2015 },
        { 01, 30, 2015 },
        { 01, 31, 2015 },
        { 01, 29, 2015 },
        { 01, 28, 2015 },
    };
    size_t N = sizeof( hellos ) / sizeof( *hellos );

    qsort( hellos, N, sizeof( hello ), cmp );

    for ( size_t i = 0; i < N; i++ )
    {
        printf( "%02d/%02d/%d\n", hellos[i].month, hellos[i].day, hellos[i].year );
    }
}

The program output is

01/28/2015
01/29/2015
01/30/2015
01/31/2015
02/01/2015
02/02/2015

You can insert a call of the comparison function in only one if statement within your function and the following while loop as for example

if ( cmp( &key, &hellos[i] ) < 0 )
{
    //...
}

Taking into account your comments to my answer it seems you are unable to understand what should be done. So I will just include a demonstration program.

#include <stdio.h>

typedef struct
{
    int month;
    int day;
    int year;
} hello;

int cmp( const void *a, const void *b )
{
    const hello *left  = a;
    const hello *right = b;

    int result = ( right->year < left->year ) - ( left->year < right->year );

    if ( result == 0 )
    {
        result = ( right->month < left->month ) - ( left->month < right->month );
        if ( result == 0 )
        {
            result = ( right->day < left->day ) - ( left->day < right->day );
        }
    }

    return result;
}

void insertionSort( hello hellos[], size_t records )
{
    for ( size_t i = 1; i < records; i++ )
    {
        if ( cmp( &hellos[i], &hellos[i-1] ) < 0 )
        {
            hello tmp = hellos[i];
            size_t j = i;

            while ( j != 0 && cmp( &tmp, &hellos[j -1] ) < 0 )  
            {   --j;
                hellos[j + 1] = hellos[j];               
            } 

            hellos[j] = tmp;
        }
    }
}

int main( void )
{
    hello hellos[] =
    {
        { 02, 02, 2015 },
        { 02, 01, 2015 },
        { 01, 30, 2015 },
        { 01, 31, 2015 },
        { 01, 29, 2015 },
        { 01, 28, 2015 },
    };
    size_t N = sizeof( hellos ) / sizeof( *hellos );

    insertionSort( hellos, N );

    for ( size_t i = 0; i < N; i++ )
    {
        printf( "%02d/%02d/%d\n", hellos[i].month, hellos[i].day, hellos[i].year );
    }
}

The program output is the same as shown above

01/28/2015
01/29/2015
01/30/2015
01/31/2015
02/01/2015
02/02/2015

Or in C++ this done very easy using the standard C++ function std::tie declared in the header <tuple>.

Something like

if ( std::tie( key.year, key.month, key.day ) < 
     std::tie( hellos[i].year, hellos[i].month, hellos[i].day ) ) 
{
    //...
} 
Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
  • Sorry i miss clicked the tag. Can you please explain your statement a bit more – Dimitris Efthimakis Jul 05 '22 at 12:29
  • @DimitrisEfthimakis What is unclear? – Vlad from Moscow Jul 05 '22 at 12:31
  • So as i answered before the variable 'i' is not big enough to move the first line to the last and can only move it to the second line. And after that the next key will be compared again to the second line which should have been done from the while statement before and not from a new for cycle. – Dimitris Efthimakis Jul 05 '22 at 12:32
  • @DimitrisEfthimakis I have not understood what you mean. Please reread my answer. – Vlad from Moscow Jul 05 '22 at 12:33
  • I thank you for your time but the code i am writing it is from a project and i have to use insertion sort and not any other method. If you run my programm again and do some prints as debugging is that the variable i does not have a big value so when it runs and gets through while it can not repeat as many times as it has to so it can sort itself correctly.Instead while runs only one time. – Dimitris Efthimakis Jul 05 '22 at 12:35
  • @DimitrisEfthimakis Please reread my answer one more. There is clear written how to use the function I showed in your code. – Vlad from Moscow Jul 05 '22 at 12:37
  • look at what i have just written – Dimitris Efthimakis Jul 05 '22 at 12:40
  • @DimitrisEfthimakis Sorry, I can not help you. I am sure that in my answer there is all clear explained. and how to resolve the problem – Vlad from Moscow Jul 05 '22 at 12:42
0

So guys i finally came up with an idea. So my idea is, instead of having if statements inside of if statements, i thought it might be a better solution to make multiple for statements. One for year, one for month and one for day so i can first sort the code based on year after that also based on month and finally for day too. Here is the code:

#include <stdio.h>

typedef struct 
{
int month;
int day;
int year;
float T_degC;
float PO4uM;
float SiO3uM;
float NO2uM;
float NO3uM;
float Salnty;
float O2ml_L;
}
hello;

int insertionSort(hello hellos[], int records);

int main()
{
FILE *file;

file = fopen("ocean.csv", "r");

if(file == NULL)
{
    printf("Error opening file. \n");
    return 1;
}

hello hellos[1405];

int read = 0;
int records = 0;   //dhladh struct

do
{
    read = fscanf(file,
                        "%d/%d/%d, %f, %f, %f, %f, %f, %f, %f",
                        &hellos[records].month,
                        &hellos[records].day,
                        &hellos[records].year,
                        &hellos[records].T_degC,
                        &hellos[records].PO4uM,
                        &hellos[records].SiO3uM,
                        &hellos[records].NO2uM,
                        &hellos[records].NO3uM,
                        &hellos[records].Salnty,
                        &hellos[records].O2ml_L);

    if(read == 10)
    {
        records++;  

    }
    if(read != 10 && feof(file) ==0)
    {
        printf("File format incorrect.\n");
        return 1;
    }
    if(ferror(file))
    {

        printf("Error reading file.\n");
        return 1;
    }

} while (!feof(file));

fclose(file);


printf("\n%d records read.\n\n", records);

insertionSort(hellos, records);
//insertionSort(hellos, records);

//Εκτυπωση των αποτελεσματων.

for(int i=0; i<records; i++)
{
    printf("\n%d, %d, %d, %f, %f, %f, %f, %f, %f, %f",
                        hellos[i].month,
                        hellos[i].day,
                        hellos[i].year,
                        hellos[i].T_degC,
                        hellos[i].PO4uM,
                        hellos[i].SiO3uM,
                        hellos[i].NO2uM,
                        hellos[i].NO3uM,
                        hellos[i].Salnty,
                        hellos[i].O2ml_L);

    printf("\n");
}
return 0;
    }

    int insertionSort(hello hellos[], int records)
    {
int i;
hello key;

for(int j=1; j<records; j++)
{
    key = hellos[j];
    i = j-1;
    if(key.year < hellos[i].year)
    {
        while(i >= 0 && (key.year < hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}

for(int k=1; k<records; k++)
{
    key = hellos[k];
    i = k-1;
    if((key.month < hellos[i].month) && (key.year == hellos[i].year))
    {
        while(i >= 0 && (key.month < hellos[i].month) && (key.year == hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}

for(int l=1; l<records; l++)
{
    key = hellos[l];
    i = l-1;
    if((key.day < hellos[i].day) && (key.month == hellos[i].month) && (key.year == hellos[i].year))
    {
        while(i >= 0 && (key.day < hellos[i].day) && (key.month == hellos[i].month) && (key.year == hellos[i].year))
        {
            hellos[i+1] = hellos[i];
            i = i-1;  
        }
        hellos[i+1] = key;
    }
}
return 0;
   }