0

My program need to detect duplicates in given file and replace them with hard links. It work fine on windows (without linking), but it didn't work on linux. For 5 files in folder results are: List & Results. It correctly detects duplicats on windows, but those NULLs probably brake it on linux. I tried to add if at end of listFiles(unactive with //) it make good list but don't detect 2nd duplicat on windows(not tested on linux). btw I mentioned slashes at bottom but point for attention. Thx for help.

#include <stdio.h>
#include <string.h>
#include <dirent.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>

int compare(char *file_1, char *file_2);
int listFiles(char *path, int *count, char ***list);
int hardlink (char *path_1, char *path_2);
int is_regular_file(const char *path);

int main()
{
    char path[100];
    int count=0;
    int comp=0;
    char **list=NULL;
    printf("input path: ");
    scanf("%s", path);
    count=listFiles(path,&count,&list);
    printf("%d\n",count);
    if(list==NULL){
        printf("wrong path");
        return 1;
    }
    for(int i=0;i<count;++i)
    {
        printf("%d %s\n",i,list[i]);
    }
    for (int i=0; i<count; ++i){
        for (int j=i+1; j<count; ++j){
            comp= compare(list[i], list[j]);
            if(comp==0&&list[j]!=NULL&&list[i]!=NULL){
                    printf("%s to dup %s\n",list[j],list[i]);
            //        hardlink (list[i], list[j]);
            for(int k=j ;k<count-1;++k){
                list[k]=list[k+1];
            }
            free(list[count]);
            --count;
            }
        }
    }
    for (int i = 0; i < count; ++i)
        free(list[i]);
    free(list);
    return 0;
}
int listFiles(char *basePath,int *count, char ***list)
{
    char path[1000];
    int temp=*count;
    struct dirent *dp;
    DIR *dir = opendir(basePath);
    if (!dir){
        return temp;
    }
    while ((dp = readdir(dir)) != NULL){
        if (strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0){
            strcpy(path, basePath);
            strcat(path, "/");
            strcat(path, dp->d_name);
            char *file_path=NULL;
            if (is_regular_file(path)){
            int x=0;
                for( int i =0;path[i]!='\0';++i){
                    file_path=realloc(file_path, (x+1)*sizeof(*file_path));
                    (file_path)[x++] = path[i];
                }
                file_path=realloc(file_path, (x+1)*sizeof(*file_path));
                file_path[x]='\0';
            }
         //   if(file_path!=NULL){
                *list=realloc(*list, (temp+1)*sizeof(**list));
                (*list)[temp++] = file_path;
        //    }
        }
        temp=listFiles(path,&temp,list);
    }
    closedir(dir);
    return temp;
}
int compare(char *file_1, char *file_2)
{
    FILE *data_1 = fopen(file_1,"r");
    FILE *data_2 = fopen(file_2,"r");
    char *temp_1=malloc(10000);
    char *temp_2=malloc(10000);
    if (data_1){
            int x=0;
            int c;
            while ((c = fgetc(data_1)) != EOF){
               (temp_1)[x++] = (char) c;
            }
            temp_1[x] = '\0';
            }
    if (data_2){
            int x=0;
            int c;
            while ((c = fgetc(data_2)) != EOF){
               (temp_2)[x++] = (char) c;
            }
            temp_2[x] = '\0';
            }
            fclose(data_1);
            fclose(data_2);
    if(strcmp(temp_1,temp_2)==0)return 0;
    else return 1;
}
int hardlink (char *path_1, char *path_2){
    int temp= remove(path_2);
    if (temp==0)printf("file %s deleted\n", path_2);
    else {
            printf("can't delete %s\n", path_2);
            return 1;
    }
    int linking = link ( path_1 , path_2 ) ;
    if (linking == 0){
        FILE *fil = fopen (path_2, "r");
        if (fil != NULL){
                printf (" hardlink %s for %s created\n",path_2,path_1);
                fclose(fil);
        }
        else printf("can't create hardlink %s\n", path_2);
        return 0;
    }
    else printf("can't create hardlink %s\n", path_2);
    puts("");
    return 0;
}
int is_regular_file(const char *path)
{
    struct stat path_stat;
    stat(path, &path_stat);
    return S_ISREG(path_stat.st_mode);
}

in list files you need to change strcat(path, "\"); to strcat(path, "/"); if you want to work on linux

Don Kielon
  • 31
  • 4
  • 4
    `strcat(path, "\\");` could be your problem. The path seperator in unix is '/' – Majid Laissi Jan 08 '19 at 16:58
  • 1
    `if(comp==0&&list[j]!=NULL&&list[i]!=NULL)` - don't save on spaces... – Eugene Sh. Jan 08 '19 at 16:59
  • 2
    You seem to have given an answer at the end of your question. What exactly do you want to know, then? – John Bollinger Jan 08 '19 at 17:00
  • you have several problems in _compare_ : 2 big memory leaks with 2 malloc without free, and your comparison do not work with files greater than 10000. But finaly you do not need at all to memorize all the content of the two files to compare them, just read them by block (not char by char) and compare the read block, stopping when the read size is different or the content of them is different. Doing that you have no memory leaks nor problem with the size of the file to compare, and this is faster – bruno Jan 08 '19 at 17:10
  • See also [How to get the file separator symbol in standard C/C++ : / or \?](https://stackoverflow.com/q/12971499/2410359). – chux - Reinstate Monica Jan 08 '19 at 17:13
  • 1
    I suspect `strcat(path, "/");` will work in both OS. – chux - Reinstate Monica Jan 08 '19 at 17:20

1 Answers1

1

you're using strcat(path, "\\"); for both Linux and windows. The path seperator is not the same.

const char separator =
#ifdef _WIN32
                        '\\';
#else
                        '/';
#endif
Majid Laissi
  • 19,188
  • 19
  • 68
  • 105