0

I've found code on Google that was over 50 lines long, and that's completely unnecessary for what I'm trying to do.

I want to make a very simple cp implementation in C.

Just so I can play with the buffer sizes and see how it affects performance.

I want to use only Linux API calls like read() and write() but I'm having no luck.

I want a buffer that is defined as a certain size so data from file1 can be read into buffer and then written to file2 and that continues until file1 has reached EOF.

Here is what I tried but it doesn't do anything

#include <stdio.h>
#include <sys/types.h>

#define BUFSIZE 1024

/*
 * Attempted minimal cp: read argv[1] into a 1024-byte buffer and write the
 * buffer to argv[2], looping until read() returns 0.
 *
 * NOTE(review): kept exactly as posted. The code mixes the stdio type FILE
 * with the descriptor-based calls open()/read()/write(); see inline notes.
 */
int main(int argc, char* argv[]){

    FILE fp1, fp2;      /* NOTE(review): open() returns an int file descriptor, not a FILE */
    char buf[1024];     /* size is the literal 1024; the BUFSIZE macro is not used */
    int pos;            /* receives the return value of read() */


    /* NOTE(review): "r" and "w" are fopen() mode strings; open() expects
     * integer O_* flags from <fcntl.h>. */
    fp1 = open(argv[1], "r");
    fp2 = open(argv[2], "w");

    /* The loop stops only when read() returns exactly 0, so a negative
     * (error) return keeps it running. */
    while((pos=read(fp1, &buf, 1024)) != 0)
    {
        write(fp2, &buf, 1024);   /* always writes 1024 bytes, regardless of pos */
    }


    return 0;
}

The way it would work is ./mycopy file1.txt file2.txt

lost_in_the_source
  • 10,998
  • 9
  • 46
  • 75
Mkey
  • 155
  • 1
  • 4
  • 12
  • I would check both file descriptors against `-1`... – Iharob Al Asimi Nov 18 '15 at 23:51
  • 1
    1. Read the documentation for `open`, "r" and "w" are not valid. 2. you need to write the number of bytes that were read, which is `pos`. 3. if `read` fails it may return a negative number which will cause an infinite loop in your code. – user253751 Nov 18 '15 at 23:53
  • Why `&buf`? Maybe `&buf[0]`, but `&buf`?? Also, why write 1,024 bytes even if you read fewer than that? – David Schwartz Nov 18 '15 at 23:54
  • Does this even compile? Looks like `open` should be `fopen`. And `FILE` should be `FILE *`. – kaylum Nov 18 '15 at 23:55
  • I'm still a bit confused. I had O_CREAT instead of 'w' and it said that O_CREAT was undefined. – Mkey Nov 18 '15 at 23:56
  • Please, just go read the [man page](http://linux.die.net/man/2/open). If you want to use `open` (as opposed to `fopen`) then you need the `#include`s shown in that man page. – kaylum Nov 19 '15 at 00:00
  • Ok I guess I may have posted this question prematurely. I'm going to spend another hour or so trying and come back here. – Mkey Nov 19 '15 at 00:04

4 Answers4

4

This code has an important problem, the fact that you always write 1024 bytes regardless of how many you read.

Also:

  1. You don't check the number of command line arguments.
  2. You don't check if the source file exists (if it opens).
  3. You don't check that the destination file opens (permission issues).
  4. You pass the address of the array, which has a different type than a pointer to the first element of the array.
  5. The type of fp1 is wrong, as well as that of fp2.

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    
    #include <fcntl.h>
    
    /*
     * Minimal cp: copy the file named by argv[1] to the file named by argv[2]
     * using only open(), read(), write() and close().
     *
     * Returns 0 on success and -1 on bad usage or on any I/O error.
     */
    int main(int argc, char **argv)
    {
        char buffer[1024];
        int files[2];
        ssize_t count;
        int status = 0;

        /* Check for insufficient parameters */
        if (argc < 3)
            return -1;
        files[0] = open(argv[1], O_RDONLY);
        if (files[0] == -1) /* Check if file opened */
            return -1;
        /*
         * The permission bits are the third argument of open(); OR-ing them
         * into the flags word changes the flags and leaves the mode of a newly
         * created file undefined. O_TRUNC discards any previous contents so a
         * shorter source does not leave stale bytes at the end.
         */
        files[1] = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
        if (files[1] == -1) /* Check if file opened (permissions problems ...) */
        {
            close(files[0]);
            return -1;
        }

        /* read() returns 0 at EOF and -1 on error; only positive counts are data. */
        while (status == 0 && (count = read(files[0], buffer, sizeof(buffer))) > 0)
        {
            ssize_t done = 0;

            /* write() may accept fewer bytes than requested, so drain the buffer. */
            while (done < count)
            {
                ssize_t written = write(files[1], buffer + done, (size_t)(count - done));

                if (written == -1)
                {
                    status = -1;
                    break;
                }
                done += written;
            }
        }
        if (status == 0 && count == -1)
            status = -1;

        close(files[0]);
        /* A failing close() on the destination can signal a deferred write error. */
        if (close(files[1]) == -1)
            status = -1;

        return status;
    }
    
Iharob Al Asimi
  • 52,653
  • 6
  • 59
  • 97
  • Could you possibly fix the errors and post a solution? I have never used linux nor C until today so this is all very confusing. I'm trying to run a few tests to see how different buffer sizes affect each OS. – Mkey Nov 19 '15 at 00:00
  • Why do you use char * *argv rather than char* argv[]? – Mkey Nov 19 '15 at 00:06
  • Why do you use `char *argv[]`? – Iharob Al Asimi Nov 19 '15 at 00:07
  • You see, in the end `char *argv[]` is converted to `char **` inside `main()`, but I like it `char **argv` just because `char *argv[]` looks like java, it's silly but it's how I like it. – Iharob Al Asimi Nov 19 '15 at 00:10
  • I tried your code and it doesn't seem to actually copy the file – Mkey Nov 19 '15 at 00:15
  • i named it mycopy2.c and compiled in gcc and then ran with ./mycopy2 file.txt newfile.txt – Mkey Nov 19 '15 at 00:16
  • But after doing an LS newfile.txt does not show up in the directory – Mkey Nov 19 '15 at 00:16
  • The first code I posted I forgot the `O_CREAT` flag. Also, the correct way would be to check whether the source and destination are the same, perhaps check if the destination exists and also, read the permissions and set the same ones to the destination file. – Iharob Al Asimi Nov 19 '15 at 00:18
  • You have helped me tremendously! Thank you! – Mkey Nov 19 '15 at 00:28
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/95500/discussion-between-mkey-and-iharob). – Mkey Nov 19 '15 at 00:56
1

Go to section 8.3 of the K&R "The C Programming Language". There you will see an example of what you want to accomplish. Try using different buffer sizes and you will end up seeing a point where the performance tops.

jarr
  • 102
  • 2
  • 7
1
#include <stdio.h>
int cpy(char *, char *);
/*
 * Entry point: copies the file named by argv[1] to the file named by argv[2].
 *
 * Returns 0 on success, 1 when arguments are missing or the copy fails.
 */
int main(int argc, char *argv[])
{
    /* Both file names are required; argv[1]/argv[2] may not exist otherwise. */
    if (argc < 3) {
        fprintf(stderr, "usage: %s source destination\n",
                argc > 0 ? argv[0] : "cpy");
        return 1;
    }

    /* cpy() takes the destination first, so the arguments are swapped here. */
    if (cpy(argv[2], argv[1]) == -1) {
        perror("cpy");
        return 1;
    }
    return 0;
}
/*
 * Copy the contents of the file fnSrc into the file fnDest, one character at
 * a time through stdio.
 *
 * fnDest: path of the destination file; created or truncated.
 * fnSrc:  path of the source file; must be readable.
 *
 * Returns 0 on success, -1 if either file cannot be opened or an I/O error
 * occurs while copying.
 */
int cpy(char *fnDest, char *fnSrc)
{
    FILE *fpDest, *fpSrc;
    int c;
    int rc = 0;

    /* Open the source first so a missing source never truncates the destination. */
    if ((fpSrc = fopen(fnSrc, "r")) == NULL)
        return -1;

    if ((fpDest = fopen(fnDest, "w")) == NULL) {
        fclose(fpSrc);
        return -1;
    }

    /* c is an int so that EOF can be told apart from every valid character. */
    while ((c = getc(fpSrc)) != EOF) {
        if (putc(c, fpDest) == EOF) {
            rc = -1;
            break;
        }
    }

    /* getc() also returns EOF on a read error, not only at end of file. */
    if (ferror(fpSrc))
        rc = -1;

    fclose(fpSrc);
    /* fclose() flushes buffered output, so its failure means lost data. */
    if (fclose(fpDest) == EOF)
        rc = -1;

    return rc;
}

First, we get the two file names from the command line (argv[1] and argv[2]). The reason we don't start from *argv, is that it contains the program name.

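We then call our cpy function, which copies the contents of the first named file (the source) into the second named file (the destination). Note that cpy takes the destination as its first parameter, so main passes the two names in swapped order.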

Within cpy, we declare two file pointers: fpDest, the destination file pointer, and fpSrc, the source file pointer. We also declare c, the character that will be read. It is of type int, because EOF does not fit in a char.

If we could open the files successfully (if fopen does not return NULL), we get characters from fpSrc and copy them onto fpDest, as long as the character we have read is not EOF. Once we have seen EOF, we close our file pointers, and return 0, the success indicator. If we could not open the files, -1 is returned. The caller can check the return value for -1, and if it is, print an error message.

lost_in_the_source
  • 10,998
  • 9
  • 46
  • 75
1

Good question. Related to another good question:

How can I copy a file on Unix using C?

There are two approaches to the "simplest" implementation of cp. One approach uses a file copying system call function of some kind - the closest thing we get to a C function version of the Unix cp command. The other approach uses a buffer and read/write system call functions, either directly, or using a FILE wrapper.

It's likely the file copying system calls that take place solely in kernel-owned memory are faster than the system calls that take place in both kernel- and user-owned memory, especially in a network filesystem setting (copying between machines). But that would require testing (e.g. with Unix command time) and will be dependent on the hardware where the code is compiled and executed.

It's also likely that someone with an OS that doesn't have the standard Unix library will want to use your code. Then you'd want to use the buffer read/write version, since it only depends on <stdlib.h> and <stdio.h> (and friends).

<unistd.h>

Here's an example that uses the function copy_file_range, declared in <unistd.h> (a nonstandard Linux/GNU extension rather than part of standard Unix), to copy a source file to a (possibly non-existent) destination file. The copy takes place in kernel space.

/* copy.c
 *
 * Defines function copy:
 *
 * Copy source file to destination file on the same filesystem (possibly NFS).
 * If the destination file does not exist, it is created. If the destination
 * file does exist, the old data is truncated to zero and replaced by the 
 * source data. The copy takes place in the kernel space.
 *
 * Compile with:
 *
 * gcc copy.c -o copy -Wall -g
 */

#define _GNU_SOURCE 
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

/* On versions of glibc < 2.27 there is no copy_file_range() wrapper, so the
 * system call has to be invoked directly through syscall().
 *
 * To determine the glibc version, compute an integer from the __GLIBC__ and
 * __GLIBC_MINOR__ macros that the glibc headers define. The strides are chosen
 * to allow enough space for a two-digit minor version and patch level, so
 * glibc 2.27 becomes 22700.
 *
 * Note that __GNUC__ and friends describe the compiler, not the C library,
 * and so cannot be used for this check.
 */
#define GLIBC_VERSION (__GLIBC__*10000 + __GLIBC_MINOR__*100)
#if defined(__GLIBC__) && GLIBC_VERSION < 22700
/* Thin wrapper with the same parameters as the glibc function; forwards
 * everything to the raw system call and returns its result.
 */
static loff_t copy_file_range(int in, loff_t* off_in, int out, 
  loff_t* off_out, size_t s, unsigned int flags)
{
  return syscall(__NR_copy_file_range, in, off_in, out, off_out, s,
    flags);
}
#endif

/* The copy function.
 *
 * Copies the file at path src to path dst using copy_file_range(). dst is
 * created with mode 0644 if it does not exist and truncated if it does.
 *
 * Returns EXIT_SUCCESS. On any failure a perror() message is printed and the
 * process exits with EXIT_FAILURE.
 */
int copy(const char* src, const char* dst){
  int in, out;
  struct stat st;   /* not named "stat", which would shadow stat() */
  loff_t remaining, n;
  if(0>(in = open(src, O_RDONLY))){
    perror("open(src, ...)");
    exit(EXIT_FAILURE);
  }
  if(fstat(in, &st)){
    perror("fstat(in, ...)");
    exit(EXIT_FAILURE);
  }
  remaining = st.st_size;
  if(0>(out = open(dst, O_CREAT|O_WRONLY|O_TRUNC, 0644))){
    perror("open(dst, ...)");
    exit(EXIT_FAILURE);
  }
  /* An empty source needs no copy call at all: dst already exists and is
   * empty after the open() above.
   */
  while(0<remaining){
    n = copy_file_range(in, NULL, out, NULL, remaining, 0);
    /* Only a negative return is an error; errno is meaningful only then. */
    if(0>n){
      perror("copy_file_range(...)");
      exit(EXIT_FAILURE);
    }
    /* 0 means end of input was reached early (e.g. the source shrank). */
    if(0==n) break;
    remaining-=n;
  }
  close(in);
  /* close() on the written file can report a deferred write error. */
  if(close(out)){
    perror("close(out)");
    exit(EXIT_FAILURE);
  }
  return EXIT_SUCCESS;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */

/* Entry point: expects exactly two arguments, the source path and the
 * destination path, and returns the status reported by copy().
 */
int main(int argc, char* argv[argc]){
  if(argc!=3){
    /* Terminate the usage line so the shell prompt starts on a fresh line. */
    printf("Usage: %s <SOURCE> <DESTINATION>\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  return copy(argv[1], argv[2]);
}

It's based on the example in my Ubuntu 20.x Linux distribution's man page for copy_file_range. Check your man pages for it with:

> man copy_file_range

Then hit j or Enter until you get to the example section. Or search by typing /example.

<stdio.h>/<stdlib.h> only

Here's an example that only uses stdlib/stdio. The downside is it uses an intermediate buffer in user-space.

/* copy.c
 *
 * Compile with:
 * 
 * gcc copy.c -o copy -Wall -g
 *
 * Defines function copy:
 *
 * Copy a source file to a destination file. If the destination file already
 * exists, this clobbers it. If the destination file does not exist, it is
 * created. 
 *
 * Uses a buffer in user-space, so may not perform as well as 
 * copy_file_range, which copies in kernel-space.
 *
 */

#include <stdlib.h>
#include <stdio.h>

#define BUF_SIZE 65536 //2^16

/* Copy the file at in_path to out_path through a BUF_SIZE-byte buffer in
 * user space. out_path is created if missing and overwritten if present.
 *
 * Returns EXIT_SUCCESS when every byte was copied, EXIT_FAILURE (after a
 * perror() message) if allocation, opening, reading, writing or closing fails.
 */
int copy(const char* in_path, const char* out_path){
  int status = EXIT_FAILURE;
  size_t n;
  FILE* in = NULL;
  FILE* out = NULL;
  /* The buffer is always filled by fread() before use, so no zeroing needed. */
  char* buf = malloc(BUF_SIZE);

  if(!buf){
    perror("malloc");
    goto done;
  }
  /* Open the source first so a missing source never clobbers the destination. */
  if(!(in = fopen(in_path, "rb"))){
    perror(in_path);
    goto done;
  }
  if(!(out = fopen(out_path, "wb"))){
    perror(out_path);
    goto done;
  }

  while((n = fread(buf, 1, BUF_SIZE, in)) > 0){
    /* A short fwrite() is an error, not a partial success. */
    if(fwrite(buf, 1, n, out) != n){
      perror("fwrite");
      goto done;
    }
  }
  /* fread() returns 0 both at EOF and on error; tell the two apart. */
  if(ferror(in)){
    perror("fread");
    goto done;
  }
  status = EXIT_SUCCESS;

done:
  if(in) fclose(in);
  /* fclose() flushes buffered data, so its failure means lost output. */
  if(out && fclose(out) == EOF){
    perror("fclose");
    status = EXIT_FAILURE;
  }
  free(buf);
  return status;
}

/* Test it out.
 *
 * BASH:
 *
 * gcc copy.c -o copy -Wall -g
 * echo 'Hello, world!' > src.txt
 * ./copy src.txt dst.txt
 * [ -z "$(diff src.txt dst.txt)" ]
 *
 */
/* Program entry: with exactly a source and a destination path, run the copy
 * and hand its status back; otherwise print the usage line and fail.
 */
int main(int argc, char* argv[argc]){
  if(argc==3)
    return copy(argv[1], argv[2]);

  /* Wrong number of arguments. */
  printf("Usage: %s <SOURCE> <DESTINATION>\n", argv[0]);
  return EXIT_FAILURE;
}

Another way to ensure portability in general while still working with a Unix-like C API is to develop with GNOME (e.g. GLib, GIO)

https://docs.gtk.org/glib/ https://docs.gtk.org/gio/

angstyloop
  • 117
  • 6