0

I know this way of copying files, which I think is pretty much the standard way of copying files in C.

#include <stdio.h>
#include <stdlib.h>

int main()
{
   int ch; /* int, not char, so the EOF comparison below works reliably */
   char source_file[20], target_file[20];
   FILE *source, *target;

   printf("Enter name of file to copy\n");
   gets(source_file);

   source = fopen(source_file, "r");

   if( source == NULL )
   {
      printf("Press any key to exit...\n");
      exit(EXIT_FAILURE);
   }

   printf("Enter name of target file\n");
   gets(target_file);

   target = fopen(target_file, "w");

   if( target == NULL )
   {
      fclose(source);
      printf("Press any key to exit...\n");
      exit(EXIT_FAILURE);
   }

   while( ( ch = fgetc(source) ) != EOF )
      fputc(ch, target);

   printf("File copied successfully.\n");

   fclose(source);
   fclose(target);

   return 0;
}

But this way opens the file and copies it character by character. The files I want to copy are HUGE and there are many of them, so this way will take very, very long. Is there a way I can copy these files directly? I know the terminal and command prompt are completely different things than the C language, but a simple

cp sourcefile.txt destinationfile.txt

can do the trick.

Are there any such commands or tricks in C that I can use? I cannot use a

system("cp sourcefile.txt destinationfile.txt");

call, because I am writing a robust program that should work on both Linux and Windows.

ITguy

4 Answers

3

Well, what do you imagine the cp command itself does to copy files? It opens the source file in read mode and the destination file in write mode, and copies everything in binary chunks! OK, more things can be involved if you pass other options to cp, but the copy itself is no more magic than that.

That being said, what you do is not that. You are copying the file character by character. Even if the standard library does some buffering, you are repeatedly calling a function when it could be avoided. And... never use gets. It has been deprecated for ages because it is insecure: if the user enters a long file name (more than 19 characters) you get a buffer overflow. And do not forget to test all I/O functions, including output ones. When writing a huge file to an external medium such as a USB key, you could run out of space on the device, and your program would still claim it had successfully done the copy.

Copying loop could be something like:

#define SIZE 16384
char buffer[SIZE];
size_t crin, crout;

while ((crin = fread(buffer, 1, SIZE, source)) > 0) {
    crout = fwrite(buffer, 1, crin, target);
    if (crout != crin) { /* control everything could be written */
        perror("Write error");
        break;
    }
}
if (ferror(source)) { /* test read error (removal of removable media, ...) */
    perror("Read error");
}

A low-level optimization here would be to use POSIX functions directly instead of the standard library ones, because as soon as you are doing binary I/O in big chunks, the buffering of the standard library gives no advantage and you simply pay its overhead.
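
A minimal sketch of that low-level variant, assuming a POSIX system (the descriptors in and out would come from open(); the function name and chunk size are just placeholders):

#include <stdio.h>
#include <unistd.h>

/* Copy from one file descriptor to another in large chunks.
 * Returns 0 on success, -1 on error. */
static int copy_fd(int in, int out)
{
    char buf[65536];                    /* arbitrary chunk size */
    ssize_t nread;

    while ((nread = read(in, buf, sizeof buf)) > 0) {
        char *p = buf;
        while (nread > 0) {             /* write() may write less than asked */
            ssize_t nwritten = write(out, p, (size_t)nread);
            if (nwritten < 0) {
                perror("write");
                return -1;
            }
            p += nwritten;
            nread -= nwritten;
        }
    }
    if (nread < 0) {                    /* read() reports errors as -1 */
        perror("read");
        return -1;
    }
    return 0;
}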

Serge Ballesta
1

This is how I have moved a file in the past without having to open it:

#include <stdio.h>

int main(void)
{
  if (rename("C:\\oldFile.txt", "C:\\newfile.txt") != 0)
    perror("rename failed");
  return 0;
}
Maxqueue
  • Upvote for you, but this would remove my original file and I need to keep the originals untouched. – ITguy Nov 04 '15 at 16:16
  • 1
    Also `rename()` might very well fail if source and destination aren't in the same file system. – alk Nov 04 '15 at 16:23
1

One thing to be aware of is that you're copying in the slowest possible way, because you're doing it character by character. One improvement would be to copy full lines or bigger text chunks, using fgets and fputs.
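
A sketch of that fgets/fputs variant, reusing the source and target streams from the question (the line buffer size is arbitrary):

char line[4096];                        /* arbitrary line buffer */

while (fgets(line, sizeof line, source) != NULL) {
    if (fputs(line, target) == EOF) {
        perror("fputs");
        break;
    }
}
if (ferror(source))
    perror("fgets");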

Even better is to not copy the file as a text file, but instead just as binary chunks. This is achieved by opening the files in binary mode with the b flag, e.g. target = fopen(target_file, "wb");, and using fread and fwrite instead of the per-character functions.

In both scenarios you have to use a temporary buffer of a reasonable size (it could be the size of the file, or fixed). Determining the optimal size is not trivial; a sketch follows below.
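
For the binary variant, a sketch under the same assumptions (the 64 KiB buffer size is a guess, not a tuned value):

FILE *source = fopen(source_file, "rb");   /* b flag: binary mode */
FILE *target = fopen(target_file, "wb");
/* ... NULL checks as in the question ... */

char buffer[1 << 16];                      /* 64 KiB chunk, chosen arbitrarily */
size_t n;
while ((n = fread(buffer, 1, sizeof buffer, source)) > 0) {
    if (fwrite(buffer, 1, n, target) != n) {
        perror("fwrite");
        break;
    }
}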

Yet another way to copy, and according to my operating systems professor what cp does, is to use memory mapped files. Unfortunately, how to use memory mapped files is not portable but depends on your operating system, i.e. platform. For Unix, the manpage of mmap is your friend. This is an example Unix implementation by me:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>


int main(int argc, const char * argv[]) {

    if (argc != 3)
    {
        fprintf(stderr, "Usage %s <SourceFile> <DestinationFile>\n",argv[0]);
        return EXIT_FAILURE;
    }

    int source_file_desc = open(argv[1], O_RDONLY);
    if (source_file_desc == -1) {
        perror("Can't open source file");
        return EXIT_FAILURE;
    }
    struct stat source_info;
    if (stat(argv[1], &source_info) != 0) {
        perror("Can't get source file infos");
        return EXIT_FAILURE;
    }
    void *source_mem = mmap(NULL, source_info.st_size, PROT_READ, MAP_FILE|MAP_PRIVATE, source_file_desc, 0);
    if (source_mem == MAP_FAILED) {
        perror("Mapping source file failed");
        return EXIT_FAILURE;
    }

    /* O_CREAT requires a mode argument; the permissions are copied below */
    int destination_file_desc = open(argv[2], O_TRUNC|O_CREAT|O_RDWR, 0666);
    if (destination_file_desc == -1) {
        perror("Can't open destination file");
        return EXIT_FAILURE;
    }
    if (chmod(argv[2], source_info.st_mode) != 0) {
        perror("Can't copy file permissions");
    }
    /* Grow the destination file to the source size: seek to the last
     * byte and write a dummy byte there, so the full size can be mapped. */
    if (lseek(destination_file_desc, source_info.st_size-1, SEEK_SET) == -1) {
        perror("Can't seek to new end of destination file");
        return EXIT_FAILURE;
    }
    unsigned char dummy = 0;
    if (write(destination_file_desc, &dummy, 1) == -1)
    {
        perror("Couldn't write dummy byte");
        return EXIT_FAILURE;
    }


    void *destination_mem = mmap(NULL, source_info.st_size, PROT_WRITE, MAP_FILE|MAP_SHARED, destination_file_desc, 0);
    if (destination_mem == MAP_FAILED) {
        perror("Mapping destination file failed");
        return EXIT_FAILURE;
    }

    memcpy(destination_mem, source_mem, source_info.st_size);

    munmap(source_mem,source_info.st_size);
    munmap(destination_mem, source_info.st_size);
    close(source_file_desc);
    close(destination_file_desc);

    return EXIT_SUCCESS;
}
Superlokkus
  • Why use memory mapped files? Just open in binary mode and read a large chunk, then write the chunk to the destination. If you want it to be as fast as possible, then thread the reads/writes so you're not serializing the latencies (though this may just cause the serialization delays to move further up the chain) – Russ Schultz Nov 04 '15 at 16:18
  • AFAIK most operating systems do some optimizing when using memory mapped files, but I'm afraid I have no hard facts yet. – Superlokkus Nov 04 '15 at 16:24
  • The performance should be similar to copying a block that’s aligned to the block size: the OS can optimize if it can just page out entire blocks with no offset. – Davislor Nov 04 '15 at 16:52
  • Also, glib has platform-independent memory-mapped files. – Davislor Nov 04 '15 at 17:00
  • 1
    Given that file copying is strictly a sequential operation that gains little to no benefit from caching, `read`/`write` with a sufficiently large buffer will probably be faster than `mmap`. See [this answer](http://stackoverflow.com/a/6383253/1687119) for more details. – dbush Nov 04 '15 at 19:59
  • @dbush Thank you very much. I had appeased my doubts about the complex internals of the kernel, but your link takes care of that too. – Superlokkus Nov 04 '15 at 20:27
0

If it’s not a problem that any changes to one copy would affect the other, you can create a link to the file. How this works depends on the OS.
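
For instance, on POSIX systems a hard link can be created with link(); a minimal sketch (Windows would need a different API, such as CreateHardLink):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* After this, both names refer to the same file; no data is copied. */
    if (link("sourcefile.txt", "destinationfile.txt") != 0) {
        perror("link");
        return 1;
    }
    return 0;
}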

If you want to optimize a file copy as much as possible using only the standard library, here is what I suggest (untested):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern bool copy_file( FILE* dest, FILE* restrict src );
static bool error_helper( const char* file, int line, const char* msg );

#if defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386) || defined(_M_IX86) || defined(_X86_) || defined(__X86__) || defined(__I86__) || defined(__INTEL__) || defined(__386)
#  define PAGE_SIZE 4096U
#else
#  error "Define the page size on your system, or use a system call such as sysconf() to find it."
#endif

#define non_fatal_stdlib_error() error_helper( __FILE__, __LINE__, strerror(errno) )

bool copy_file( FILE* dest, FILE* restrict src )
{
  errno = 0;
  if ( !(dest = freopen( NULL, "w+", dest )) )
    return non_fatal_stdlib_error();

  /* Try to help the library out by turning buffering off and allocating an aligned block; it might be able to detect that at runtime.
   * On the other hand, the unbuffered implementation might be worse. */
  setvbuf( src,  NULL, _IONBF, BUFSIZ );
  setvbuf( dest, NULL, _IONBF, BUFSIZ );

  char* const buffer = aligned_alloc( PAGE_SIZE, PAGE_SIZE );
  if (!buffer)
    return non_fatal_stdlib_error();

  size_t n = fread( buffer, 1, PAGE_SIZE, src );
  while ( PAGE_SIZE == n ) {
    const size_t written = fwrite( buffer, 1, PAGE_SIZE, dest );
    if ( written != PAGE_SIZE )
      return non_fatal_stdlib_error();

    n = fread( buffer, 1, PAGE_SIZE, src );
  } // end while

  if (ferror(src))
    return non_fatal_stdlib_error();

  if ( n > 0 ) {
    const size_t written = fwrite( buffer, 1, n, dest );
    if ( written != n )
      return non_fatal_stdlib_error();
  }

  free(buffer); /* release the page buffer on the success path */
  return true;
}

bool error_helper( const char* file, int line, const char* msg )
{
  fflush(stdout);
  fprintf( stderr, "Error at %s, line %d: %s.\n", file, line, msg );
  fflush(stderr);
  return false;
}

This at least gives the library implementation a chance to detect that all reads and writes are single memory pages.
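
A hypothetical driver for copy_file, just to show the calling convention (file names are placeholders):

#include <stdbool.h>
#include <stdio.h>

extern bool copy_file( FILE* dest, FILE* restrict src );

int main( int argc, char* argv[] )
{
  if ( argc != 3 ) {
    fprintf( stderr, "Usage: %s <source> <dest>\n", argv[0] );
    return 1;
  }
  FILE* src  = fopen( argv[1], "rb" );
  FILE* dest = fopen( argv[2], "wb" );
  if ( !src || !dest ) {
    perror( "fopen" );
    return 1;
  }
  const bool ok = copy_file( dest, src );
  fclose( src );
  fclose( dest );
  return ok ? 0 : 1;
}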

Davislor