0

I converted chunk.c, which was written with high-level I/O functions (stdio), to use low-level system calls instead.

This is chunk.c written with high-level I/O functions:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Output-name prefix used when -p is not given. */
#define DEFAULT_PREFIX "x"
/* Chunk size used when neither -l nor -c is given. */
#define DEFAULT_CHUNK_SIZE 1000
/* NOTE(review): not referenced anywhere in the visible code. */
#define ALPHABET_SIZE 26
/* Width of the zero-padded numeric suffix appended to the prefix. */
#define MAX_DIGITS 2

/* Write the one-line command synopsis to standard output. */
void print_usage() {
    static const char usage_text[] =
        "Usage: chunk [-l line_count | -w word_count | -c character_count] [-p prefix] [-s suffix] [-f filename.txt | < filename.txt]\n";

    fputs(usage_text, stdout);
}

int main(int argc, char *argv[]) {
    char *prefix = DEFAULT_PREFIX;
    int chunk_size = DEFAULT_CHUNK_SIZE;
    int suffix_start = 0;
    char *filename = NULL;

    // Parse command line arguments
    int opt;
    while ((opt = getopt(argc, argv, "l:w:c:p:s:f:")) != -1) {
        switch (opt) {
            case 'l':
                chunk_size = atoi(optarg);
                break;
            case 'w':
                // TODO: implement word count option
                printf("Error: -w option not implemented\n");
                print_usage();
                return 1;
            case 'c':
                chunk_size = atoi(optarg);
                break;
            case 'p':
                prefix = optarg;
                break;
            case 's':
                suffix_start = atoi(optarg);
                break;
            case 'f':
                filename = optarg;
                break;
            default:
                print_usage();
                return 1;
        }
    }

    // Open input file
    FILE *input_file = NULL;
    input_file = fopen(filename, "r");
        if (input_file == NULL) {
            printf("Error: could not open file '%s'\n", filename);
            return -1;
        }

    // Read input file and write output files
    int line_count = 0;
    int chunk_count = 0;
    char suffix[MAX_DIGITS + 1];
    suffix[MAX_DIGITS] = '\0';
    FILE *output_file = NULL;
    
    while (!feof(input_file)) {
        if (line_count == 0) {
            // Close previous output file
            if (output_file != NULL) {
                fclose(output_file);
                output_file = NULL;
            }
            // Open new output file
            snprintf(suffix, MAX_DIGITS + 1, "%0*d", MAX_DIGITS, suffix_start + chunk_count);
            char *filename_buffer = (char *) malloc(strlen(prefix) + MAX_DIGITS + 1);
            snprintf(filename_buffer, strlen(prefix) + MAX_DIGITS + 1, "%s%s", prefix, suffix);
            output_file = fopen(filename_buffer, "w");
            free(filename_buffer);
            if (output_file == NULL) {
                printf("Error: could not create output file '%s'\n", filename_buffer);
                return -1;
            }
            chunk_count++;
        }
    
        // Read input line
        char line_buffer[1024];
        fgets(line_buffer, 1024, input_file);
        if (feof(input_file)) {
            break;
        }
        // Write output line
        fputs(line_buffer, output_file);
        line_count++;
        if (line_count == chunk_size) {
            line_count = 0;
        }
    }
    // Close last output file
    if (output_file != NULL) {
        fclose(output_file);  
   }
}

And this is the converted version, written with low-level system calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>

/* Output-name prefix used when -p is not given. */
#define DEFAULT_PREFIX "x"
/* Chunk size used when neither -l nor -c is given. */
#define DEFAULT_CHUNK_SIZE 1000
/* NOTE(review): not referenced anywhere in the visible code. */
#define ALPHABET_SIZE 26
/* Width of the zero-padded numeric suffix appended to the prefix. */
#define MAX_DIGITS 2

/* Emit the command synopsis (a single line) on standard output. */
void print_usage() {
    const char *synopsis =
        "Usage: chunk [-l line_count | -w word_count | -c character_count] [-p prefix] [-s suffix] [-f filename.txt | < filename.txt]\n";

    printf("%s", synopsis);
}

int main(int argc, char *argv[]) {
char *prefix = DEFAULT_PREFIX;
int chunk_size = DEFAULT_CHUNK_SIZE;
int suffix_start = 0;
char *filename = NULL;

// Parse command line arguments
int opt;
while ((opt = getopt(argc, argv, "l:w:c:p:s:f:")) != -1) {
    switch (opt) {
    case 'l':
      chunk_size = atoi(optarg);
      break;

    case 'w':
      // TODO: implement word count option
      printf("Error: -w option not implemented\n");
      print_usage();
      return 1;

    case 'c':
      chunk_size = atoi(optarg);
      break;

    case 'p':
      prefix = optarg;
      break;

    case 's':
      suffix_start = atoi(optarg);
      break;

    case 'f':
      filename = optarg;
      break;

    default:
      print_usage();
      return 1;
    }
}

// Open input file
int input_fd = STDIN_FILENO;
if (filename != NULL) {
    input_fd = open(filename, O_RDONLY);
        if (input_fd == -1) {
            printf("Error: could not open file '%s': %s\n", filename, strerror(errno));
        return -1;
        }
}

// Read input file and write output files
int line_count = 0;
int chunk_count = 0;
char suffix[MAX_DIGITS + 1];
suffix[MAX_DIGITS] = '\0';
int output_fd = -1;

while (1) {
    if (line_count == 0) {
        // Close previous output file
        if (output_fd != -1) {
        close(output_fd);
        output_fd = -1;
    }
    
// Open new output file
snprintf(suffix, MAX_DIGITS + 1, "%0*d", MAX_DIGITS, suffix_start + chunk_count);

char *filename_buffer = (char *) malloc(strlen(prefix) + MAX_DIGITS + 1);

snprintf(filename_buffer, strlen(prefix) + MAX_DIGITS + 1, "%s%s", prefix, suffix);

output_fd = open(filename_buffer, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);

free(filename_buffer);

if (output_fd == -1) {
    printf("Error: could not create output file '%s': %s\n", filename_buffer, strerror(errno));
    return -1;
    }
chunk_count++;
}

// Read input line
char line_buffer[1024];
ssize_t bytes_read = read(input_fd, line_buffer, 1024);
if (bytes_read == -1) {
    printf("Error: could not read input file: %s\n", strerror(errno));
    return -1;
}
if (bytes_read == 0) {
    break;
}

// Write output line
ssize_t bytes_written = write(output_fd, line_buffer, bytes_read);
if (bytes_written == -1) {
    printf("Error: could not write output file: %s\n", strerror(errno));
    return -1;
}
line_count++;
if (line_count == chunk_size) {
    line_count = 0;
    }
}

    // Close last output file
    if (output_fd != -1) {
    close(output_fd);
    }
}

Neither version is complete yet, but the expected result is:

$ chunk -l 100 -f z_answer.jok.txt -p part- -s 00
$ echo $?   # check exit status
0
$ wc *part* z_answer.jok.txt 
  100   669  4052 part-00
  100   725  4221 part-01
  100   551  3373 part-02
  100   640  3763 part-03
  100   588  3685 part-04
  100   544  3468 part-05
   90   473  3017 part-06
  690  4190 25579 z_answer.jok.txt
 1380  8380 51158 total

When I run the chunk.c written with high-level I/O functions, I get the correct result shown above.

But when I run the chunk.c written with low-level system calls, the result looks like this:

$ chunk -l 100 -f z_answer.jok.txt -p part- -s 00
$ echo $?   # check exit status
0
$ wc *part* z_answer.jok.txt 
  100   669  4052 part-00

This means chunk.c does not work properly. I thought the problem was in the while loop (or its condition), but I still cannot find it. What should I change to make it run properly?

springbook
  • 21
  • 2
  • `while (!feof (file))`, you can't check for `EOF` beforehand. See https://stackoverflow.com/q/5431941/20017547 – Harith Mar 21 '23 at 17:03
  • There are three different command-line options, -l, -w, and -c, which are parsed with getopt(), but I have only implemented one of them (-l) so far, so the program works when I run it with the -l option. – springbook Mar 21 '23 at 17:14
  • _Why_ do you want to use `read` (vs. `fgets`)? You have to replicate [some of] the functionality of `fgets`. See my answer: [read line by line in the most efficient way *platform specific*](https://stackoverflow.com/a/33620968/5382650). It explains why `read` doesn't work too well (i.e. you don't align on the newlines). If you want the low level approach, using `mmap` and (e.g.) `strchr(bp,'\n')` can separate the data into lines. – Craig Estey Mar 21 '23 at 19:21

0 Answers0