3

I've finally finished writing my word-counting code. It counts the total number of words in files (e.g. .txt files). Now I want to use multiple fork() calls to access and read every file; I have been studying this over the last week. I use global variables to hold the number of counted words. As far as I know, if I use fork(), the global variables I use end up as 0. To avoid that, I tried mmap() and similar functions, and that works. But I also want to use pipe() (or a FIFO, if possible) to communicate the counted values between processes.

I use the nftw() function to walk through folders and files. My logic is shown in the picture below. How can I use fork() and pipe() (or a FIFO) in this code? fork() is really complicated for me because of my inexperience; I'm new to both pipe() and fork(). My idea is this: there will be one fork() for every file (e.g. each .txt file), and each child process reads its file. If there is another folder that contains files, one of the already-created child processes forks again and the new children read those files. I have also tried to explain this in the drawing below. Thank you. I want to learn how to use them.

int countInEveryFolder(const char *dir)

is used because I don't know how to count the files in a folder (up to the next folder) from inside the nftw() callback. The number of files is needed because it is the number of fork() calls to make.

Every folder should be the parent of its files, i.e. of the files contained in that folder.

fork scheme

Code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
#include <ftw.h>
#include <ctype.h>
#include <sys/mman.h>
#include <locale.h>
#include <errno.h>



/* Size, in bytes, of fixed-size path buffers. */
#define MAX_PATH_LEN        2048

/* Running totals accumulated while the directory tree is walked. */
unsigned long total_words = 0UL;   /* words counted in all regular files */
unsigned long total_dirs = 0UL;    /* directories visited */
unsigned long total_files = 0UL;   /* regular files visited */


/*
 * Count the regular files that are direct children of a directory.
 *
 * The directory itself is examined with lstat(), so a symbolic link to a
 * directory is not treated as a directory.  Each entry is also examined
 * with lstat(); only entries that are regular files are counted, and the
 * full path of every counted file is printed to stdout.  Sub-directories
 * are not descended into.
 *
 * dir:     path of the directory to scan.
 * returns: the number of regular files found, or 0 if dir is NULL, cannot
 *          be examined, is not a directory, or cannot be opened (a
 *          diagnostic is printed with perror() for system-call failures).
 */
int countInEveryFolder(const char *dir) {
    struct stat dirInfo;
    struct stat entryInfo;
    struct dirent *entry;
    DIR *handle;
    size_t dirLen;
    int numOfFile = 0;

    if (dir == NULL)
        return 0;

    if (lstat(dir, &dirInfo) < 0)
    {
        perror(dir);
        return 0;
    }
    if (!S_ISDIR(dirInfo.st_mode))
        return 0;

    handle = opendir(dir);
    if (handle == NULL)
    {
        perror(dir);
        return 0;
    }

    dirLen = strlen(dir);
    while ((entry = readdir(handle)) != NULL)
    {
        size_t needed;
        char *fullName;

        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
            continue;

        /* Size the joined path exactly (dir + '/' + name + NUL) so it can
           neither overflow nor be truncated, whatever the path length. */
        needed = dirLen + 1 + strlen(entry->d_name) + 1;
        fullName = malloc(needed);
        if (fullName == NULL)
        {
            perror("malloc");
            break;
        }
        snprintf(fullName, needed, "%s/%s", dir, entry->d_name);

        if (lstat(fullName, &entryInfo) < 0)
        {
            /* entryInfo is not valid after a failure: report and skip. */
            perror(fullName);
        }
        else if (S_ISREG(entryInfo.st_mode))
        {
            /* Only regular files are counted. */
            printf("Filename: %s\n", fullName);
            numOfFile++;
        }

        free(fullName);
    }
    closedir(handle);
    return numOfFile;
}




/*
 * Count the words in one file.
 *
 * The file is read as a sequence of whitespace-separated tokens; a token
 * is counted as a word when its first character is a letter (isalpha()).
 *
 * filename: path of the file to read.
 * returns:  the number of words counted; 0 if the file cannot be opened.
 *
 * errno on return: the fopen() error if the file could not be opened,
 * EIO if reading or closing the stream failed (the count gathered so far
 * is still returned), otherwise 0.  Diagnostics go to stderr.
 */
unsigned long count_words_in_file(const char *const filename)
{
    FILE *stream = fopen(filename, "rt");
    if (stream == NULL) {
        const int saved = errno;
        fprintf(stderr, "%s: %s.\n", filename, strerror(saved));
        errno = saved;
        return 0UL;
    }

    unsigned long words = 0UL;
    int inside_token = 0;   /* non-zero while scanning past a token's first character */
    int ch;

    while ((ch = getc(stream)) != EOF) {
        if (isspace(ch)) {
            /* Whitespace terminates whatever token was in progress. */
            inside_token = 0;
        } else if (!inside_token) {
            /* First character of a new token decides whether it is a word. */
            inside_token = 1;
            if (isalpha(ch))
                words++;
        }
    }

    /* Paranoid checking: the loop must have ended at a clean end-of-file,
       and the stream must close without error. */
    const int read_failed = !feof(stream) || ferror(stream);
    const int close_failed = (fclose(stream) != 0);

    int status = 0;
    if (read_failed || close_failed) {
        fprintf(stderr, "Warning: %s: %s.\n", filename, strerror(EIO));
        status = EIO;
    }
    errno = status;

    return words;
}


/*
 * Per-entry callback handed to nftw().
 *
 * Directories (FTW_D / FTW_DP) bump total_dirs; regular files (FTW_F)
 * bump total_files and add their word count to total_words.  In both
 * cases the path is printed to stdout, indented four spaces per level of
 * depth in the walk.  Every other entry type is ignored.
 *
 * filepath: path of the entry being visited.
 * sb:       stat information for the entry (unused here).
 * typeflag: entry type reported by nftw().
 * ftwbuf:   walk state; only its level field is read.
 * returns:  always 0.
 */
int nftw_callback(const char *filepath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
{
    switch (typeflag) {
    case FTW_D:
    case FTW_DP:
        /* Directory */
        total_dirs += 1;
        printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
        break;

    case FTW_F:
        /* Regular file: count its words before echoing the path. */
        total_files += 1;
        total_words += count_words_in_file(filepath);
        printf("%*s%s\n", ftwbuf->level * 4, "", filepath);
        break;

    default:
        break;
    }

    return 0;
}

/*
 * Report a fatal error and terminate the process.
 *
 * Prints msg together with the description of the current errno value
 * (via perror()), flushes stdout, then exits with EXIT_FAILURE.
 * This function does not return.
 */
void err_sys(const char *msg)
{
    perror(msg);
    (void)fflush(stdout);
    exit(EXIT_FAILURE);
}



/*
 * Entry point: walk the directory tree rooted at argv[1] and print how
 * many files, directories and words were found.
 *
 * Exits with EXIT_FAILURE (after printing a usage line to stderr) when no
 * directory argument is given, or via err_sys() when the walk fails.
 */
int main(int argc, char *argv[])
{
    /* Without this check nftw() would be handed a NULL path. */
    if (argc < 2) {
        fprintf(stderr, "Usage: %s DIRECTORY\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "program");
        return EXIT_FAILURE;
    }

    total_files = total_dirs = total_words = 0UL;

    /* 15 is the descriptor limit passed to nftw(); FTW_PHYS makes the
       walk physical, i.e. symbolic links are not followed. */
    if (nftw(argv[1], nftw_callback, 15, FTW_PHYS) == 0) {
        /* Success! */
        printf("%s: %lu files, %lu directories, %lu words total.\n",
               argv[1], total_files, total_dirs, total_words);
    } else {
        /* Failed... */
        err_sys("nftw");
    }
    putchar('\n');

    return 0;
}
NewCoder
  • 183
  • 1
  • 14
  • Instead of fork() you can use threads, which are lightweight and easier than this because you avoid pipes and all that machinery. http://stackoverflow.com/questions/5514464/difference-between-pthread-and-fork-on-gnu-linux – sonus21 Apr 08 '15 at 15:22
  • I will learn it also but it is time to learn using of fork() and pipe(). @sonukumar – NewCoder Apr 08 '15 at 15:24
  • This is a fun problem to learn with. However, calling `fork()` to create a process for each `count()` has too much overhead with current operating systems, and could bring down the system (see "Fork Bomb"). A slightly better approach is to `fork()` for each directory, but that has a similar overhead. A much better approach would be to `fork()` a number of worker processes equal to the number of CPUs on the system, have the parent process recursively scan for files on the filesystem, and add these filepaths to a queue. Each worker would communicate with the parent via its own `pipe()`. – OregonTrail Apr 08 '15 at 15:28
  • I understand you're doing this for learning purposes, but as a side note: it is unlikely that `fork()` (or threading) will improve performance. If your underlying file system sits on an HDD, it would likely lead to worse performance. – Xaqq Apr 08 '15 at 16:34
  • @NewCoder http://pastebin.com/V6dvEFnP check this it works without pipe .Fork is used for each and every file . – sonus21 Apr 08 '15 at 16:37

1 Answers1

0

To start I would write the program with two phases. A single-process phase in which all the file-paths are queued up (into a linked-list or dequeue), and a multi-process phase in which the worker processes receive work via their pipe() and send counts back to the main process via their pipe(). The main process would use select() to multiplex the input from its children.

Once you understand how to use select() with pipe()s, then work on having the filepath discovery be concurrent.

This design would be much easier to implement in Go, node.js, or greenlet with Python, but learning how to do it in C gives you a level of understanding for the underlying operations that you don't get with newer languages.

OregonTrail
  • 8,594
  • 7
  • 43
  • 58