0

I have a problem like here: Write a program to read a multiple line text file and write the 'N' longest lines to stdout. Where the file to be read is specified on the command line.

Now I wrote my program like this:

#include <stdio.h>
#include <string.h>

int main(int argc, char *argv[])
{
  int a,k,n,i=0,j;
  int part;
  char ar[1000][1000],str[1000];
  /* char file[200]; */
  /* scanf("%s",file); */
  FILE *f = fopen(argv[1],"r");
  if ( f == NULL || argc < 2)
  {
    return 0;
  }

   fscanf(f,"%d",&a);

   while (fscanf(f,"%s",str)==1)
   {
        strcpy(ar[i++],str);

      for ( k = 0 ; k < i ; k++ )
      {
        for ( j = k ; j < i ; j++)
        {
          if ( strlen(ar[k]) < strlen(ar[j]))
          {
            strcpy(str,ar[k]);
            strcpy(ar[k],ar[j]);
            strcpy(ar[j],str);
          }
        }
      }

   }

   for ( j = 0 ; j < a ; j++ )
   {
    puts(ar[j]);
  }

return 0;
}

First of all it is working well for me but on submission it is giving me runtime error. Secondly I want to do it using pointers and dynamic allocation of memory. How can I do that?

Sorry, I went to bed for some time. Can you explain me what's wrong with my code. Why it is not working. I think no one explained me where I am doing wrong. Please let me know how can I draw attention of people after a few hours from posting my question. Again thanks a lot for all of you for giving me so much time.

jahan
  • 521
  • 1
  • 4
  • 15
  • 1
    What runtime error are you seeing, specifically ? Please copy and paste the actual error message(s) into your question. – Paul R May 12 '14 at 08:39
  • Site is not specifying any error. – jahan May 12 '14 at 08:41
  • Even if dynamic memory allocation wasn't already answered a bunch of times, do you think that really merits a question on its own? You are probably walking off the preallocated array (perhaps some of those files is longer?), it doesn't look like it is enough for an overflow. – dtech May 12 '14 at 08:41
  • @ddriver What is the solution for longer files.I think the dynamic allocation might save memory. – jahan May 12 '14 at 08:43
  • 1
    Just use `malloc(size)` to allocate memory, working with dynamic memory is almost identical to working with a regular array. Just don't forget to free the memory you allocate. Dynamic allocation will not save memory, but will allow you to use longer arrays than you can put on the stack. – dtech May 12 '14 at 08:46
  • @ddriver Can you explain a bit more? – jahan May 12 '14 at 08:49
  • @jahan - http://en.wikipedia.org/wiki/C_dynamic_memory_allocation – dtech May 12 '14 at 08:52
  • @ddriver dynamic allocation is likely to use more memory, and heap memory is slower: avoid it as much as you can. – Elias Van Ootegem May 12 '14 at 09:23
  • 1
    @EliasVanOotegem - it will take up an extra pointer and a few bytes of allocation data - a minor increase relative to the overall memory usage. As for access - it really depends how well the compiler will do. There is chance the stack array will be faster if accessed directly as stack offset and not dereferenced from a pointer at a stack offset. At any rate, stack space is usually very limited, so I wouldn't present dynamic memory allocation as a bad practice. – dtech May 12 '14 at 09:55
  • @ddriver: I'm not saying dynamic allocation is bad practice, I'm saying that if you don't have to allocate memory on the heap, then don't. Let's be honest: `char string[] = "foobar";` is easier, more efficient and easier to understand than `char *string = malloc(7); string[0] = '\0'; strcpy(string, "foobar");` if for no other reason that the latter requires 2 function calls – Elias Van Ootegem May 12 '14 at 10:14
  • 1
    @jahan you are accessing `argv[1]` **before** you check if `argc` is large enough. That triggers undefined behavior if the program is run without any argument. – lethal-guitar May 12 '14 at 10:15
  • Also, the compiler will most likely keep the array pointer in a register, so performance will be the same, because whether you are using an offset from the stack pointer or from another pointer in a register is identical. The penalty would be moving the array pointer to a register on access, which is a single operation. Like the memory overhead - a minor and I'd say insignificant performance hit. In this scenario anyway. – dtech May 12 '14 at 10:16

4 Answers4

1

If you really want to conserve memory then you should not copy any contents of the file, but instead mmap it into your process and only save the offsets into the longest lines in your code. On longer files this should give you significantly more performance since it outsources memory management to the kernel.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <sys/stat.h>


int main(int argc, char ** argv) {
    char * filename = argv[1];
    if (!filename) {
        fprintf(stderr, "Usage: %s filename\n", argv[0]);
        exit(-1);
    }

    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        perror("open");
        abort();
    }

    struct stat file_stats;
    if(fstat(fd, &file_stats)) {
        perror("fstat");
        abort();
    }

    char * file_contents = mmap(NULL    // anywhere
        , file_stats.st_size + 1        // file length + 1 byte for null terminator
        , PROT_READ                     // we only need read only
        , MAP_PRIVATE                   // this doesn't really matter since we are read only
        , fd                            // from this file descriptor
        , 0);                           // from beginning

    if (file_contents == MAP_FAILED) {
        perror("mmap");
        abort();
    }

    // optional
    // Expect page references in sequential order.
    // Hence, pages can be aggressively read ahead, and may be freed soon after they are accessed.
    madvise(file_contents, file_stats.st_size + 1, MADV_SEQUENTIAL);

    struct {
        size_t start;
        size_t end;
    } longest_lines[10]; // struct to hold ofsets for longest lines

    memset(longest_lines, 0, sizeof(longest_lines)); // initialise
    int shortest_line_id = 0;

    char * start = file_contents;
    char * next;

    // while we are not at the end
    while (start < file_contents + file_stats.st_size) {
        if (!(next = strchr(start, '\n'))) // if line ternimator wasn't found, then go to the end
            next = file_contents + file_stats.st_size;

        size_t line_length = next - start;

        // if this line is longer then our shortest
        if (line_length > longest_lines[shortest_line_id].end - longest_lines[shortest_line_id].start) {
            longest_lines[shortest_line_id].start = start - file_contents;
            longest_lines[shortest_line_id].end = next - file_contents;

            // determine new shortest line
            int i;
            for (i = 0; i < sizeof(longest_lines)/sizeof(*longest_lines); i++) {
                if (
                longest_lines[i].end - longest_lines[i].start
                <
                longest_lines[shortest_line_id].end - longest_lines[shortest_line_id].start
                )
                    shortest_line_id = i;
            }
        }
        // next line starts at this offset
        start = next + 1;
    }

    int i; // print them
    for (i = 0; i < sizeof(longest_lines)/sizeof(*longest_lines); i++) {
        printf("%.*s\n", (int)(longest_lines[i].end - longest_lines[i].start), file_contents + longest_lines[i].start);
    }

    return 0;
}
Sergey L.
  • 21,822
  • 5
  • 49
  • 75
1

Because it seems like some kind of homework or sth. I won't provide a full solution. But I will try to give you some peaces that you can start with.

You should divide your task by its responsibilities and try to create your programm a little more modular.

The several tasks seem to be:

  1. Get the count of lines in your file
    • You can use a loop with fgets and a counter-variable to do this
  2. Allocate a char*-Array (char**)
    • You can use malloc() with the counter-variable
  3. Read all lines of your file into the allocated memory-sections and allocate memory for each line
    • You can use getline() to get a pointer to an allocated memory section
    • Really getline takes a lot work from you, but you can also combine the following functions to to the same:
      • fgets
      • malloc
      • realloc
      • strlen
  4. Sort your char** by the length of your line
    • You can use strlen and some swap-function to archieve this (by simple apply sth. like bubblesort)
  5. Output the first N lines of the sortet char** with printf
  6. Don't forget to free all allocated memory with free()!

For Hints see the following post.

Note that the steps I provide do not optimize for memory or cpu-usage. But if your program works and you understoud what you've done this could be the next step.

Also this task shows the great benefits of modern programming languages where such a task would be a five line script. Here's the F# version that performs what you want to archieve:

open System
open System.IO
open System.Linq

[<EntryPoint>]
let main args =
  try
    let linesToPrint = Int32.Parse(args.ElementAt(0))
    let path         = @"C:\Users\XYZ\Desktop\test.txt"
    File.ReadAllLines(path).OrderByDescending(fun line -> line.Length).Take(linesToPrint).ToArray()
    |> Array.iter(printfn "%s")
  with
  | ex -> eprintfn "%s" ex.Message
  0

Note the sortness and the readability of the solution. Nevertheless it is worth to program such a task in c or maybe also in assembler to get a deeper understanding for how computers work.

Community
  • 1
  • 1
Peter Ittner
  • 551
  • 2
  • 13
  • Thanks for your time and answer. Still I am confused what F# is ? And can you tell me what's wrong with my code? – jahan May 12 '14 at 14:39
  • `F#` is a functional programming language. Maybe one worth learning for the future, because as you see it is much easier to solve your task with it. The problems in your code are the following: `fscanf(f,"%s",str)` splits every line also by its whitespaces. So you do **NOT** get a whole line into str but a word within a line. You should use: `while ((fgets(str, 1000, f) != NULL))` to get a whole line into str. Also your variable `a` is set wrong. I debugged your code and `a` is set to -858993460. See http://stackoverflow.com/questions/1910724/c-retrieving-total-line-numbers-in-a-file – Peter Ittner May 12 '14 at 15:23
  • In the question it was mentioned that the file will give the first line as N. – jahan May 12 '14 at 15:27
  • Oh i didn't see that. So variable a is set correct. But the problem with `fscanf` remains. I think this is your problem that causes the exception. The program has to crash if there are more than 1000 words within the input file. So if there is a input file with 10 lines and 1200 words your program will crash because you treat words as lines and only allocate memory for 1000 ones. So use the `fgets` approach and your code should work. – Peter Ittner May 12 '14 at 15:40
  • There should be a limit for input for any program. No doubt the above program will not work for the condition described by you but still the problem with every program exists for a higher value of input. – jahan May 12 '14 at 16:15
  • *'Read all lines ... sort them ... output first N lines'* seems quite a bad advice. The whole problem looks like a programming contest, and we do not know how long the input file is and how big N is. Anyway reading the whole file is an overkill (unless you know it will be short). I would suggest to load just N lines, then iterate through the rest of a file and if the next line is longer than the shortest one of those N, then replace it. For small N a simple array with sequential search will do, for big N (more than 30...?) I would implement a min-heap. – CiaPan Jul 06 '15 at 18:40
1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int main(int argc, char *argv[]){
    int a, n=0, i;
    char **ar, str[1000];
    int *lens;

    FILE *f = fopen(argv[1],"r");

    if ( f == NULL || argc < 2){
        return 0;
    }

    fscanf(f,"%d",&a);
    if(NULL==(ar=calloc(a, sizeof(char*))) || NULL==(lens=calloc(a, sizeof(int)))){
        perror("malloc");
        return -1;
    }
    while (fscanf(f, " %999[^\n]", str)==1){
        ++n;
        int len = strlen(str);
        for(i = 0;i < a;++i){
            if(lens[i] < len){
                free(ar[a-1]);
                memmove(&lens[i+1], &lens[i], (a-i-1)*sizeof(int));
                memmove(&ar[i+1], &ar[i], (a-i-1)*sizeof(char*));
                ar[i]=strdup(str);
                lens[i]=len;
                break;
            }
        }
    }
    fclose(f);
    for (i = 0 ; i < n && i < a ; i++ ){
        puts(ar[i]);
        free(ar[i]);
    }
    free(ar);free(lens);

    return 0;
}
BLUEPIXY
  • 39,699
  • 7
  • 33
  • 70
0

If you're happy sticking with your maximum line length of 1000 you can do:

char (*ar)[1000] = malloc(a * sizeof *ar);

if (!ar)
    // error handling....

and then use ar as you used it before.

There is a problem : fscanf(f,"%s",str). This reads a single word, and does no length checking. I guess you actually want to read a whole line, and not overflow your buffer:

fgets(str, sizeof str, f);

If you want to you can remove the newline from str after this but actually it will make no difference to your program.

There is currently a problem with your algorithm; you read every line into ar. Instead you should just make ar's size be a (I presume that a is meant to be N), take out the line strcpy(ar[i++],str);, and only insert the line if it is bigger than the current smallest member.

M.M
  • 138,810
  • 21
  • 208
  • 365