1

I need to print all phrases from a file (phrases can end in '.', '?' or '!')

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Read the entire contents of the file `name` into a newly allocated,
 * NUL-terminated buffer and return it.
 *
 * The caller owns the returned buffer and must release it with free().
 * On any failure (open, seek, tell, allocation, read) a diagnostic is
 * written to stderr and the process exits with EXIT_FAILURE, so the
 * function never returns NULL.
 */
char* read_file(char *name) {
    FILE *file;
    char *text;
    long num_bytes;
    size_t bytes_read;

    file = fopen(name, "r");

    if (!file) {
        /* perror reports on stderr and includes the reason for the failure. */
        perror(name);
        exit(EXIT_FAILURE);
    }

    if (fseek(file, 0, SEEK_END) != 0) {
        perror(name);
        exit(EXIT_FAILURE);
    }

    num_bytes = ftell(file);

    if (num_bytes < 0 || fseek(file, 0, SEEK_SET) != 0) {
        perror(name);
        exit(EXIT_FAILURE);
    }

    /* One extra byte for the terminator: fread does not add one. */
    text = malloc((size_t) num_bytes + 1);

    if (!text) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    bytes_read = fread(text, 1, (size_t) num_bytes, file);

    if (ferror(file)) {
        perror(name);
        exit(EXIT_FAILURE);
    }

    /*
     * Terminate at the number of bytes actually read; in text mode this
     * may be smaller than the size reported by ftell.
     */
    text[bytes_read] = '\0';

    fclose(file);

    return text;
}

I have this piece of code that kind of works, but if my file has the following text: "My name is Maria. I'm 19." the second phrase is printed with a ' ' at the beginning. Can someone please help me find a way to ignore those spaces? Thank you

maria
  • 19
  • 2
  • 1
    `char *line = (char*) malloc(sizeof(text));` doesn't do what you think. `text` is a pointer, so `sizeof(text)` returns the size of the pointer (probably 8, but depends on your architecture), not the size of the memory block it points to. – yano May 18 '22 at 16:27
  • Oh yes @yano it should be char ```*line = (char*) malloc(sizeof(char) * strlen(text));```, but still that's not the problem – maria May 18 '22 at 16:29
  • 1
    Replace `printf("File could not be opened!");` with `perror(name);`. Error messages belong on stderr and should provide a reason. – William Pursell May 18 '22 at 16:40
  • The method of fseeking to determine the file size is fundamentally broken. Just read the data until you cannot read anymore. In some instances, you do need the file size (this is very rare). In those cases, you'll want to use a platform specific method for finding the size (eg `stat`) – William Pursell May 18 '22 at 16:42
  • 1
    _"I need to print all phrases from a file I have this piece of code that kind of works"_... It appears you edited out the code you had originally included. Without this code all of the answers, comments and interaction no longer make sense. May I suggest that you add your code back into the post. Please comment when you do that. – ryyker May 18 '22 at 17:41
  • the question is missing the code that the question is asking for help on. – user3629249 May 19 '22 at 15:15

3 Answers3

3

To start, you have several problems that will invoke Undefined Behaviour. In

char *line = (char*) malloc(sizeof(text));

sizeof (text) is the size of a pointer (char *), not the length of the buffer it points to.

sizeof (char *) depends on your system, but is very likely to be 8 (go ahead and test this: printf("%zu\n", sizeof (char *));, if you are curious), which means line can hold a string of length 7 (plus the null-terminating byte).

Long sentences will easily overflow this buffer, leading to UB.

(Aside: do not cast the return of malloc in C.)

Additionally, strlen(text) may not work properly as text may not include the null-terminating byte ('\0'). fread works with raw bytes, and does not understand the concept of a null-terminated string - files do not have to be null-terminated, and fread will not null-terminate buffers for you.

You should allocate one additional byte in the read_file function

text = malloc(num_bytes + 1);
text[num_bytes] = 0;

and place the null-terminating byte there.

(Aside: sizeof (char) is guaranteed to be 1.)

Note that using ftell to determine the length of a file should not be relied upon.


isspace from <ctype.h> can be used to determine if the current character is whitespace. Its argument should be cast to unsigned char. Note this will include characters such as '\t' and '\n'. Use simple comparison if you only care about spaces (text[i + 1] == ' ').

A loop can be used to consume the trailing whitespace after matching a delimiter.

Make sure to null-terminate line before printing it, as %s expects a string.

Use %u to print an unsigned int.

Do not forget to free your dynamically allocated memory when you are done with it. Additionally, strongly consider checking the return value of every library function that can fail.

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Print `msg` followed by the description of the current errno value on
 * stderr (via perror), then terminate the process with EXIT_FAILURE.
 * This function does not return.
 */
void pdie(const char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
}

/*
 * Read the entire contents of the file `name` into a newly allocated,
 * NUL-terminated buffer and return it.
 *
 * The caller owns the returned buffer and must release it with free().
 * Any failure is reported through pdie(), which prints a diagnostic and
 * exits, so this function never returns NULL.
 */
char *read_file(char *name) {
    FILE *file = fopen(name, "r");

    if (!file)
        pdie(name);

    if (fseek(file, 0, SEEK_END) != 0)
        pdie(name);

    long num_bytes = ftell(file);

    if (num_bytes < 0)
        pdie(name);

    if (fseek(file, 0, SEEK_SET) != 0)
        pdie(name);

    /* One extra byte for the terminator: fread does not add one. */
    char *text = malloc((size_t) num_bytes + 1);

    if (!text)
        pdie("malloc");

    size_t bytes_read = fread(text, 1, (size_t) num_bytes, file);

    /*
     * A short read is only fatal when the stream reports an error; in
     * text mode fewer bytes than ftell reported may legitimately arrive.
     */
    if (ferror(file))
        pdie(name);

    /* Terminate at the number of bytes actually read. */
    text[bytes_read] = '\0';

    fclose(file);

    return text;
}

/*
 * Print every phrase of the text file named by argv[1], one per line, as
 * "[N] <<phrase>>". A phrase ends at '.', '?' or '!'; whitespace directly
 * after a delimiter is skipped so it does not start the next phrase.
 * Text after the last delimiter is not printed.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s TEXT_FILE\n", argv[0]);
        return EXIT_FAILURE;
    }

    char *text = read_file(argv[1]);

    /* A phrase can never be longer than the whole text. */
    char *phrase = malloc(strlen(text) + 1);

    if (phrase == NULL)
        pdie("malloc");

    unsigned int printed = 0;
    size_t used = 0;
    const char *cursor = text;

    while (*cursor != '\0') {
        char ch = *cursor++;

        phrase[used++] = ch;

        /* Keep collecting until a phrase delimiter shows up. */
        if (ch != '.' && ch != '?' && ch != '!')
            continue;

        phrase[used] = '\0';
        used = 0;

        printf("[%u] <<%s>>\n", ++printed, phrase);

        /* Drop the whitespace separating this phrase from the next. */
        while (isspace((unsigned char) *cursor))
            cursor++;
    }

    free(text);
    free(phrase);

    return EXIT_SUCCESS;
}

Input file:

My name is Maria. I'm 19. Hello world! How are you?

stdout:

[1] <<My name is Maria.>>
[2] <<I'm 19.>>
[3] <<Hello world!>>
[4] <<How are you?>>
Oka
  • 23,367
  • 6
  • 42
  • 53
  • Pedantic: "Its argument should be cast to unsigned char" --> to work well with nearly non-existent non-2's-complement as well as 2's complement, access the string via `unsigned char *`: `isspace((unsigned char) text[i + 1])` --> `isspace(((unsigned char *) text)[i + 1])`. Makes a difference when `char` is _signed_ and non-2's-complement. This issue is likely irrelevant with the next C release, which may mandate 2's complement. – chux - Reinstate Monica May 18 '22 at 19:35
0

You can test for a whitespace character by comparing the char in question to ' '.

if(text[i] == ' ')
    // text[i] is whitespace
chameleon
  • 136
  • 1
  • 7
0

One possible solution: advance to the next non-whitespace character when you find the end of the sentence. You also need to make sure you've `malloc`'d enough memory for the current phrase:

#include <ctype.h>  // for isspace
... 

size_t textLength = strlen(text);
// malloc based on the text length here, plus 1 for the NUL terminator.
// sizeof(text) gives you the size of the pointer, not the size of the
// memory block it points to.
char *line = malloc(textLength+1);

for(size_t i = 0; i < textLength; i++) {
    line[index] = text[i];
    index++;
    if(text[i] == '.' || text[i] == '?' || text[i] == '!') {
        // malloc'd memory is uninitialized, so terminate the phrase
        // explicitly before handing it to %s.
        line[index] = '\0';
        count++;
        printf("[%d] %s\n", count, line);
        index = 0;
        // Skip any whitespace that follows the delimiter. Only advance
        // while the NEXT char is a space, so that the for loop's own i++
        // lands on the first char of the next phrase and nothing is
        // dropped when a phrase follows the delimiter directly.
        while (i+1 < textLength && isspace((unsigned char)text[i+1])) {
            i++;
        }
    }
}

Output:

[1] My name is Maria.
[2] I'm 19.

Demonstration

There's also no need to cast the return value of malloc

yano
  • 4,827
  • 2
  • 23
  • 35