4

I'm trying to write a code that extracts all words/strings between the and tags using strstr. But it seems that it just gets stuck to the first string extracted, which is "quick". How can I get the code to keep going after extracting the first string?

#include <stdio.h>
#include <string.h>

int main()
{

    char feed[] = "The <item> quick </item> brown <item> fox </item> jumps <item> over </item> the <item> lazy dog </item>";


    const char needle[] = "<item>";
    const char popo[] = "</item>";
    char *ret;
    char *ter;
    int n;
    n = 0;

    while (feed[n] != '\0')
    {
        ret = strstr(feed, needle)+6;
        ter = strstr(ret, popo);
        size_t len = ter - ret;
        char *res = (char*)malloc(sizeof(char)*(len+1));
        strncpy(res, ret, len);
        res[len] = '\0';

        printf("%s",res);
        n++;
    }
    return 0;
}
Iharob Al Asimi
  • 52,653
  • 6
  • 59
  • 97
  • Heavy on the return key - Not so heavy on the spacebar - indent the code! – Ed Heal May 02 '15 at 15:01
  • [Do not cast `malloc()`](http://stackoverflow.com/a/605858/1983495). This, `ret = strstr(feed, needle)+6;` is dangerous, if `strstr()` reutrns `NULL` you end up with `(void *)0x06` instead of `(void *)0x00` and dereferencing that is a problem. Also, use `memcpy()` instead of `strncpy()` like `memcpy(res, ret, len)` and check that `res != NULL` before doing that. – Iharob Al Asimi May 02 '15 at 15:05
  • If I remove the +6, the output will include the string "". – Syvil San Pablo May 02 '15 at 15:13
  • You can add the length of `` after you check `ret != NULL`, check my answer. – Iharob Al Asimi May 02 '15 at 15:15

2 Answers2

2

You need to make the ret pointer to point to the current position in the string, and increment it by length on each iteration, and pass ret to the first strstr() instead of feed, check out this implementation

#include <stdio.h>
#include <string.h>

int main()
{

    char       feed[]   = "The <item> quick </item> brown <item> fox </item> "
                          "jumps <item> over </item> the <item> lazy dog </item>";
    const char needle[] = "<item>";
    const char popo[]   = "</item>";
    char      *head;
    int n;
    n = 0;

    head = feed;
    while (feed[n] != '\0')
    {
        char  *tail;
        char  *copy;
        size_t length;

        head = strstr(head, needle);
        /*            ^ always start at the current position. */
        if (head == NULL)
         {
            fprintf(stderr, "Invalid input...???\n");
            return -1;
         }
        tail   = strstr(head, popo);
        length = tail - head - 6;
        head  += 6;
        if (length < 0)
         {
            fprintf(stderr, "Invalid input...???\n");
            return -1;
         }
        copy = malloc(length + 1);
        if (copy != NULL)
         {
            memcpy(copy, head, length);
            copy[length] = '\0';

            printf("*%s*\n", copy);
            /* If you are not going to keep it, free it */
            free(copy);
         }
        head += length; /* <-- this is the imprtant thing */
        n++;
    }
    return 0;
}
Iharob Al Asimi
  • 52,653
  • 6
  • 59
  • 97
  • Can I ask, which variable and in which part does the program extract the first word and get the result? I want to put each string in an array for use later on in the code – Syvil San Pablo May 02 '15 at 16:59
  • you can do that in the `/* If you are not going to keep it, free it */` by not freeing the pointer and storing it somewhere. – Iharob Al Asimi May 02 '15 at 17:14
  • When I do memcpy(meti[a].item, copy, length); after the copy[length] ='\0'; It only copies the last result of the program, or in other words, the last string enclosed in the last tags. Why does it go directly to the last? I need to copy each string extracted, starting from the first to the last, in an array to printf them later on. Please help – Syvil San Pablo May 03 '15 at 13:41
1

On this line:

ret = strstr(feed, needle)+6;

You are always starting your search from the beginning of the feed string. You need to pass a different starting point to strstr, which you already have in ter. So you should be able to do something like this:

ter = feed;
while (ter != NULL) 
{
     ret = strstr(ter, needle) + 6;
...

With this the start of your search will keep moving farther down the feed string.

There are some other issues in your code:

  1. strstr() can return NULL if it doesn't find a match - you need to check for that or you program will crash.
  2. You need to free() the memory you malloc()
  3. As @iharob points out "Do not cast malloc()"
Community
  • 1
  • 1
shf301
  • 31,086
  • 2
  • 52
  • 86