3

Finding some text and replacing it with new text within a C string can be a little trickier than expected. I am searching for an algorithm which is fast, and that has a small time complexity.

What should I use?

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • 1
    Same as [What's the most efficient algorithm for string replacing? ](http://stackoverflow.com/questions/1905207/whats-the-most-efficient-algorithm-for-string-replacing). As noted there, replacing is pretty trivial, once you have a good find algorithm. You might also find some of [these questions](http://stackoverflow.com/questions/tagged/string-search) helpful. – Matthew Flaschen Dec 10 '10 at 11:12

6 Answers6

7

I couldn't find an implementation of search/replace in C that I liked so I present here my own. It does not use things like strstr(), snprintf(), arbitrary length temporary buffers, etc. It only requires that the haystack buffer is large enough to hold the resulting string after replacements are made.

// str_replace(haystack, haystacksize, oldneedle, newneedle) --
//  Search haystack and replace all occurences of oldneedle with newneedle.
//  Resulting haystack contains no more than haystacksize characters (including the '\0').
//  If haystacksize is too small to make the replacements, do not modify haystack at all.
//
// RETURN VALUES
// str_replace() returns haystack on success and NULL on failure. 
// Failure means there was not enough room to replace all occurences of oldneedle.
// Success is returned otherwise, even if no replacement is made.
char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle);

// ------------------------------------------------------------------
// Implementation of function
// ------------------------------------------------------------------
#define SUCCESS (char *)haystack
#define FAILURE (void *)NULL

static bool
locate_forward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last);
static bool
locate_backward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last);

char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle)
{   
    size_t oldneedle_len = strlen(oldneedle);
    size_t newneedle_len = strlen(newneedle);
    char *oldneedle_ptr;    // locates occurences of oldneedle
    char *read_ptr;         // where to read in the haystack
    char *write_ptr;        // where to write in the haystack
    const char *oldneedle_last =  // the last character in oldneedle
        oldneedle +             
        oldneedle_len - 1;      

    // Case 0: oldneedle is empty
    if (oldneedle_len == 0)
        return SUCCESS;     // nothing to do; define as success

    // Case 1: newneedle is not longer than oldneedle
    if (newneedle_len <= oldneedle_len) {       
        // Pass 1: Perform copy/replace using read_ptr and write_ptr
        for (oldneedle_ptr = (char *)oldneedle,
            read_ptr = haystack, write_ptr = haystack; 
            *read_ptr != '\0';
            read_ptr++, write_ptr++)
        {
            *write_ptr = *read_ptr;         
            bool found = locate_forward(&oldneedle_ptr, read_ptr,
                        oldneedle, oldneedle_last);
            if (found)  {   
                // then perform update
                write_ptr -= oldneedle_len;
                memcpy(write_ptr+1, newneedle, newneedle_len);
                write_ptr += newneedle_len;
            }               
        } 
        *write_ptr = '\0';
        return SUCCESS;
    }

    // Case 2: newneedle is longer than oldneedle
    else {
        size_t diff_len =       // the amount of extra space needed 
            newneedle_len -     // to replace oldneedle with newneedle
            oldneedle_len;      // in the expanded haystack

        // Pass 1: Perform forward scan, updating write_ptr along the way
        for (oldneedle_ptr = (char *)oldneedle,
            read_ptr = haystack, write_ptr = haystack;
            *read_ptr != '\0';
            read_ptr++, write_ptr++)
        {
            bool found = locate_forward(&oldneedle_ptr, read_ptr, 
                        oldneedle, oldneedle_last);
            if (found) {    
                // then advance write_ptr
                write_ptr += diff_len;
            }
            if (write_ptr >= haystack+haystacksize)
                return FAILURE; // no more room in haystack
        }

        // Pass 2: Walk backwards through haystack, performing copy/replace
        for (oldneedle_ptr = (char *)oldneedle_last;
            write_ptr >= haystack;
            write_ptr--, read_ptr--)
        {
            *write_ptr = *read_ptr;
            bool found = locate_backward(&oldneedle_ptr, read_ptr, 
                        oldneedle, oldneedle_last);
            if (found) {    
                // then perform replacement
                write_ptr -= diff_len;
                memcpy(write_ptr, newneedle, newneedle_len);
            }   
        }
        return SUCCESS;
    }
}

// locate_forward: compare needle_ptr and read_ptr to see if a match occured
// needle_ptr is updated as appropriate for the next call
// return true if match occured, false otherwise
static inline bool 
locate_forward(char **needle_ptr, char *read_ptr,
        const char *needle, const char *needle_last)
{
    if (**needle_ptr == *read_ptr) {
        (*needle_ptr)++;
        if (*needle_ptr > needle_last) {
            *needle_ptr = (char *)needle;
            return true;
        }
    }
    else 
        *needle_ptr = (char *)needle;
    return false;
}

// locate_backward: compare needle_ptr and read_ptr to see if a match occured
// needle_ptr is updated as appropriate for the next call
// return true if match occured, false otherwise
static inline bool
locate_backward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last)
{
    if (**needle_ptr == *read_ptr) {
        (*needle_ptr)--;
        if (*needle_ptr < needle) {
            *needle_ptr = (char *)needle_last;
            return true;
        }
    }
    else 
        *needle_ptr = (char *)needle_last;
    return false;
}

Example usage

#define BUF 30
char *retval1, *retval2;
char message[BUF] = "Your name is $USERNAME.";
char username[] = "admin";
char username_toolong[] = "System Administrator";

int main() {
    retval1 = str_replace(message, BUF, "$USERNAME", username_toolong);
    retval2 = str_replace(message, BUF, "$USERNAME", username);
    if (!retval1)
        printf("Not enough room to replace $USERNAME with `%s'\n", username_toolong);
    if (!retval2)
        printf("Not enough room to replace $USERNAME with `%s'\n", username);
    printf("%s\n", message);
    return 0;
}

Output

Not enough room to replace $USERNAME with `System Administrator'
Your name is admin.

Cheers.

Brandin
  • 906
  • 11
  • 20
  • Welcome to Stack Overflow. Your code works, which is good! Well done. Why is your `paste()` function is preferable to `memmove()` or `memcpy()`? When you invoke it, you know all the lengths, so you'd probably do better with the standard functions. You should probably add const qualifiers to the `oldneedle` and `newneedle` arguments. Your test code could be more rigorous, and should cover the empty old needle case. That behaviour is not obvious; given an empty needle and an empty haystack, you get the replacement; given an empty needle and a non-empty haystack, you get back the haystack. – Jonathan Leffler Sep 22 '12 at 20:43
  • Thanks for the comments; I have addressed these points - you are right, there is no need for a custom `paste()` function; `memcpy` is the right choice. Also if oldneedle is empty it is simpler and hopefully more obvious just to always do the same thing (no replacement). – Brandin Sep 22 '12 at 22:54
  • I was testing some code using this algorithm and found that if the lengths were exactly right (or do I mean wrong?), the code would write one byte beyond the end of the haystack array. The problem is in conditions of the first (Pass 1) loop in the 'case 2' code. I fixed the `for` condition to: `for (oldneedle_ptr = (char *)oldneedle, read_ptr = haystack, write_ptr = haystack; *read_ptr != '\0' && write_ptr < haystack + haystacksize; read_ptr++, write_ptr++)` and moved the test `if (write_ptr >= haystack + haystacksize) return FAILURE;` outside the loop. – Jonathan Leffler Jul 03 '16 at 23:08
  • Works but turns extremely slow as the buffer gets bigger... – RCECoder Jan 13 '21 at 03:59
2

Knuth-Morris-Pratt (which is classic) or Boyer-Moore (which is sometimes faster)?

Try using a Google search for 'string searching algorithms'.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
max taldykin
  • 12,459
  • 5
  • 45
  • 64
1

Using std::string (from <string>) you can simply use find and replace.

Edit: Touché. This is for C++ only.

Is this any good to you? http://www.daniweb.com/forums/thread51976.html

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
Antony Woods
  • 4,415
  • 3
  • 26
  • 47
1

I can't help but wonder what algorithm strstr() implements. Given that these are fairly standard algorithms, it's entirely possible that a good implementation of strstr() uses one of them.

However there's no guarantee that strstr() implements an optimised algorithm or that the same algorithm is used from one platform to another.

AlastairG
  • 4,119
  • 5
  • 26
  • 41
  • 2
    I've googled for this a while, hoping to find a list of implementations. But no such luck. Kind of odd, don't you think? I did find this: http://old.nabble.com/glibc-strstr-is-no-longer-quadratic-td17251927.html So it apparently used to be a poor algorithm in this glibc implementation. – buddhabrot Dec 10 '10 at 12:30
0

My solution, based on the others, but a bit safer I believe:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_SOURCE_SIZE (0x100000)

char * searchReplace(char * string, char *toReplace[], char *replacements[], int numReplacements){
    int i = 0;
    char *locOfToRep;
    char *toRep;
    char *rep;
    int lenToRep,lenStr,lenAfterLocRep;
    static char buffer[MAX_SOURCE_SIZE];
    for(i = 0; i < numReplacements; ++i){
        toRep = toReplace[i];
        rep = replacements[i];
        //if str not in the string, exit.
        if (!(locOfToRep = strstr(string,toRep))){
           exit(EXIT_FAILURE);
        }
        lenToRep = strlen(toRep); 
        lenStr = strlen(string); 
        lenAfterLocRep = strlen(locOfToRep); 

        //Print the string upto the pointer, then the val, and then the rest of the string.
        sprintf(buffer, "%.*s%s%s", lenStr-lenAfterLocRep, string,rep,locOfToRep+lenToRep);

        string = buffer;
    }
    return buffer;
}

int main(){
    char * string = "Hello, world!";
    int numVals;
    char *names[2] = {"Hello", "world"};
    char *vals[2] = {"Goodbye", "you"};
    numVals = 2;
    string = searchReplace(string, names, vals, numVals);
    printf("%s\n",string);
}
Tritlo
  • 539
  • 4
  • 8
0

here is a nice code

#include <stdio.h>
#include <string.h>

char *replace_str(char *str, char *orig, char *rep)
{
  static char buffer[4096];
  char *p;

  if(!(p = strstr(str, orig)))  // Is 'orig' even in 'str'?
    return str;

  strncpy(buffer, str, p-str); // Copy characters from 'str' start to 'orig' st$
  buffer[p-str] = '\0';

  sprintf(buffer+(p-str), "%s%s", rep, p+strlen(orig));

  return buffer;
}

int main(void)
{
  puts(replace_str("Hello, world!", "world", "Miami"));

  return 0;
}
m.qayyum
  • 409
  • 2
  • 9
  • 24
  • 1
    Why use strncpy when you know exactly how many bytes to copy? memcpy() will be quicker because it doesn't need to check for null terminators. Similarly, sprintf will be much much slower than two calls to memcpy and one call to strlen. Using a static buffer makes this function non-reentrant and also risks buffer overflows. – AlastairG Dec 10 '10 at 12:32
  • What do you do if strlen(rep) > strlen(orig) ? – user411313 Dec 10 '10 at 22:10
  • puts(replace_str("Hello", "Hellox")) is an interesting test case. – EvilTeach Sep 22 '12 at 18:58