What is Best way for Replace with More Performance Using C?

Question

I want to write good function with utf8 support for Better Performance.

I did in-depth research and found the following `Replace()` candidates:

char* replace(char* orig, char* rep, char* with)
{
  //33-34
    char* result; // the return string
    char* ins;    // the next insert point
    char* tmp;    // varies
    size_t len_rep;  // length of rep
    size_t len_with; // length of with
    size_t len_front; // distance between rep and end of last rep
    int count;    // number of replacements
    /* char* strstr(char const* s1, char const* s2);
    retourne un pointeur vers la première occurrence de s2 dans s1
    (ou NULL si s2 n’est pas incluse dans s1). */
    if (!orig)
        return NULL;
    if (!rep || !(len_rep = strlen(rep)))
        return NULL;
    if (!(ins = strstr(orig, rep)))
        return NULL;
    if (!with)
        with = "";
    len_with = strlen(with);
    /*  {   initialisation;
            while (condition) {
                Instructions
                mise_à_jour;
        } }*/
    // compte le nombre d'occurences de la chaîne à remplacer
    for (count = 0; (tmp = strstr(ins, rep)); ++count) {
        ins = tmp + len_rep;
    }
    // allocation de mémoire pour la nouvelle chaîne
    tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
    if (!result)
        return NULL;
    /* char* strcpy(char* dest, char const* src);
    copie la chaîne src dans la chaîne dest. Retourne dest.
    Attention ! aucune vérification de taille n’est effectuée ! */

    /* char* strncpy(char* dest, char const* src, size_t n);
    copie les n premiers caractères de src dans dest. Retourne dest.
    Attention ! n’ajoute pas le '\0' à la fin si src contient plus de n
    caractères !*/
    // from here on,
    //    tmp points to the end of the result string
    //    ins points to the next occurrence of rep in orig
    //    orig points to the remainder of orig after "end of rep"
    while (count--) { // count évaluée, puis incrémentée
    // donc ici tant que count est > 0
        ins = strstr(orig, rep);
        len_front = ins - orig;
        tmp = strncpy(tmp, orig, len_front) + len_front;
        tmp = strcpy(tmp, with) + len_with;
        orig += len_front + len_rep; // move to next "end of rep"
    }
    strcpy(tmp, orig);
    return result;
}


char* replace8(char *str, char *old,char *new)
{
  //1.2 :||||||||||
  int i, count = 0;
  int newlen = strlen(new);
  int oldlen = strlen(old);
  for (i = 0; str[i]; ++i)
    if (strstr(&str[i], old) == &str[i])
      ++count, i += oldlen - 1;
  char *ret = (char *) calloc(i + 1 + count * (newlen - oldlen), sizeof(char));
  if (!ret) return "";
  i = 0;
  while (*str)
    if (strstr(str, old) == str)
      strcpy(&ret[i], new),
      i += newlen,
      str += oldlen;
    else
      ret[i++] = *str++;
  ret[i] = '\0';
  return ret;
}



char *replace7(char *orig, char *rep, char *with)
{
  //33-34-35
    char *result; // the return string
    char *ins;    // the next insert point
    char *tmp;    // varies
    int len_rep;  // length of rep
    int len_with; // length of with
    int len_front; // distance between rep and end of last rep
    int count;    // number of replacements
    if (!orig)
    {
        return NULL;
    }
    if (!rep)
    {
        rep = "";
    }
    len_rep = strlen(rep);
    if (!with)
    {
        with = "";
    }
    len_with = strlen(with);

    ins = orig;
    for (count = 0; tmp = strstr(ins, rep); ++count)
    {
        ins = tmp + len_rep;
    }
    // first time through the loop, all the variable are set correctly
    // from here on,
    //    tmp points to the end of the result string
    //    ins points to the next occurrence of rep in orig
    //    orig points to the remainder of orig after "end of rep"
    tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
    if (!result)
    {
        return NULL;
    }
    while (count--)
    {
        ins = strstr(orig, rep);
        len_front = ins - orig;
        tmp = strncpy(tmp, orig, len_front) + len_front;
        tmp = strcpy(tmp, with) + len_with;
        orig += len_front + len_rep; // move to next "end of rep"
    }
    strcpy(tmp, orig);
    return result;
}



char *replace6(char *st, char *orig, char *repl)
{
  //17-18
  static char buffer[4000];
  char *ch;
  if (!(ch = strstr(st, orig)))
   return st;
  strncpy(buffer, st, ch-st);
  buffer[ch-st] = 0;
  sprintf(buffer+(ch-st), "%s%s", repl, ch+strlen(orig));
  return buffer;
}



char* replace3(char* s, char* term, char* new_term)
{
  //error
    char *nw = NULL, *pos;
    char *cur = s;
    while(pos = strstr(cur, term))
    {
        nw = (char*)realloc(nw, pos - cur + strlen(new_term));
        strncat(nw, cur, pos-cur);
        strcat(nw, new_term);
        cur = pos + strlen(term);
    }
    strcat(nw, cur);
    free(s);
    return nw;
}



char *replace2(char *original,char *pattern,char *replacement)
{
  //34-37
  size_t replen = strlen(replacement);
  size_t patlen = strlen(pattern);
  size_t orilen = strlen(original);
  size_t patcnt = 0;
  char * oriptr;
  char * patloc;
  // find how many times the pattern occurs in the original string
  for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
  {
    patcnt++;
  }
  {
    // allocate memory for the new string
    size_t retlen = orilen + patcnt * (replen - patlen);
    char * const returned = (char *) malloc( sizeof(char) * (retlen + 1) );
    if (returned != NULL)
    {
      // copy the original string,
      // replacing all the instances of the pattern
      char * retptr = returned;
      for (oriptr = original; patloc = strstr(oriptr, pattern); oriptr = patloc + patlen)
      {
        size_t skplen = patloc - oriptr;
        // copy the section until the occurence of the pattern
        strncpy(retptr, oriptr, skplen);
        retptr += skplen;
        // copy the replacement
        strncpy(retptr, replacement, replen);
        retptr += replen;
      }
      // copy the rest of the string.
      strcpy(retptr, oriptr);
    }
    return returned;
  }
}



char *replace4(char *string, char *oldpiece, char *newpiece)
{
  //20-21
   int str_index, newstr_index, oldpiece_index, end,
   new_len, old_len, cpy_len;
   char *c;
   static char newstring[10000];
   if ((c = (char *) strstr(string, oldpiece)) == NULL)
      return string;
   new_len        = strlen(newpiece);
   old_len        = strlen(oldpiece);
   //str            = strlen(string);
   end            = strlen(string) - old_len;
   //int count;
   //for (count = 0; (strstr(oldpiece, newpiece)); ++count){}
   //newstring = malloc(str + (old_len - new_len) * count + 1);

   oldpiece_index = c - string;
   newstr_index = 0;
   str_index = 0;
   while(str_index <= end && c != NULL)
   {
      /* Copy characters from the left of matched pattern occurence */
      cpy_len = oldpiece_index-str_index;
      strncpy(newstring+newstr_index, string+str_index, cpy_len);
      newstr_index += cpy_len;
      str_index    += cpy_len;
      /* Copy replacement characters instead of matched pattern */
      ///*newstring=realloc(newstring,sizeof(newstring)+new_len+old_len+end+newstr_index);
      strcpy(newstring+newstr_index, newpiece);
      newstr_index += new_len;
      str_index    += old_len;
      /* Check for another pattern match */
      if((c = (char *) strstr(string+str_index, oldpiece)) != NULL)
         oldpiece_index = c - string;
   }
   /* Copy remaining characters from the right of last matched pattern */
   strcpy(newstring+newstr_index,
  string+str_index);
  return newstring;
}



char *replace5(char *orig, char *rep, char *with)
{
  //32-33-35
  char *result;
  char *ins;
  char *tmp;
  int len_rep;
  int len_with;
  int len_front;
  int count;
  if (!orig)
      return NULL;
  if (!rep)
      rep = "";
  len_rep = strlen(rep);
  if (!with)
      with = "";
  len_with = strlen(with);
  ins = orig;
  for (count = 0; (tmp = strstr(ins, rep)); ++count) {
      ins = tmp + len_rep;
  }
  tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
  if (!result)
      return NULL;
  while (count--) {
      ins = strstr(orig, rep);
      len_front = ins - orig;
      tmp = strncpy(tmp, orig, len_front) + len_front;
      tmp = strcpy(tmp, with) + len_with;
      orig += len_front + len_rep;
  }
  strcpy(tmp, orig);
  return result;
}

replace4() and replace6() is fast from other, but not malloc,realloc.

Replace() Performance Tests

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <assert.h>
#include <stdint.h>
#include <sys/stat.h>
#include <stdarg.h>
char *temp;
int main()
{
  for(int current=1;current<=80000;current++)
  {
    temp=/*replace6*//*replace3*/replace4("1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890","0","black");
    //printf("%s",temp);
  }
  return 0;
}

How can I make a better replace() function?
Can I add malloc,realloc to replace4() or replace6() ?

I'm voting to close this question as off-topic because the code is working, this question should be on https://codereview.stackexchange.com/. I'm not sure... maybe cause we talk about performance this is the right place. — Stargateur, Jul 03 '17 at 21:47
If the code *knows* the string lengths, it shouldn't use strlen() or strcat(). Also: (in most cases) you shouldn't mix string indexing and pointer arithmetic. — wildplasser, Jul 03 '17 at 21:54
What is the significance of your 'UTF-8 support' requirement? None of the functions validate the strings that they're given to ensure that they are UTF-8. If the inputs are valid UTF-8, the output will be valid UTF-8. Bogus input will produce bogus output. So, if you chop the last byte off a multi-byte character in the replacement, for example (off-by-one on the length), then you may produce dramatically awful output that isn't valid UTF-8, but that's GIGO at work (garbage in, garbage out). — Jonathan Leffler, Jul 03 '17 at 23:16
You should probably review using function pointers to allow you to test all the replace functions in a single execution. — Jonathan Leffler, Jul 03 '17 at 23:18
Also, the cases where string replacement performance seems important, are almost certainly using an inferior algorithm, and could be dramatically enhanced by switching to a more efficient approach *at the algorithm level*. For example, a state machine could do a number of replacements in parallel (all applied on the original input, rather than one after another), in a single pass over the data. Then there are regular expressions (included in POSIX.1-2001 and later C libraries, and available as libraries elsewhere) for even more manipulative power. — Nominal Animal, Jul 03 '17 at 23:57
may say a example function replace() with good performance? @NominalAnimal — C Perfomance, Jul 11 '17 at 01:56
The `replace3()` function is broken — severely broken. Even removing the unwarranted `free(s)` leaves it horribly broken. Of the remainder, `replace4()` and `replace6()` can be stopped from using static data by using `char *buffer = malloc(4000);` or whatever size is statically allocated. Then simple performance measurements indicate that `replace6()` is fastest, then `replace4()`, and `replace8()` is slowest. Example times: `replace1 0.524358` — `replace2 0.516771` — `replace4 0.333612` — `replace5 0.504394` — `replace6 0.052463` — `replace7 0.515424` — `replace8 0.803110`. — Jonathan Leffler, Jul 11 '17 at 04:34
tank you , @JonathanLeffler may you send code of a better way for `replace()`? — C Perfomance, Jul 14 '17 at 01:25
I can't add an answer to the question while it is on hold. I don't have your contact information so I can't yet send to you (see my profile for mine). I'm not sure I have any new version of the code that's worth showing you; all I did was add timing and a test framework. You can find the timing code at https://jleffler.github.com/soq/tree/master/src/libsoq (files `timer.c` and `timer.h`). Note that `replace4()` and `replace6()` are fast but live dangerously. It would be a good idea if you gave credit for where you found each of the different algorithms. — Jonathan Leffler, Jul 14 '17 at 05:14
https://jleffler.github.io/soq/tree/master/src/libsoq this page is 404 , tank you, you can use https://pastebin.com/ for send code. @JonathanLeffler — C Perfomance, Jul 14 '17 at 06:19
Yes; that was a thinko. The correct URL is: https://github.com/jleffler/soq/tree/master/src/libsoq. I don't use pastebin. (Also, please note that the correct term in English is "Thank you".) — Jonathan Leffler, Jul 14 '17 at 06:53
why i not see replace() function at https://github.com/jleffler/soq/blob/master/src/libsoq/timer.c !? @JonathanLeffler — C Perfomance, Jul 14 '17 at 07:27
Note that `replace6()` only replaces the first occurrence of the target string; that explains why it is so fast (it isn't correct). Both `replace4()` and `replace6()` return the original string if there was no match — leaking memory if you casually replace the static buffer with a dynamically allocated buffer. — Jonathan Leffler, Jul 15 '17 at 20:39

score 2 · Answer 1 · edited Jul 03 '17 at 23:08

Very like none of your presented solutions is the fastest you can write for a balanced performance comparison.

First of all there are a couple of points regarding function 4 and 6:

they are not re-entrant, since they use a function-static buffer
obviously they don't work above a certain input-size
if you do not need re-entrant functions they provide the fastest memory allocation, because they cost nothing after the first call
if you do not copy the result the next call will overwrite it, although the pointer will remain valid, probably leading to strange bugs
function 6 furthermore only seems to replace one occurence

Function 3 will very likely perform worse than the others, because it will allocate memory more than once, with the possible exception of it being faster in case the buffer is already large enough for the result and might do nothing.

For the remaining functions, I note that they follow this structural pattern:

scan over the input to count the number of pattern occurrences
allocate sufficient memory for the output
copy input to output while replacing the character you want to substitute
as a side-note: they seem to do different checks on the input, and I have not checked which ones check all common corner cases, but function 8 e.g. does no NULL-checks at all, which may be the right thing in case you trust your inputs, or it is just flat out ub. You would not want to sacrifice correctness.

It is probably easier at this point to divide and conquer and solve these independently, because the memory allocation is likely going to account for a substantial part of the cost of a call. Thus it is unclear, if you can gain anything, by e.g. iterating over parts of the string doing a replace on cache-sized chunks of the input and estimating the total size using a heuristic after the first block. Using a heuristic for the allocation may however be OK, but you will have to balance costly re-allocations/copies against wasting memory.

Regarding 1. I would say that you cannot avoid a full-length scan if you actually want to know the exact amount of memory you need, and from personal experience and SO I would say strstr is typically very fast. I would expect low returns from optimizing here.

Part 3. is similar in this regard, as using the c std-lib copy functions is probably going to result in the compiler using some hand-crafted assembly that destroys everything you can write.

The second step however can be optimized well in my opinion:

First of all note that it is possible to do an inplace substring-replace if the replacement is no longer than the sequence to replace
Second, a caller might pass in a buffer that is larger than the string it contains, in which case the input-buffer may already be sufficiently large
Third, a caller might have alternative means (e.g. a memory pool) that allows him to do a faster allocation, if you allow him to pass in the output buffer

That means that if you provide a call that allows the caller to pass in the buffer, you may end up faster.

You can therefore implement 1. as a function which counts the number of occurrences of a pattern in a string (very likely quite useful on its own), 2. can be done by the user, and 3. can be a function that assumes it receives a sufficiently large output buffer. If you feel like you want the convenience of doing one call, you can just use these functions internally in a 4th helper, although you would probably want to at least check the size of the input and work in-place in case you can.

Regarding your tests, you are currently doing just one check which covers a VERY simple case. If you want to test the performance of a call in a meaningful way you should consider at least the following:

Look at the dimensions that influence the performance of the algorithm such as:
- size of the input-string
- size of the sequence to replace
- size of the sequence to replace with
- check especially how performance changes when crossing L1/L2 cache limits and page-size multiples
Do a couple of iterations before the timing to warm up (branch-predict etc.)
Operate on random sequences simply ensuring their dimensional properties to prevent impacting the caching behavior, because you operate on the same 100000 times
Collect enough samples to eliminate the impact of outliers (I think you are fine on that part with 80k iterations)
Keep measurements for the different dimensions separate initially, look at them independent, only then fuse them to get an overall picture

Ultimately, know that you can look at the assembly and try to improve it, if you REALLY know what you do. Also, if you can rule out certain cases, because you know you work in a special subspace of the general problem domain, that is frequently where you can gain most by doing a special solution just for your case.

Very good, May say example code function for replace() ? – C Perfomance Jul 04 '17 at 10:33 — C Perfomance, Jul 04 '17 at 10:33

What is Best way for Replace with More Performance Using C?

I did in-depth research and found the following Replace() candidates:

Replace() Performance Tests

1 Answers1

I did in-depth research and found the following `Replace()` candidates: