I was in the middle of arriving similar to Ishmael's solution, only I was tempted to just use the 256-byte boolean array as opposed to Ishamel's more cache-friendly 64-byte bitmask array.
So I was really curious about how these perform against each other and whipped up a quick benchmark.
Benchmark Games
#include <string>
#include <algorithm>
#include <iostream>
#include <cassert>
#include <vector>
#include <ctime>
#include <cctype>
using namespace std;
static string optimize_original(string& toBeProcessed, const string& toBeIgnored, char ch)
{
string upperProcessed = toBeProcessed;
transform(upperProcessed.begin(), upperProcessed.end(), upperProcessed.begin(), ::toupper);
string upperIgnored = toBeIgnored;
transform(upperIgnored.begin(), upperIgnored.end(), upperIgnored.begin(), ::toupper);
vector<char> vectorAfterProcessed;
bool found;
for(size_t i = 0; i <= upperProcessed.size(); i++)
{
found = false;
for(size_t j = 0; j <= upperIgnored.size(); j++)
{
if(upperProcessed[i] == upperIgnored[j])
{
vectorAfterProcessed.push_back(upperProcessed[i]);
found = true;
}
}
if(found != true)
vectorAfterProcessed.push_back(ch);
}
return string(vectorAfterProcessed.begin(), vectorAfterProcessed.end());
}
static string optimize_paul(string toBeProcessed, string toBeIgnored, char ch)
{
transform(toBeProcessed.begin(), toBeProcessed.end(), toBeProcessed.begin(), ::toupper);
transform(toBeIgnored.begin(), toBeIgnored.end(), toBeIgnored.begin(), ::toupper);
string test;
size_t start = 0;
while (start < toBeProcessed.size())
{
size_t n = toBeProcessed.find_first_not_of(toBeIgnored, start);
if ( n != string::npos)
{
toBeProcessed[n] = ch;
start = n+1;
}
else
break;
}
return toBeProcessed;
}
static string optimize_ike(string input, const string& to_keep, char rep)
{
bool used[256] = {false};
for (size_t j=0; j < to_keep.size(); ++j)
{
used[tolower(to_keep[j])] = true;
used[toupper(to_keep[j])] = true;
}
for (size_t j=0; j < input.size(); ++j)
{
if (used[input[j]])
input[j] = toupper(input[j]);
else
input[j] = rep;
}
return input;
}
static string optimize_ishmael(string input, const string& to_keep, char rep)
{
uint32_t bitmask[8] = {0};
for (size_t j=0; j < to_keep.size(); ++j)
{
const uint8_t lower = static_cast<uint8_t>(tolower(to_keep[j]));
bitmask[lower >> 5] |= (1 << (lower & 31));
const uint8_t upper = static_cast<uint8_t>(toupper(to_keep[j]));
bitmask[upper >> 5] |= (1 << (upper & 31));
}
for (size_t j=0; j < input.size(); ++j)
{
const uint8_t chr = static_cast<uint8_t>(input[j]);
if (bitmask[chr >> 5] & (1 << (chr & 31)))
input[j] = toupper(input[j]);
else
input[j] = rep;
}
return input;
}
static double sys_time()
{
return static_cast<double>(clock()) / CLOCKS_PER_SEC;
}
enum {string_len = 10000000};
enum {num_tests = 5};
int main()
{
const string to_keep = "abcd";
for (int k=0; k < 5; ++k)
{
string in;
for (int j=0; j < string_len; ++j)
in += rand() % 26 + 'A';
double time = sys_time();
volatile const string a = optimize_original(in, to_keep, '*');
cout << ((sys_time() - time) * 1000) << " ms for original" << endl;
time = sys_time();
volatile const string b = optimize_paul(in, to_keep, '*');
cout << ((sys_time() - time) * 1000) << " ms for Paul's" << endl;
time = sys_time();
volatile const string c = optimize_ike(in, to_keep, '*');
cout << ((sys_time() - time) * 1000) << " ms for Ike's" << endl;
time = sys_time();
volatile const string d = optimize_ishmael(in, to_keep, '*');
cout << ((sys_time() - time) * 1000) << " ms for Ishmael's" << endl;
cout << endl;
}
}
Results
515 ms for original
218 ms for Paul's
78 ms for Ike's
63 ms for Ishmael's
514 ms for original
203 ms for Paul's
78 ms for Ike's
73 ms for Ishmael's
515 ms for original
218 ms for Paul's
78 ms for Ike's
63 ms for Ishmael's
515 ms for original
202 ms for Paul's
67 ms for Ike's
62 ms for Ishmael's
515 ms for original
218 ms for Paul's
78 ms for Ike's
62 ms for Ishmael's
Winner -- Ishamel
The winner appears to be Ishmael when it comes to speed, arriving at not only the theoretically fastest solution at O(N+M) [the original is O(N*M)] but also the most micro-efficient.
I believe his solution is clearly superior to mine. I just wanted to offer the benchmarks comparing all of these for posterity.
Paul's solution is perhaps the most elegant from a modern C++ perspective, utilizing what's available and standard to replace the inner loop with higher-level logic. Speed isn't always (or even usually) everything.