C++11 has a regular expression library in the standard. So, unless you want to "do it by hand", you can simply use the
<regex> header.
#include <iostream>
#include <cinttypes>
#include <string>
#include <regex>
// Demo: prints every "\w+" match found in a fixed sample sentence, one per
// line, followed by the total number of matches.
int main(int argc, const char* argv[]) {
    const std::string input("how are you?");
    const std::regex words("\\w+");

    // A default-constructed sregex_iterator is the end-of-sequence marker.
    const std::sregex_iterator no_more_matches;
    std::sregex_iterator match(input.begin(), input.end(), words);

    size_t nwords = 0;
    while (match != no_more_matches) {
        std::cout << match->str() << std::endl;
        ++nwords;
        ++match;
    }

    std::cout << nwords << std::endl;
    return 0;
}
If you do want to code this by hand, it might be easiest to think about the problem in terms of finite state machines.
- There are 2 states: {IN_WORD, IN_SPACES}. The current character in your iteration defines the current state: whitespace and punctuation characters mean IN_SPACES, any other character means IN_WORD.
- When in state IN_WORD, you collect the characters into a string.
- When in state IN_SPACES, you just skip the character.
- On a transition from IN_WORD -> IN_SPACES, a word is done and you increase your word counter.
- If you are in state IN_WORD when the iteration is done (past last character), you need to increase your word counter as well.
#include <iostream>
#include <cinttypes>
#include <string>
#include <regex>
#include <cctype>
// Counts the words in `input` with a two-state scanner.
//
// Every character is classified as a separator (whitespace or punctuation,
// per std::isspace / std::ispunct) or as part of a word. A word is a maximal
// run of non-separator characters.
//
// @param input  Text to scan; may be empty.
// @return       Number of words found (0 for an empty or separator-only string).
size_t manual_word_counter( const std::string& input) {
    enum State { IN_WORD, IN_SPACES };

    const auto classify = [](char c) -> State {
        // The <cctype> predicates have undefined behaviour for negative
        // arguments other than EOF, so bytes >= 0x80 must be passed as
        // unsigned char when plain char is signed.
        const auto uc = static_cast<unsigned char>(c);
        return (std::isspace(uc) || std::ispunct(uc)) ? IN_SPACES : IN_WORD;
    };

    size_t counter = 0;
    // Starting "in spaces" means a leading word is not counted until it ends,
    // and an empty string falls straight through to a count of zero.
    State currentState = IN_SPACES;
    for (const char c : input) {
        const State newState = classify(c);
        // A word is complete at the moment we leave it.
        if (currentState == IN_WORD && newState == IN_SPACES)
            ++counter;
        currentState = newState;
    }

    // A word that runs up to the end of the string has no trailing separator
    // to trigger the transition above, so count it here.
    if (currentState == IN_WORD)
        ++counter;
    return counter;
}
int main(int argc, const char* argv[]) {
std::regex words("\\w+");
std::string input("how are you?");
size_t nwords = 0;
for (auto iter = std::sregex_iterator(input.begin(),
input.end(),
words);
iter != std::sregex_iterator();
++iter) {
std::cout << (*iter).str() << std::endl;
nwords++;
}
std::cout << nwords << std::endl;
std::cout
<< "manual solution yields: "
<< manual_word_counter(input) << " words." << std::endl;
return 0;
}