The snippet below comes from this answer.
#include <string>
#include <vector>
// Splits `str` into tokens separated by runs of DELIMITER characters and
// appends them to `token_v`.
// NOTE(review): DELIMITER is defined elsewhere (not shown here) — presumably a
// char or a string of delimiter characters; confirm at the definition site.
void tokenize(const std::string &str, std::vector<std::string> &token_v){
    // Skip leading delimiters; npos here means the input is empty or
    // consists entirely of delimiters, so the loop never runs.
    size_t start = str.find_first_not_of(DELIMITER), end = start;
    while (start != std::string::npos){
        // Find next occurrence of the delimiter (npos means "last token").
        end = str.find(DELIMITER, start);
        // Push back the token found into the vector. When end == npos,
        // end - start wraps to a huge count and substr takes the remainder
        // of the string — that is the intended behavior.
        token_v.push_back(str.substr(start, end - start));
        // Skip all occurrences of the delimiter to find the new start.
        start = str.find_first_not_of(DELIMITER, end);
    }
}
Now for a buffer like this:
std::array<char, 150> buffer;
I want to have a string_view
(that points to the buffer) and pass it to the tokenizer function and the tokens should be returned in the form of std::string_view
s via an out parameter (and not a vector), and it should also return the number of tokens that were extracted. The interface looks like this:
// Desired interface: split `inputStr` into whitespace-separated tokens,
// write each token (as a view into inputStr's buffer) into
// `foundTokens_OUT`, and return the number of tokens found — or
// expectedTokenCount + 1 when the input holds more tokens than expected.
size_t tokenize( const std::string_view inputStr,
const std::span< std::string_view > foundTokens_OUT,
const size_t expectedTokenCount )
{
// implementation
}
// Driver for the tokenize function above.
// NOTE(review): this snippet needs <array>, <string_view>, <iostream> and
// <iomanip> (for std::quoted) to compile — the includes are not shown.
int main( )
{
std::array<char, 150> buffer { " @a hgs -- " };
// CAUTION: the view spans all 150 bytes. std::array aggregate-init
// value-initializes the unlisted elements to '\0', so sv carries a long
// tail of NUL characters after "-- "; since '\0' is not a delimiter, the
// last token will include those NULs. Consider constructing the view from
// the actual string length instead of buffer.size( ).
const std::string_view sv { buffer.data( ), buffer.size( ) };
const size_t expectedTokenCount { 4 };
std::array< std::string_view, expectedTokenCount > foundTokens; // the span for storing found tokens
const size_t num_of_found_tokens { tokenize( sv, foundTokens, expectedTokenCount ) };
// Exactly the expected number of tokens was found (not fewer, and the
// early-exit "too many" sentinel expectedTokenCount + 1 was not returned).
if ( num_of_found_tokens == expectedTokenCount )
{
// do something
std::clog << "success\n" << num_of_found_tokens << '\n';
}
// Print each extracted token quoted, one per line.
for ( size_t idx { }; idx < num_of_found_tokens; ++idx )
{
std::cout << std::quoted( foundTokens[ idx ] ) << '\n';
}
}
I would appreciate it if someone could implement a similar tokenize function but for string_view
that splits based on space and tab characters. I tried to write one myself but it didn't work as expected (it didn't support tabs). Also, I want this function to stop working early and return expectedTokenCount + 1
if the number of tokens found in inputStr
exceeds the expectedTokenCount
. This is obviously more efficient.
Here is my dummy version:
// Splits `inputStr` into tokens separated by runs of spaces and/or tabs and
// writes them into `foundTokens_OUT` as views into inputStr's buffer.
//
// Returns the number of tokens written, or `expectedTokenCount + 1` as soon
// as a token beyond `expectedTokenCount` is found (early exit — the surplus
// tokens are neither extracted nor counted, which avoids scanning the rest
// of the input).
//
// Precondition: foundTokens_OUT.size( ) >= expectedTokenCount, otherwise the
// span is indexed out of bounds.
// NOTE(review): the stored string_views alias inputStr's underlying buffer
// and are only valid while that buffer is alive.
size_t tokenize( const std::string_view inputStr,
                 const std::span< std::string_view > foundTokens_OUT,
                 const size_t expectedTokenCount )
{
    // Both the space and the tab character count as delimiters.
    constexpr std::string_view delimiters { " \t" };

    size_t foundTokensCount { };
    // Skip leading delimiters; npos means the input is empty or contains
    // only delimiters, so the loop body never runs and 0 is returned.
    size_t start { inputStr.find_first_not_of( delimiters ) };

    while ( start != std::string_view::npos )
    {
        // Another token starts here but the budget is already used up:
        // report "too many tokens" and stop scanning immediately.
        if ( foundTokensCount == expectedTokenCount )
        {
            return expectedTokenCount + 1;
        }
        // npos for `end` means the token runs to the end of the input;
        // substr( start, npos - start ) then yields the remainder.
        const size_t end { inputStr.find_first_of( delimiters, start ) };
        foundTokens_OUT[ foundTokensCount++ ] = inputStr.substr( start, end - start );
        // Skip the delimiter run after this token to find the next start.
        start = inputStr.find_first_not_of( delimiters, end );
    }
    return foundTokensCount;
}
Note: The ranges library does not have proper support yet (at least on GCC) so I'm trying to avoid that.