How can one write a helper method that for a given string like foo[0][1][2][3]
splits it into the array's name and collection (e.g. vector) of indices? In the example above it should produce foo
and 0, 1, 2, 3
respectively.
The format of a string is always like name[index_0][index_1]....[index_n]
.
The number of indices (n
) is not known in advance. All should be numbers. For simplicity, no spaces are allowed in a string. The name of an array (name
) can be arbitrary. The helper function throws in case the string is not according to the specified format.
Performance is not an issue here. I'm looking for the most elegant/short solution.
UPDATE
Well, regular expressions were suggested in the very first comment. I was new to the area and went through the hassle of getting it done in C++. Please, feel free to simplify it. In the meantime, two non-regular expressions based solutions were proposed by @MartinYork and @Frodyne. At a first glance, regular expressions didn't bring anything fascinating here. The solution does not seem to be much shorter or much more elegant in my mind.
#include <stdexcept>
#include <iostream>
#include <string>
#include <regex>
#include <tuple>
std::tuple<std::string, std::vector<int>> helper(std::string str) {
// used to validate that the incoming string is in format
// array[0][1][2]
const std::regex rx_validate{
"([[:alnum:]]+)((?:\\[[[:digit:]]+\\])+)$"};
std::match_results<std::string::const_iterator> match_results;
std::regex_search(str, match_results, rx_validate);
// regex_search array[0][1][2] gives
// match_results[0]: array[0][1][2]
// match_results[1]: array
// match_results[2]: [0][1][2]
if (match_results.size() == 3) {
std::vector<int> indices;
// used to extract indices, it is guaranteed that
// numbers are between brackets, no extra checks
// needed
const std::regex rx_index{"[0-9]+"};
const std::string match{match_results[2]};
auto it = std::sregex_iterator(match.begin(), match.end(), rx_index);
for (; it != std::sregex_iterator(); ++it)
indices.push_back(std::stoi((*it).str()));
return std::make_tuple(match_results[1], indices);
} else {
throw std::invalid_argument("Invalid format (" + str + ")");
}
}
int main() {
const std::string str{"a[0][1][2][3][4][5]"};
const auto tuple = helper(str);
std::cout << "Name: " << std::get<0>(tuple) << std::endl;
for (int index: std::get<1>(tuple))
std::cout << index << std::endl;
}
UPDATE2
@rici suggested a modification of an algorithm that used regular expressions. It's a bit shorter and more concise.
I was really interested in comparing those algorithms in terms of performance.
Not going to advocate the numbers :-) Everybody should decide on their own.
The below program compiled as g++ -std=c++11 -Ofast
and running on i7-8550U
gives:
Regex measurements...
min/max/avg 955/154859/1072.88
Stream measurements...
min/max/avg 722/41252/800.402
#include <iostream>
#include <cstdlib>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>
#include <regex>
#include <tuple>
#include <time.h>
inline uint64_t Timestamp() {
timespec time_now;
clock_gettime(CLOCK_REALTIME, &time_now);
return static_cast<uint64_t>(time_now.tv_sec) * 1000000000ULL + time_now.tv_nsec;
}
std::tuple<std::string, std::vector<int>> helper_stream(std::string const& info)
{
std::stringstream is(info);
std::string name;
std::vector<int> index;
if (std::getline(is, name, '[')) {
is.putback('[');
name.erase(std::remove(std::begin(name), std::end(name), ' '), std::end(name));
int value;
char b1;
char b2;
while(is >> b1 >> value >> b2 && b1 == '[' && b2 == ']') {
index.push_back(value);
}
}
return std::make_tuple(name, index);
}
std::tuple<std::string, std::vector<int>> helper_regex(std::string str) {
static const std::regex strip_prefix{"^[[:alpha:]][[:alnum:]]*"};
static const std::regex index{"\\[([[:digit:]]+)\\]|."};
std::match_results<std::string::const_iterator> match;
if (std::regex_search(str, match, strip_prefix)) {
auto e = match[0].second;
std::vector<int> indices;
for (auto it = std::sregex_iterator(e, str.end(), index), lim = std::sregex_iterator(); it != lim; ++it) {
if ((*it)[1].matched)
indices.push_back(std::stoi((*it)[1]));
else throw std::invalid_argument("Invalid format");
}
return std::make_tuple(std::string(str.cbegin(), e), indices);
}
else
throw std::invalid_argument("Invalid format (" + str + ")");
}
std::string make_str(int n) {
std::string str{"array"};
for (int i = 0; i < n; ++i) {
str += "[";
str += std::to_string(std::rand());
str += "]";
}
return str;
}
template <typename F>
void measurements(F f) {
constexpr int kNumRounds = 1000000;
constexpr int kLength = 3;
std::vector<uint64_t> time_diffs(kNumRounds);
for (int i = 0; i < kNumRounds; ++i) {
const std::string str{make_str(kLength)};
const auto before = Timestamp();
f(str);
const auto after = Timestamp();
time_diffs[i] = after - before;
}
uint64_t min{std::numeric_limits<uint64_t>::max()};
uint64_t max{std::numeric_limits<uint64_t>::min()};
uint64_t sum{0};
for (int i = 0; i < kNumRounds; ++i) {
const auto time_diff = time_diffs[i];
if (time_diff < min)
min = time_diff;
if (time_diff > max)
max = time_diff;
sum += time_diff;
}
std::cout << "min/max/avg " << min << "/" << max << "/" << static_cast<double>(sum) / kNumRounds << std::endl;
}
int main() {
std::cout << "Regex measurements..." << std::endl;
measurements(helper_regex);
std::cout << "Stream measurements..." << std::endl;
measurements(helper_stream);
return 0;
}