I'm trying to convert a boost::string_view to an integer. This post discusses using from_chars(), but that is only available since C++17 and I'm looking for a C++14 solution.
What would be the best option here?
I'm trying to convert a boost::string_view to an integer. This post discusses using from_chars(), but that is only available since C++17 and I'm looking for a C++14 solution.
What would be the best option here?
In the spirit [sic] of @t-niese's now-deleted answer I'd suggest a Spirit approach. Since C++14 is on the table, let's use X3:
template <typename Int = int, unsigned Radix=10>
Int parse_int(std::string_view sv) {
static constexpr boost::spirit::x3::int_parser<Int, Radix> p{};
Int val;
if (!parse(begin(sv), end(sv), p, val))
throw std::runtime_error("parse_int");
return val;
}
This surprisingly small thing does surprisingly many things. It can parse into any integer type, including non-standard ones (like Boost Multiprecision, GMP or MPFR).
You can even make it parse integers into non-integral types if you really want, although it will not parse non-integer formats; see "How to parse space-separated floats in C++ quickly?" for that if you're interested in more.
Also see there to learn just how performant these routines are in practice.
// Test driver: runs parse_int through the expect() / should_fail() helpers
// (defined elsewhere) over signs, narrow types, numeric limits, non-decimal
// radixes, invalid inputs and Boost.Multiprecision target types.
int main() {
expect("0", 0);
expect("+0", 0);
expect("-0", 0);
expect("+1", 1);
expect("-1", -1);
expect<int8_t>("-127", -127);
expect<int8_t>("-128", -128);
// edge case
expect<uint8_t>("-1", -1); // surprising?
expect<unsigned long>("-1", std::stoul("-1")); // Nope, matches stoul!
// Formats a value with std::to_string and expects parsing to give it back.
auto std_roundtrip = [](auto value) { expect(std::to_string(value), value); };
std_roundtrip(std::numeric_limits<intmax_t>::min());
std_roundtrip(std::numeric_limits<intmax_t>::max());
std_roundtrip(std::numeric_limits<uintmax_t>::min());
std_roundtrip(std::numeric_limits<uintmax_t>::max());
// radix
expect<int, 2>("-01011", -11);
expect<int, 8>("-01011", -521);
expect<int, 16>("a0", 160);
// invalids
should_fail(""); // empty
should_fail("+"); // lone sign
should_fail("+ 9999"); // space
// extensibility:
using Large = boost::multiprecision::int1024_t;
// Round-trips one positive and one negative value built by shifting 42 left
// by 700 and 701 bits.
for (auto huge : { Large(42) << 700, -(Large(42) << 701) })
expect(boost::lexical_cast<std::string>(huge), huge);
// doesn't require the target type to be integral either
using Decimal = boost::multiprecision::cpp_dec_float_50;
expect<Decimal>("123456789", 123456789);
// but it's still an integer parser:
should_fail<Decimal>("1e10");
should_fail<Decimal>("1.0");
}
Prints
"0" -> 0 OK
"+0" -> 0 OK
"-0" -> 0 OK
"+1" -> 1 OK
"-1" -> -1 OK
"-127" -> OK
"-128" -> € OK
"-1" -> ÿ OK
"-1" -> 18446744073709551615 OK
"-9223372036854775808" -> -9223372036854775808 OK
"9223372036854775807" -> 9223372036854775807 OK
"0" -> 0 OK
"18446744073709551615" -> 18446744073709551615 OK
"-01011" -> -11 OK
"-01011" -> -521 OK
"a0" -> 160 OK
OK (should not parse)
OK (should not parse)
OK (should not parse)
"220925707865031687304121575080965403953114271718573302098927797928886750480477394528773994523658714951533284691959485143946816154507719762251368220367378995698119394187394673124049877831141554125394316572902817792" -> 220925707865031687304121575080965403953114271718573302098927797928886750480477394528773994523658714951533284691959485143946816154507719762251368220367378995698119394187394673124049877831141554125394316572902817792 OK
"-441851415730063374608243150161930807906228543437146604197855595857773500960954789057547989047317429903066569383918970287893632309015439524502736440734757991396238788374789346248099755662283108250788633145805635584" -> -441851415730063374608243150161930807906228543437146604197855595857773500960954789057547989047317429903066569383918970287893632309015439524502736440734757991396238788374789346248099755662283108250788633145805635584 OK
"123456789" -> 1.23457e+08 OK
OK (should not parse)
OK (should not parse)
NOTE: the listing below was UPDATED to use the better non-throwing interface, which updates the string_view to reflect what part of the input was consumed (see below in BONUS TOPICS). The last two tests now print:
"3e10" -> 3 OK
 -> trailing "e10"
"-7.0" -> -7 OK
 -> trailing ".0"
#include <boost/spirit/home/x3.hpp>
template <typename Int = int, unsigned Radix=10>
static inline std::optional<Int> parse_int(std::string_view& remain) {
static constexpr boost::spirit::x3::int_parser<Int, Radix> p{};
Int val;
auto f = begin(remain), l = end(remain);
if (!parse(f, l, p, val))
return std::nullopt;
remain = remain.substr(f - begin(remain));
return val;
}
#include <iostream>
#include <iomanip>
#include <boost/lexical_cast.hpp>
/// Renders `value` as text through boost::lexical_cast.
///
/// The value is first converted to the common type of `int` and `Int`, so
/// that character-sized integers are printed as numbers rather than as
/// characters.
template <typename Int>
std::string to_string(Int const& value) {
    using Printable = typename std::common_type<int, Int>::type;
    auto const promoted = static_cast<Printable>(value);
    return boost::lexical_cast<std::string>(promoted);
}
/// Parses `input` with parse_int<Int, Radix> and reports the outcome on
/// std::cout.
///
/// Prints the quoted input, then either the parsed value tagged OK/MISMATCH
/// (followed by any unconsumed trailing text) or FAILED when nothing parsed.
///
/// @param input     Text to parse; the local copy is shrunk by parse_int.
/// @param expected  Value the parse is expected to produce.
template <typename Int = int, unsigned Radix = 10>
void expect(std::string_view input, Int expected) {
    std::cout << std::quoted(input);

    auto const actual = parse_int<Int, Radix>(input);
    if (!actual) {
        std::cout << " FAILED (" << to_string(expected) << " expected instead)\n";
        return;
    }

    Int const& got = actual.value();
    bool const matches = (expected == got);

    std::cout << " -> " << to_string(got);
    if (matches)
        std::cout << " OK\n";
    else
        std::cout << " MISMATCH (" << to_string(expected) << " expected instead)\n";

    // parse_int leaves the unconsumed tail in `input`
    if (!input.empty())
        std::cout << " -> trailing " << std::quoted(input) << "\n";
}
/// Checks that `input` is rejected by parse_int<Int, Radix> and reports the
/// outcome on std::cout.
///
/// Prints the quoted input, then OK when nothing parsed; otherwise prints the
/// unexpectedly parsed value tagged MISMATCH plus any unconsumed trailing
/// text.
///
/// @param input  Text that is expected not to parse.
template <typename Int = int, unsigned Radix = 10>
void should_fail(std::string_view input) {
    std::cout << std::quoted(input);

    auto const actual = parse_int<Int, Radix>(input);
    if (!actual) {
        std::cout << " OK (should not parse)\n";
        return;
    }

    std::cout << " -> " << to_string(actual.value())
              << " MISMATCH (expected to fail parse instead)\n";

    // parse_int leaves the unconsumed tail in `input`
    if (!input.empty())
        std::cout << " -> trailing " << std::quoted(input) << "\n";
}
#include <boost/multiprecision/cpp_dec_float.hpp>
#include <boost/multiprecision/cpp_int.hpp>
// Test driver: runs parse_int through expect() / should_fail() over signs,
// narrow types, numeric limits, non-decimal radixes, invalid inputs and
// Boost.Multiprecision target types. Each call prints one result line, plus a
// "trailing" line when part of the input was left unconsumed.
int main() {
expect("0", 0);
expect("+0", 0);
expect("-0", 0);
expect("+1", 1);
expect("-1", -1);
expect<int8_t>("-127", -127);
expect<int8_t>("-128", -128);
// edge case
expect<uint8_t>("-1", -1); // surprising?
expect<unsigned long>("-1", std::stoul("-1")); // Nope, matches stoul!
// Formats a value with std::to_string and expects parsing to give it back.
auto std_roundtrip = [](auto value) { expect(std::to_string(value), value); };
std_roundtrip(std::numeric_limits<intmax_t>::min());
std_roundtrip(std::numeric_limits<intmax_t>::max());
std_roundtrip(std::numeric_limits<uintmax_t>::min());
std_roundtrip(std::numeric_limits<uintmax_t>::max());
// radix
expect<int, 2>("-01011", -11);
expect<int, 8>("-01011", -521);
expect<int, 16>("a0", 160);
// invalids
should_fail(""); // empty
should_fail("+"); // lone sign
should_fail("+ 9999"); // space
// extensibility:
using Large = boost::multiprecision::int1024_t;
// Round-trips one positive and one negative value built by shifting 42 left
// by 700 and 701 bits.
for (auto huge : { Large(42) << 700, -(Large(42) << 701) })
expect(boost::lexical_cast<std::string>(huge), huge);
// doesn't require the target type to be integral either
using Decimal = boost::multiprecision::cpp_dec_float_50;
expect<Decimal>("123456789", 123456789);
// but it's still an integer parser:
// only the leading integer is expected as the value; expect() reports the
// rest ("e10", ".0") as trailing input
expect<Decimal>("3e10", 3);
expect<Decimal>("-7.0", -7);
}
To parse strictly unsigned integers (so -1 becomes invalid input), replace x3::int_parser with x3::uint_parser. (Note that the target type can be signed regardless of the allowed input formats.)
template <typename Int = int, unsigned Radix=10>
Int parse_uint(std::string_view sv) {
static constexpr boost::spirit::x3::uint_parser<Int, Radix> p{};
Int val;
if (!parse(begin(sv), end(sv), p >> boost::spirit::x3::eoi, val))
throw std::runtime_error("parse_int");
return val;
}
To get the from_chars behaviour, where you are left with the next character unparsed, just drop the x3::eoi expression:
/// Parses an integer from the front of `sv` and reports what was left over.
///
/// @tparam Int     Target type the parsed value is stored in.
/// @tparam Radix   Numeric base of the digits.
/// @param  sv      Text to parse.
/// @param  remain  Out: set to the part of `sv` that follows the parsed
///                 number (empty when everything was consumed). Not modified
///                 when the function throws.
/// @return         The parsed value.
/// @throws std::runtime_error when `sv` does not start with an integer.
template <typename Int = int, unsigned Radix = 10>
Int parse_int(std::string_view sv, std::string_view& remain) {
    static constexpr boost::spirit::x3::int_parser<Int, Radix> p{};

    Int val;
    auto f = begin(sv), l = end(sv);
    if (!parse(f, l, p, val))
        throw std::runtime_error("parse_int");

    // Take the tail from `sv` itself instead of forming `&*f`: when the whole
    // input is consumed `f` equals the end iterator, and dereferencing that
    // is undefined behaviour.
    remain = sv.substr(static_cast<std::size_t>(f - begin(sv)));
    return val;
}
See its behaviour Live On Coliru
std::string_view input = "123bogus", remain;
std::cout
<< "input: " << std::quoted(input) << " -> "
<< parse_int(input, remain)
<< " remaining: " << std::quoted(remain)
<< std::endl;
Prints
input: "123bogus" -> 123 remaining: "bogus"
Actually, as an afterthought, optional<> could be far better than exceptions to signal failure:
template <typename Int = int, unsigned Radix=10>
static inline std::optional<Int> parse_int(std::string_view& remain) {
static constexpr boost::spirit::x3::int_parser<Int, Radix> p{};
Int val;
auto f = begin(remain), l = end(remain);
if (!parse(f, l, p, val))
return std::nullopt;
remain = remain.substr(f - begin(remain));
return val;
}
Combines the best of both worlds. And e.g. this:
// Demo: parses the leading integer of a string literal and prints the input,
// the parsed value and the unparsed remainder. Prints nothing when the parse
// fails.
int main() {
auto input = "123bogus";
std::string_view remain = input;
if (auto parsed = parse_int(remain))
// Printing remain.data() with %s is only valid here because `remain` is a
// suffix of the NUL-terminated literal above.
printf("input: '%s' -> %d remaining: '%s'\n",
input, parsed.value(), remain.data());
}
Compiles all the way down to: Compiler Explorer
# Compiler output for the demo main: no parsing code remains, only constant
# arguments to a single printf call.
.LC0:
.string "123bogus"
.LC1:
.string "input: '%s' -> %d remaining: '%s'\n"
main:
sub rsp, 8
# 4th printf argument: address 3 bytes into the literal, i.e. "bogus"
mov ecx, OFFSET FLAT:.LC0+3
# 3rd printf argument: the parsed value as an immediate constant
mov edx, 123
xor eax, eax
# 2nd printf argument: the original input literal
mov esi, OFFSET FLAT:.LC0
# 1st printf argument: the format string
mov edi, OFFSET FLAT:.LC1
call printf
# main's return value: 0
xor eax, eax
add rsp, 8
ret
In Boost you could use spirit::qi to parse a sequence given by iterators into a numeric type:
#include <boost/spirit/include/qi.hpp>
#include <boost/utility/string_view.hpp>
// Demo: converts a boost::string_view to an int with Spirit Qi.
// Prints the parsed number and returns 0 on success; prints a message on
// std::cerr and returns 1 when the text is not an integer.
int main() {
    namespace qi = boost::spirit::qi;

    boost::string_view number_view("12345");

    auto first = number_view.begin();
    auto const last = number_view.end();
    int dest = 0;

    // qi::parse advances `first` past what it consumed; requiring it to reach
    // `last` rejects inputs with trailing characters instead of silently
    // accepting their numeric prefix.
    if (!qi::parse(first, last, qi::int_, dest) || first != last) {
        std::cerr << "parse failed: input is not an integer\n";
        return 1;
    }

    std::cout << dest << '\n';
    return 0;
}
Prints
12345