1

I have the following,

/// Trade message decoded from the wire.
///
/// Stores the trade-specific fields; the timestamp and message type are
/// forwarded to BATSMessageBase.
class BATSTradeMsg : public BATSMessageBase
{
public:
    /// Builds a trade message from already-decoded field values.
    ///
    /// The constructor must be public: with `class` default access it would be
    /// private, and phi::construct<BATSTradeMsg>(...) in the parser's semantic
    /// action could not call it.
    ///
    /// @param timestamp forwarded to BATSMessageBase
    /// @param msgtype   forwarded to BATSMessageBase
    /// @param orderId   order id (base 36 on the wire)
    /// @param side      side flag character
    /// @param shares    number of shares
    /// @param symbol    symbol text, copied into the message
    /// @param price     price value as parsed from the wire
    /// @param execId    execution id (base 36 on the wire)
    BATSTradeMsg(int timestamp, char msgtype, uint64_t orderId, char side, uint32_t shares,
                    std::string const &symbol, uint64_t price, uint64_t execId) :
            BATSMessageBase(timestamp, msgtype),
            m_orderId(orderId),
            m_side(side),
            m_shares(shares),
            m_symbol(symbol),
            m_price(price),
            m_execId(execId)
    {
    }

private:
    uint64_t    m_orderId; // Base 36 Numeric values come over the wire in ascii
    char        m_side;
    uint32_t    m_shares;
    std::string m_symbol;
    uint64_t    m_price;
    uint64_t    m_execId; // Base 36 Numeric values come over the wire in ascii

};

// Fixed-width unsigned field parsers: each one accepts exactly the stated
// number of digits in the stated radix.
// order and execution ids are 12 characters base 36
qi::uint_parser< uint64_t, 36, 12, 12 > p_orderId;
qi::uint_parser< uint64_t, 36, 12, 12 > p_execId;
qi::uint_parser< uint32_t, 10,  6,  6 > p_shares;
// A 10-digit decimal value can exceed the uint32_t range (max 4294967295),
// which would make the parser fail on overflow; parse into 64 bits, matching
// the uint64_t price stored by BATSTradeMsg.
qi::uint_parser< uint64_t, 10, 10, 10 > m_price;
qi::uint_parser< uint32_t, 10,  8,  8 > p_ts;

// Long and short trade messages share one layout; only the width of the
// symbol field differs (8 characters for long, 6 for short). Any other
// message code leaves m_wire_msg untouched.
if ( msgtype == BATSTradeMsg::longMsgCode || msgtype == BATSTradeMsg::shortMsgCode )
{
    const int symbolWidth = ( msgtype == BATSTradeMsg::longMsgCode ) ? 8 : 6;

    m_wire_msg = ( p_ts >> qi::char_(msgtype)
                        >> p_orderId
                        >> qi::char_(BATSTradeMsg::sideFlag)
                        >> p_shares
                        >> qi::as_string[qi::repeat(symbolWidth)[qi::char_]]
                        >> m_price
                        >> p_execId )
            [qi::_val = phi::construct<BATSTradeMsg>(
                    qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];
}

Basically there are two message types, long and short, with the only difference being the 6th field can be a 6 or 8 character string.

However, I realised that I cannot do something like this:

// NOTE: this single-rule attempt does not parse both message types.
// The alternative tries its branches in order, and repeat(6)[char_] matches
// any six characters, so the first branch also succeeds on a long message and
// the repeat(8) branch is never tried. The last two symbol characters are then
// left for m_price to consume, which fails.
m_wire_msg = ( p_ts >> qi::char_(msgtype)
                        >> p_orderId
                        >> qi::char_(BATSTradeMsg::sideFlag)
                        >> p_shares
                        >> ( qi::as_string[qi::repeat(6)[qi::char_]] | qi::as_string[qi::repeat(8)[qi::char_]])
                        >> m_price
                        >> p_execId )
            [qi::_val = phi::construct<BATSTradeMsg>(
                    qi::_1, qi::_2, qi::_3, qi::_4, qi::_5, qi::_6, qi::_7, qi::_8)];

and have it parse both of these messages correctly:

"28800168P1K27GA00000YB000300AAPL  00018319001K27GA00000Z"
"28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
Danny
  • 391
  • 2
  • 12
  • 1
    If you provide a full (almost?) compilable example (with desired input and expected results) you'll have a much better chance of getting help. – llonesmiz Jun 07 '18 at 20:48
  • Is there a character missing in the first example message? How are we supposed to know that `P` or `r` are the codes for "short" vs "long"? Are we having to guess that "side" can be "[B]uy" or "[S]ell"? – sehe Jun 07 '18 at 22:19
  • sorry for the confusion, I'm not sure why but the formatting for the msg with "..... AAPL00018..." above appeared as having only one space, instead of the correct 2 spaces. let me make it code instead – Danny Jun 08 '18 at 06:30
  • no wonder everyone is confused. so sorry about it. I have fixed up. – Danny Jun 08 '18 at 06:30

1 Answers1

2

Here's what I'd suggest:

Data Type

/// Plain data types for decoded BATS messages.
namespace BATS {
    /// Message type code as it appears on the wire.
    enum class MessageCode : char { Long = 'r', Short = 'P' };

    /// Fields common to every message.
    ///
    /// Members carry default initializers so that a default-constructed
    /// message never holds indeterminate values, e.g. when a parse fails
    /// before any field has been assigned.
    struct MessageBase {
        int         timestamp{};
        MessageCode msgtype{};
    };

    /// Trade message: the common fields plus the trade-specific ones.
    struct TradeMsg : MessageBase {
        uint64_t    orderId{}; // Base 36 Numeric values come over the wire in ascii
        char        side{};
        uint32_t    shares{};
        std::string symbol;
        uint64_t    price{};
        uint64_t    execId{};  // Base 36 Numeric values come over the wire in ascii
    };
}

Then use simple Fusion adaptation instead of semantic actions¹:

// Exposes TradeMsg as a Fusion sequence so a parser can fill it directly.
// The members are listed in the same order as the fields appear in the
// parser sequence: each parsed field is assigned to the member at the same position.
BOOST_FUSION_ADAPT_STRUCT(BATS::TradeMsg, timestamp, msgtype, orderId, side, shares, symbol, price, execId)

Parser

The parser then basically becomes:

I assumed "side" could be "B" or "S" (for Buy or Sell).

/// Grammar that parses one trade message into a BATS::TradeMsg.
///
/// The start rule tries the long message layout first and falls back to the
/// short layout.
///
/// @tparam It iterator type of the input being parsed
template <typename It>
struct Parser : qi::grammar<It, BATS::TradeMsg()> {
    Parser() : Parser::base_type(r_wire_msg) {

        // see below

        r_wire_msg
            = r_long_wire_msg
            | r_short_wire_msg
            ;

        BOOST_SPIRIT_DEBUG_NODES((r_wire_msg)(r_short_wire_msg)(r_long_wire_msg))
    }

  private:
    // Fixed-width unsigned field parsers: each one accepts exactly the stated
    // number of digits in the stated radix.
    // order and execution ids are 12 characters base 36
    qi::uint_parser<uint64_t, 36, 12, 12> p_orderId;
    qi::uint_parser<uint64_t, 36, 12, 12> p_execId;
    qi::uint_parser<uint32_t, 10,  6,  6> p_shares;
    // A 10-digit decimal value can exceed the uint32_t range (max 4294967295),
    // which would make the parser fail on overflow; parse into 64 bits, matching
    // the uint64_t TradeMsg::price.
    qi::uint_parser<uint64_t, 10, 10, 10> p_price;
    qi::uint_parser<uint32_t, 10,  8,  8> p_ts;
    qi::rule<It, BATS::TradeMsg()> r_wire_msg, r_long_wire_msg, r_short_wire_msg;
};

Of course the two sub-rules are very similar:

        // Long layout: timestamp, the Long message code, order id, a side
        // character ('B' or 'S'), shares, an 8-character symbol, price, and
        // execution id.
        r_long_wire_msg
             = p_ts 
            >> qi::char_(BATS::MessageCode::Long)
            >> p_orderId
            >> qi::char_("BS")
            >> p_shares
            >> qi::as_string[qi::repeat(8)[qi::char_]]
            >> p_price
            >> p_execId 
            ;

        // Short layout: same as the long one, except for the Short message
        // code and a 6-character symbol.
        r_short_wire_msg
             = p_ts 
            >> qi::char_(BATS::MessageCode::Short)
            >> p_orderId
            >> qi::char_("BS")
            >> p_shares
            >> qi::as_string[qi::repeat(6)[qi::char_]]
            >> p_price
            >> p_execId 
            ;

Demo Program

Here's 3 test cases dissected:

  1. the "short" example from the question (ERROR)
  2. my attempted fix of the "short" example
  3. the "long" example from the question

Live On Coliru

// Runs the parser over each sample message, reporting whether it parsed and
// echoing any input the parser left unconsumed.
int main() {
    using It = std::string::const_iterator;
    Parser<It> const parser;

    for (std::string const input : {
            "28800168P1K27GA00000YB000300AAPL  00018319001K27GA00000Z",
            "28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z" })
    {
        std::cout << "Input: " << std::quoted(input) << "\n";

        It cursor = begin(input);
        It const stop = end(input);

        BATS::TradeMsg msg;
        bool const ok = parse(cursor, stop, parser, msg);
        std::cout << (ok ? "Parsed\n" : "Parse failed\n");

        // Nothing left over: move on to the next sample.
        if (cursor == stop)
            continue;

        std::cout << "Remaining data: " << std::quoted(std::string(cursor, stop), '\'') << "\n";
    }
}

Prints

Input: "28800168P1K27GA00000YB000300AAPL  00018319001K27GA00000Z"
Parsed
Input: "28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z"
Parsed

¹ Boost Spirit: "Semantic actions are evil"? <-- note, that's the title of a question

sehe
  • 374,641
  • 47
  • 450
  • 633
  • actually the full source is here. https://github.com/bigfatwhale/bats_pitch_parser/blob/master/tests/pitch_parser_test.cpp If you look at the test case test_parse_trade, you can see that it check that both the long and short msgs in the BATS specification are parsed properly. – Danny Jun 08 '18 at 06:02
  • The first test case, parse("28800168P1K27GA00000YB000300AAPL 00018319001K27GA00000Z") differs from the second one, parse("28800168r1K27GA00000YB000300AAPLSPOT00018319001K27GA00000Z") by just being 2 characters longer for the stock symbol. Hence my question of why can't i use the OR construct like below, ( qi::as_string[qi::repeat(6)[qi::char_]] | qi::as_string[qi::repeat(8)[qi::char_]]) to parse them both? Let's leave aside whether semantic actions are evil for now... – Danny Jun 08 '18 at 06:04
  • 1
    That doesn't work because both `qi::repeat(6)[qi::char_]` and `qi::repeat(8)[qi::char_]` will usually succeed at that position. If you write `a | b` when `a` and `b` will always match, the `b` branch will never be taken. – sehe Jun 08 '18 at 06:56