1

Developping with Boost Spirit 2, I am trying to follow example in order to get progression (will add semantic actions later) in my pgn parser (see also related previous question). But I can't manage to avoid compilation errors : cpp

#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <tuple>

#include <iostream>

BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
namespace qi = boost::spirit::qi;

typedef std::tuple<std::size_t, game_move> move_t;
typedef std::tuple<std::vector<pgn_tag>, std::vector<move_t>> game_t;
typedef std::tuple<std::size_t, std::vector<game_t>> pgn_t;

template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
    pgn_parser() : pgn_parser::base_type(games) {
        using namespace qi;

        CurrentPos<Iterator> filepos;

        const std::string no_move;
        result.add
            ("1-0",     result_t::white_won)
            ("0-1",     result_t::black_won)
            ("1/2-1/2", result_t::draw)
            ("*",       result_t::undecided);

        quoted_string    = '"' >> *~char_('"') >> '"';
        tag              = '[' >> +alnum >> quoted_string >> ']';
        header           = +tag;
        regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
        single_move      = raw [ regular_move >> -char_("+#") ];
        full_move        = filepos.current_pos >> uint_
            >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
            >> (single_move | attr(no_move))
            >> -result;

        game_description = +full_move;
        single_game      = -header >> game_description;
        games            = filepos.save_start_pos >> *single_game;

        BOOST_SPIRIT_DEBUG_NODES(
                    (tag)(header)(quoted_string)(regular_move)(single_move)
                    (full_move)(game_description)(single_game)(games)
                )
    }

  private:
    qi::rule<Iterator, pgn_tag(),              qi::space_type> tag;
    qi::rule<Iterator, std::vector<pgn_tag>,   qi::space_type> header;

    qi::rule<Iterator, move_t(),               qi::space_type> full_move;
    qi::rule<Iterator, std::vector<move_t>,    qi::space_type> game_description;

    qi::rule<Iterator, game_t(),               qi::space_type> single_game;
    qi::rule<Iterator, pgn_t(),  qi::space_type> games;

    // lexemes
    qi::symbols<char, result_t> result;
    qi::rule<Iterator, std::string()> quoted_string;
    qi::rule<Iterator> regular_move;
    qi::rule<Iterator, std::string()> single_move;
};
}

loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath) {
    std::ifstream inputFile(inputFilePath);
    parseInput(inputFile);
}

loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); }

loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
    // dtor
}

void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
    if (inputFile.fail() || inputFile.bad())
        throw new InputFileException("Could not read the input file !");

    typedef boost::spirit::istream_iterator It;
    loloof64::pgn_parser<It> parser;
    std::vector<loloof64::pgn_game> temp_games;

    It iter(inputFile >> std::noskipws), end;

    //////////////////////////////////
    std::cout << "About to parse the file" << std::endl;
    //////////////////////////////////

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);

    //////////////////////////////////
    std::cout << "Finished to parse the file" << std::endl;
    //////////////////////////////////

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

and the header file : header.

#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/include/phoenix.hpp>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>

namespace loloof64 {

    namespace phx = boost::phoenix;
    namespace qi = boost::spirit::qi;


    /*
     * This class has been taken from http://marko-editor.com/articles/position_tracking/
    */
    template<typename Iterator>
    struct CurrentPos {
      CurrentPos() {
        save_start_pos = qi::omit[boost::spirit::repository::qi::iter_pos[
                phx::bind(&CurrentPos::setStartPos, this, qi::_1)]];
        current_pos = boost::spirit::repository::qi::iter_pos[
                qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)];
      }

      qi::rule<Iterator> save_start_pos;
      qi::rule<Iterator, std::size_t()> current_pos;

    private:
      void setStartPos(const Iterator &iterator) {
        start_pos_ = iterator;
      }

      std::size_t getCurrentPos(const Iterator &iterator) {
        return std::distance(start_pos_, iterator);
      }

      Iterator start_pos_;
    };

    enum result_t { white_won, black_won, draw, undecided };

    struct pgn_tag {
        std::string key;
        std::string value;
    };

    struct game_move {
        unsigned move_number;
        std::string white_move;
        std::string black_move;
        result_t result;
    };

    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    class PgnGamesExtractor {
      public:
        PgnGamesExtractor(std::string inputFilePath);
        PgnGamesExtractor(std::istream &inputFile);
        /*
        Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
        */
        std::vector<pgn_game> getGames() const { return games; }
        virtual ~PgnGamesExtractor();

      protected:
      private:
        std::vector<pgn_game> games;
        void parseInput(std::istream &inputFile);
    };

    class PgnParsingException : public virtual std::runtime_error {
      public:
        PgnParsingException(std::string message) : std::runtime_error(message) {}
    };

    class InputFileException : public virtual std::runtime_error {
      public:
        InputFileException(std::string message) : std::runtime_error(message) {}
    };
}

#endif // PGNGAMESEXTRACTOR_HPP

I did not post the compilation errors as there are too many and as the files can be easily tested.

Community
  • 1
  • 1
loloof64
  • 5,252
  • 12
  • 41
  • 78

2 Answers2

2

Of course, it's not gonna work well with a streaming interface. You can retain the start iterator, but

  1. you won't know the stream length ahead of time (unless you get it out-of-band)

  2. calculating the current position (distance from the start iterator) each time is going to be horrendously inefficient.

Since you mentioned in a comment you were parsing files, you should consider using memory mapping (boost::iostream::mapped_file_source or mmap e.g.). That way, the distance calculation is instantaneous, using pointer arithmetic on the random-access iterators.


Here's a working example, with the following changes/notes:

  1. using memory mapped input data3
  2. omit[] in save_start_pos is useless (there is no declared attribute)
  3. getCurrentPos was horrifically inefficient (to the extent that just using omit[current_pos] in the full_move rule slowed the parsing down several orders of magnitude.

    This is because boost::spirit::istream_iterator holds on to all previously read state in a deque and traversing them doesn't come for free when doing std::distance

  4. Your CurrentPos<Iterator> filepos; instance goes out of scope after construction! This means that invoking save_start_pos/current_pos is Undefined Behaviour¹. Move it out of the constructor.

  5. A subtler point is to use full_move %= ... when you add the semantic action (see docs and blog)

  6. You changed the types on some of the rules to include position information, alongside the AST types. That's both unnecessary and flawed: the AST types would not be compatible with the tuple<size_t, T> versions of the rules.

    Besides, e.g. the games rule didn't even expose a position, because save_start_pos synthesizes unused_type (no attribute).

    So, drop the whole tuple business, and just work with the state of the filepos member inside your semantic action:

        full_move       %=
                            omit[filepos.current_pos [ reportProgress(_1) ]]  >> 
                            uint_
                            >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
                            >> (single_move | attr(no_move))
                            >> -result;
    
  7. Finally, as a demonstration on how to report strictly increasing progress indications², I included a simple phoenix actor:

    struct reportProgress_f {
        size_t total_;
        mutable double pct = 0.0;
    
        reportProgress_f(size_t total) : total_(total) {}
    
        template<typename T>
        void operator()(T pos) const { 
            double newpct = pos * 100.0 / total_;
            if ((newpct - pct) > 10) {
                //sleep(1); // because it's way too fast otherwise...
                pct = newpct;
                std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << std::flush;
            };
        }
    };
    phx::function<reportProgress_f> reportProgress;
    

    Note reportProgress needs to be constructed with knowledge about start and end iterators, see the constructor for pgn_parser


¹ in the recorded live stream you can see I spotted the error on the first reading, then forgot about after I made it to compile. The program crashed, dutifully :) Then I remembered.

² even in the face of backtracking

3 (not strictly required, but I guess the goal wasn't to simply make it so slow you actually need the progress indicator?)

Live On Coliru

#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP

#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>

namespace loloof64 {

    namespace phx = boost::phoenix;
    namespace qi  = boost::spirit::qi;
    namespace qr  = boost::spirit::repository::qi;

    /*
     * This class has been taken from http://marko-editor.com/articles/position_tracking/
    */
    template<typename Iterator>
    struct CurrentPos {
        CurrentPos() {
            save_start_pos = qr::iter_pos [phx::bind(&CurrentPos::setStartPos, this, qi::_1)] >> qi::eps;
            current_pos    = qr::iter_pos [qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)] >> qi::eps;
        }

        qi::rule<Iterator> save_start_pos;
        qi::rule<Iterator, std::size_t()> current_pos;

        private:
        void setStartPos(const Iterator &iterator) {
            start_pos_ = iterator;
        }

        std::size_t getCurrentPos(const Iterator &iterator) {
            return std::distance(start_pos_, iterator);
        }

        Iterator start_pos_;
    };

    enum result_t { white_won, black_won, draw, undecided };

    struct pgn_tag {
        std::string key;
        std::string value;
    };

    struct game_move {
        unsigned move_number;
        std::string white_move;
        std::string black_move;
        result_t result;
    };

    struct pgn_game {
        std::vector<pgn_tag> header;
        std::vector<game_move> moves;
    };

    class PgnGamesExtractor {
      public:
        PgnGamesExtractor(std::string const& inputFilePath);
        /*
        Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
        */
        std::vector<pgn_game> getGames() const { return games; }
        virtual ~PgnGamesExtractor();

      protected:
      private:
        std::vector<pgn_game> games;
        void parseInput(std::string const&);
    };

    class PgnParsingException : public virtual std::runtime_error {
      public:
        PgnParsingException(std::string message) : std::runtime_error(message) {}
    };

    class InputFileException : public virtual std::runtime_error {
      public:
        InputFileException(std::string message) : std::runtime_error(message) {}
    };
}

#endif // PGNGAMESEXTRACTOR_HPP
//#include "pgn_games_extractor.h"

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

#include <iostream>
#include <iomanip>

BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)

namespace loloof64 {
    namespace qi = boost::spirit::qi;

    template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::space_type> {
        pgn_parser(Iterator start, Iterator end) 
            : pgn_parser::base_type(games),
              reportProgress(std::distance(start, end))
        {
            using namespace qi;

            const std::string no_move;
            result.add
                ("1-0",     result_t::white_won)
                ("0-1",     result_t::black_won)
                ("1/2-1/2", result_t::draw)
                ("*",       result_t::undecided);

            quoted_string    = '"' >> *~char_('"') >> '"';
            tag              = '[' >> +alnum >> quoted_string >> ']';
            header           = +tag;
            regular_move     = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
            single_move      = raw [ regular_move >> -char_("+#") ];
            full_move       %=
                                omit[filepos.current_pos [ reportProgress(_1) ]]  >> 
                                uint_
                                >> (lexeme["..." >> attr(no_move)] | "." >> single_move)
                                >> (single_move | attr(no_move))
                                >> -result;

            game_description = +full_move;
            single_game      = -header >> game_description;
            games            = filepos.save_start_pos >> *single_game;

            BOOST_SPIRIT_DEBUG_NODES(
                        (tag)(header)(quoted_string)(regular_move)(single_move)
                        (full_move)(game_description)(single_game)(games)
                    )
        }

    private:
        struct reportProgress_f {
            size_t total_;
            mutable double pct = 0.0;

            reportProgress_f(size_t total) : total_(total) {}

            template<typename T>
            void operator()(T pos) const { 
                double newpct = pos * 100.0 / total_;
                if ((newpct - pct) > 10) {
                    //sleep(1); // because it's way too fast otherwise...
                    pct = newpct;
                    std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << "    " << std::flush;
                };
            }
        };
        phx::function<reportProgress_f> reportProgress;

        CurrentPos<Iterator> filepos;

        qi::rule<Iterator, pgn_tag(),               qi::space_type> tag;
        qi::rule<Iterator, std::vector<pgn_tag>,    qi::space_type> header;

        qi::rule<Iterator, game_move(),             qi::space_type> full_move;
        qi::rule<Iterator, std::vector<game_move>,  qi::space_type> game_description;

        qi::rule<Iterator, pgn_game(),              qi::space_type> single_game;
        qi::rule<Iterator, std::vector<pgn_game>(), qi::space_type> games;

        // lexemes
        qi::symbols<char, result_t> result;
        qi::rule<Iterator, std::string()> quoted_string;
        qi::rule<Iterator> regular_move;
        qi::rule<Iterator, std::string()> single_move;
    };
}

#include <boost/iostreams/device/mapped_file.hpp>

loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
    // dtor
}

loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string const& inputFilePath) {
    parseInput(inputFilePath);
}

void loloof64::PgnGamesExtractor::parseInput(std::string const& inputFilePath) {
    boost::iostreams::mapped_file_source mf(inputFilePath);

    //if (inputFile.fail() || inputFile.bad())
        //throw new InputFileException("Could not read the input file !");

    typedef char const* It;
    std::vector<loloof64::pgn_game> temp_games;

    /* It iter(inputFile >> std::noskipws), end; */
    auto iter = mf.begin();
    auto end  = mf.end();
    loloof64::pgn_parser<It> parser(iter, end);

    //////////////////////////////////
    //std::cout << "About to parse the file" << std::endl;
    //////////////////////////////////

    bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);

    //////////////////////////////////
    //std::cout << "Finished to parse the file" << std::endl;
    //////////////////////////////////

    if (success && iter == end) {
        games.swap(temp_games);
    } else {
        std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }
}

int main() {
    loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
    std::cout << "Parsed " << pge.getGames().size() << " games\n";
    for (auto& g : pge.getGames())
        for (auto& m : g.moves)
            std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}

With sample output

Progress 32.6
Progress 44.5
Progress 55.5
Progress 67.2
Progress 77.2
Progress 89.1
Progress 100.0Parsed 1 games
1.  e4  e5
2.  Nf3 Nc6
3.  d4  exd4
4.  Bc4 Qf6
5.  O-O d6
6.  Ng5 Nh6
7.  f4  Be7
8.  e5  Qg6
9.  exd6    cxd6
10. c3  dxc3
11. Nxc3    O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3    f5
16. Bc4 Kh8
17. Nxe7    Nxe7
18. Qxb7    Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5    Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7    Rxb2
27. Ne6 Qxe6
28. Bxe6    Rxd1+
29. Kf2 

Note that on a terminal, the progress indication will self-update using a carriage-return instead of printing separate lines

sehe
  • 374,641
  • 47
  • 450
  • 633
  • You can review the live-coding session where I fixed/extended the sample here: https://www.livecoding.tv/video/progress-reporting-on-qi-parser/ – sehe Dec 13 '15 at 21:50
  • Thank you very much : deep and simple answer as well as explanations. – loloof64 Dec 13 '15 at 21:50
  • Yes I've just saw it this afternoon. Thank you very much. – loloof64 Dec 13 '15 at 21:52
  • 1
    In a previous question you mentioned a `qi::on_success` approach. It would be something like [this](http://coliru.stacked-crooked.com/a/31d0e4e09cf11573) right? (I guess it times out on coliru, works fine locally, it also requires random access iterators). – llonesmiz Dec 15 '15 at 13:13
0

Solved the problem by following this Sehe video tutorial Also, one should notice that, as this time he is using a boost::iostreams::mapped_file_source instead of a ifstream as I did, the process is really speeding up ! So the progress bar is not needed any more for this process.

Cpp file and Hpp file

loloof64
  • 5,252
  • 12
  • 41
  • 78
  • The parsing of `ScotchGambit.pgn` took ~0.19s even with `boost::spirit::istream_iterator`. I was indeed wondering what you wanted the progress indication for :) – sehe Dec 13 '15 at 21:55
  • In fact, before following your tutorial, every file of about 10 000 chess games or more (as the ones I took from PgnMentor) tooked 2 min to process. And after having following your advices, it only tooked less than 1s ! I believe that the standard streams are not the solution ... – loloof64 Dec 13 '15 at 21:58
  • 1
    I think it must have been because you [prevented flushing the `multi_pass`](http://www.boost.org/doc/libs/1_59_0/libs/spirit/doc/html/spirit/support/multi_pass.html) (in which case, simply loading the entire file into memory is vastly superior). Note I actually measured `0.19s` by uncommenting `save_start_pos` and `current_pos` reading the 6k+ games from `ScotchGambit.pgn`. So, you must have done something differently. – sehe Dec 13 '15 at 22:01
  • Ok. Next time I have to process a grammar, I will try avoid using ifstream directly. I say less than a second, but I even think it is faster than that. I haven't profiled the execution. Anyway, that time is vastly affordable compared to the 2 min I got before. – loloof64 Dec 13 '15 at 22:05
  • 1
    Like I said, the `istream_iterator` approach is fine unless you have a grammar that potentially backtracks humongeous regions (use kleene stars and expectation points to induce flushing) – sehe Dec 13 '15 at 22:07
  • Fnially my two sources files Cpp and Hpp on this answer are not good : I don't extract any game though the process does not report error ! – loloof64 Dec 14 '15 at 09:45
  • Did I say "backtracks humongeous [sic] regions"? Lol. That's exactly what apparently happens then. It works for me with ScotchGambit.pgn though. – sehe Dec 14 '15 at 10:54
  • Let us [continue this discussion in chat](http://chat.stackoverflow.com/rooms/97838/discussion-between-loloof64-and-sehe). – loloof64 Dec 14 '15 at 10:55