-1

I am writing a very simple HTTP server based on: http://www.boost.org/doc/libs/1_62_0/doc/html/boost_asio/example/cpp11/echo/async_tcp_echo_server.cpp

I have tried numerous techniques to extract the data from a boost::asio::streambuf in order to parse HTTP headers. The streambuf object does not appear to manage its memory properly (or, more likely, I am misusing it) and I end up getting a seg fault.

As you can see from the code, none of the techniques suggested here or here work. I suspect this is because I'm using boost::asio::async_read_until() to read all the headers, rather than just a single header at a time as most other coders appear to be doing.

Any advice/pointers would be appreciated.

/*
    g++ -D TRY1 -ggdb3 -I $e/boost-1.62/include /tmp/streambuf.bug.cc $e/boost-1.62/lib/libboost_system.a -D TRY1

    or

    g++ -D TRY2 -ggdb3 -I $e/boost-1.62/include /tmp/streambuf.bug.cc $e/boost-1.62/lib/libboost_system.a -D TRY2

    or

    g++ -D TRY3 -ggdb3 -I $e/boost-1.62/include /tmp/streambuf.bug.cc $e/boost-1.62/lib/libboost_system.a -D TRY3
*/

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <boost/asio.hpp>
#include <boost/bind.hpp>

using boost::asio::ip::tcp;

class session : public std::enable_shared_from_this<session>
{
    public:
        session(tcp::socket socket) : socket_(std::move(socket)), dbg(true) {
            assert(headers.empty());
            memset(padding, 0, 10000);
            std::cout << "buffer size: " << buffer.max_size() << '\n';
        }

        void start() { readHeaders(); }

    private:
        void readHeaders() {
            if (dbg)
                std::cout << "readHeaders() start\n";
            //auto self(shared_from_this());

            boost::asio::async_read_until(socket_, buffer, "\r\n\r\n", [this](const boost::system::error_code &ec, std::size_t length) {
                if (dbg)
                    std::cout << "Read " << length << " bytes of headers in async_read_until()\n";
                if (ec)
                    throw std::runtime_error("Error code in readHeaders()");


#ifdef TRY1
                std::istream stream(&buffer);
                std::string str;
                assert(!corrupted("A1"));
                while (std::getline(stream, str)) { // seg fault! (on 3rd invocation)
                    assert(!corrupted("A2"));
                    std::cout << "str=" << str << '\n';
                    assert(!corrupted("A3"));
                }
#endif


#if 0
                std::string str;
                boost::asio::buffer_copy(boost::asio::buffer(str), buffer); // ugh, won't compile
#endif


#if 0
                std::vector<unsigned char> v(buffer.size());
                boost::asio::buffer_copy(boost::asio::buffer(v), buffer); // ugh, won't compile
                const std::string str(v.begin(), v.end());
#endif


#ifdef TRY2
                std::string str;
                auto data = buffer.data();
                assert(!corrupted("B1"));
                for (auto it = data.begin(); it != data.end(); ++it) {
                    const auto buf = *it;
                    std::cout << "buf_size=" << boost::asio::buffer_size(buf) << '\n';
                    assert(!corrupted("B2"));
                    const char *tmp = boost::asio::buffer_cast<const char *>(buf);
                    assert(!corrupted("B3"));
                    str.append(tmp); // BUG!!
                    assert(!corrupted("B4")); // fails
                }
#endif


#ifdef TRY3
                auto data = buffer.data();
                auto end = data.end();
                std::string str;
                assert(!corrupted("C1"));
                for (auto it = data.begin(); it != end; ++it) {
                    assert(!corrupted("C2"));
                    std::vector<unsigned char> v(boost::asio::buffer_size(*it));
                    assert(!corrupted("C3"));
                    boost::asio::buffer_copy(boost::asio::buffer(v), *it); // BUG!!
                    assert(!corrupted("C4")); // fails
                    str.append(v.begin(), v.end());
                    assert(!corrupted("C5"));
                }
#endif

#ifdef TRY4
                assert(!corrupted("D1"));
                const std::string str(boost::asio::buffers_begin(buffer.data()), boost::asio::buffers_end(buffer.data())); // BUG!!
                assert(!corrupted("D2")); // fails
#endif

#ifdef TRY5
                assert(!corrupted("E1"));
                const std::string str((std::istreambuf_iterator<char>(&buffer)), std::istreambuf_iterator<char>()); // seg faults!
                assert(!corrupted("E2"));
#endif

#ifdef TRY6
                boost::asio::streambuf::const_buffers_type bufs = buffer.data();
                assert(!corrupted("F1"));
                std::string str(boost::asio::buffers_begin(bufs), boost::asio::buffers_begin(bufs) + buffer.size()); // BUG!!
                assert(!corrupted("F2")); // fails
#endif

                assert(!corrupted("Z1"));
                std::cout << "str=" << str << "end of data\n";
                std::istringstream input(str);
                std::string line;
                while (std::getline(input, line)) {
                    assert(!corrupted("Z2"));
                    if (line.size() == 1)
                        continue; // blank line
                    if (line.substr(0, 3) == "GET")
                        continue; // TODO: handle properly
                    const auto idx = line.find(':');
                    assert(idx != std::string::npos);
                    const std::string key(line.begin(), line.begin() + idx);
                    const std::string val(line.begin() + idx + 2, line.end());
                    // std::cout << "key=" << key << " val=" << val << '\n';
                    assert(!corrupted("Z3"));
                    headers[key] = val;
                    assert(!corrupted("Z4"));
                }
                assert(!corrupted("Z5"));
                for (auto it3 : headers) {
                    std::cout << it3.first << '=' << it3.second << '\n';
                }

                const auto it2 = headers.find("Content Length");
                contentLength = (it2 == headers.end() ? 0 : atoi(it2->second.c_str()));
                if (contentLength > 0) {
                    const boost::system::error_code ec; // (boost::system::errc::success);
                    readBody(ec);
                }
            });
            if (dbg)
                std::cout << "readHeaders() end\n";
        }

        void readBody (const boost::system::error_code &ec) {
            if (dbg)
                std::cout << "readBody()\n";
            if (ec)
                throw std::runtime_error("Error code in readBody()");

            boost::asio::streambuf::const_buffers_type bufs = buffer.data();
            body.append(boost::asio::buffers_begin(bufs), boost::asio::buffers_begin(bufs) + buffer.size());

            if (dbg)
                std::cout << "body.size=" << body.size() << " content length=" << contentLength << '\n';

            boost::asio::async_read(socket_,
                                    buffer,
                                    boost::asio::transfer_at_least(1),
                                    boost::bind(&session::readBody, this, boost::asio::placeholders::error));
        }

        bool corrupted (const std::string s) const {
            bool b = false;
            if (strlen(padding) > 0) {
                std::cout << "buffer overflow detected @ " << s << "! padding is: " << padding << '\n';
                std::cout.flush();
                b = true;
            }
            if (headers.size() > 1000) {
                std::cout << headers.size() << " headers!!\n";
                b = true;
            }
            return b;
        }

        tcp::socket socket_;
        boost::asio::streambuf buffer;
        char padding[10000]; // $buffer appears not to manage it's memory properly. Add some padding to detect overflows.
        std::map<std::string, std::string> headers;
        uint contentLength;
        std::string body;
        const bool dbg;
};

class server
{
    public:
        server(boost::asio::io_service& io_service, short port) : acceptor_(io_service, tcp::endpoint(tcp::v4(), port)), socket_(io_service) {
            do_accept();
        }

    private:
        void do_accept() {
            acceptor_.async_accept(socket_, [this](boost::system::error_code ec) {
                if (!ec) {
                    std::cout << "Connection accepted\n";
                    std::make_shared<session>(std::move(socket_))->start();
                }
                do_accept();
            });
        }

        tcp::acceptor acceptor_;
        tcp::socket socket_;
};

/**
 * Entry point: `<program> <port>`.
 *
 * Starts the server on the given TCP port and runs the io_service until it
 * runs out of work or an exception escapes a handler.
 *
 * @return 0 on a clean shutdown, 1 on a usage error, an invalid port or an
 *         exception.
 */
int main(int argc, char* argv[])
{
    try {
        if (argc != 2) {
            std::cerr << "Usage: async_tcp_echo_server <port>\n";
            return 1;
        }

        // strtol (unlike atoi) lets us reject non-numeric and out-of-range input
        // instead of silently listening on port 0 or on a truncated port number.
        char* end = nullptr;
        const long port = std::strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || port < 0 || port > 65535) {
            std::cerr << "Invalid port: " << argv[1] << "\n";
            return 1;
        }

        boost::asio::io_service io_service;
        // server takes a `short`; ports above 32767 wrap here and are converted
        // back to the unsigned port type when the endpoint is built.
        server s(io_service, static_cast<short>(port));

        io_service.run();
    } catch (const std::exception& e) {
        std::cerr << "Exception: " << e.what() << "\n";
        return 1; // report the failure to the caller instead of exiting with 0
    }

    return 0;
}

I am using boost v1.62, gcc v6.1 on Linux (Ubuntu 12.04).

Community
  • 1
  • 1
Scott Smedley
  • 1,779
  • 20
  • 28
  • Undefined behavior is being invoked. The program fails to meet the lifetime requirement for `async_read_until()`'s `b` parameter, as `session` and `streambuf` are being destroyed before the completion handler is invoked. The official Asio example guarantees that the lifetime of the `session` is at least as long as the completion handler by capturing the results of `shared_from_this()` in the lambda capture. – Tanner Sansbury Dec 22 '16 at 04:19
  • @TannerSansbury, ahhhh this makes sense. Thank-you very much! – Scott Smedley Dec 22 '16 at 05:27

3 Answers3

3
  • You can read from the streambuf

    • manually (see documentation)
    • using std::istream:

      // Wrap the streambuf in a std::istream so the standard extraction
      // operators and std::getline can read from it.
      boost::asio::streambuf buf;
      std::istream is(&buf);
      
      // usual extraction:
      int i;
      if (is >> i) {
          // use `i`
      }         
      
      // or usual line-wise extraction:
      std::string line;
      while (std::getline(is, line)) {
          // do something with `line`
      }
      
  • Alternatively, use the boost::asio buffer functions (buffers_begin(), buffers_end() and buffer_copy()) - see "How copy or reuse boost::asio::streambuf?"

Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
3

Copying the contents of a boost::asio::streambuf as a string has been answered in both the Copy a streambuf's contents to a string and How copy or reuse boost::asio::streambuf questions:

boost::asio::streambuf source;
...
// Builds the string from an iterator range over source.data(); no consume()
// is called here, so `source` still holds the same bytes afterwards.
std::string target{buffers_begin(source.data()), buffers_end(source.data())};

The problems being observed are the result of undefined behavior. The program fails to meet the lifetime requirement for async_read_until()'s b parameter, as the streambuf is being destroyed before the completion handler is invoked:

[...] Ownership of the streambuf is retained by the caller, which must guarantee that it remains valid until the handler is called.

In this case, streambuf is a data member of session, and session objects are managed by a shared pointer. The only shared pointer managing session is both created and destroyed in the following expression:

std::make_shared<session>(std::move(socket_))->start();

Within start(), an async_read_until() operation is initiated. However, upon returning from start(), the session's buffer is destroyed before the async_read_until()'s completion handler is invoked, violating the lifetime requirement.

The idiomatic solution used by the official Asio examples is to capture the results of shared_from_this() in the completion handler's lambda capture. This guarantees that the lifetime of the session will be at least as long as the completion handler.

auto self(shared_from_this());
async_read_until(socket_, buffer_, ...,
  // The error code is taken by const reference: asio passes the handler a
  // const error_code, which cannot bind to a non-const reference.
  [this, self](const boost::system::error_code& ec, std::size_t length)
  {
    // `self` keeps the `session` alive for the lifetime of the
    // handler.  If more async operations are initiated from within
    // this handler, then the completion handlers should capture
    // `self` as well.
    ...
  });
Community
  • 1
  • 1
Tanner Sansbury
  • 51,153
  • 9
  • 112
  • 169
0

sehe gave the exact answer to your question. Below is some pseudocode I am currently using for parsing headers.

// This is where to store headers.
_STL::map<_STL::string, _STL::string> m_headers;

// buffer is of type asio::streambuf and contains the data read by asio::async_read_until
_STL::istream response_stream_headers(&buffer);

// Some helper variables.
_STL::string header, header_name, header_value;

// Read one header line at a time. Looping on the stream state (instead of
// `while (true)`) guarantees termination when the data ends without the
// blank line that normally closes the header block.
while (_STL::getline(response_stream_headers, header, '\r')) {
  // Remove \n symbol from the stream.
  response_stream_headers.get();

  if (header.empty()) {
    // We reached end of headers, there might be still some more data!!
    break;
  }

  // Parse header to key->value
  size_t separator_pos = header.find(':');
  if (separator_pos != _STL::string::npos) {
    header_name = header.substr(0, separator_pos);

    // substr() yields an empty string when the ':' is the last character.
    header_value = header.substr(separator_pos + 1);

    boost::trim_left(header_value);
    m_headers[header_name] = header_value;
  }
}

// Parsing is done, but part of the body may already have been read by
// asio::async_read_until, so we read response_stream_headers until the end.
// You should use body_response_start as the beginning of your body.
std::string body_response_start(std::istreambuf_iterator<char>(response_stream_headers), {});
marcinj
  • 48,511
  • 9
  • 79
  • 100