I am writing a program with the following function:
- A server starts up and run
trans2C
,trans2C
will set acandidate_timer_
and call itself every 300ms. Let's say after calling the firsttrans2C
the server'sterm
is 0, then the handler calls the secondtrans2C
theterm
is 1, and so on. - During every
term
, the server will startRV
, andRV
will set aretry_timer_
to repeatedly call itself every 10ms. When settingretry_timer_
,RV
will check theretry_timer_
's handler number, if it's not 0, throw an exception. - I used the
boost::asio::strand
and there is only one thread. - Every time the
trans2C
runs, it cancelscandidate_timer_
andretry_timer_
.
Let's say in a timepoint in term
8, the RV
set retry_timer_
to call itself after 10ms. But after 5ms, the candidate_timer_
expires and calls the trans2C
to enter the term
9 and cancels both the timers. In the normal situations the retry_timer_
will be canceled so every time RV
is called, there is no handler on retry_timer_
and the program will loop forever. However it is not.
What's wrong with my program?
Reproducible code:
#define BOOST_ASIO_ENABLE_HANDLER_TRACKING
#include <iostream>
#include <vector>
#include <string>
#include <boost/asio.hpp>
#include <boost/log/trivial.hpp>
#include <boost/log/expressions.hpp>
#include <boost/log/utility/setup/file.hpp>
#include <boost/log/utility/setup/common_attributes.hpp>
namespace logging = boost::log;
namespace keywords = boost::log::keywords;
namespace attrs = boost::log::attributes;
using namespace boost::asio;
using namespace std;
using boost::asio::ip::tcp;
std::tuple<string, int> another_server("127.0.0.1", 7777);
class instance {
public:
instance(io_service &loop, const string &_ip, int _port) :
server_(_ip, _port),
ioContext(loop),
candidate_timer_(loop),
strand_(boost::asio::make_strand(loop)),
retry_timer_(loop) {
}
void run() {
trans2C();
ioContext.run();
}
private:
void trans2C() {
BOOST_LOG_TRIVIAL(error) << "trans to candidate";
cancel_all_timers();
int waiting_count = candidate_timer_.expires_from_now(boost::posix_time::milliseconds(300));
if (waiting_count == 0) {
} else {
throw std::logic_error("trans2C只能是timer_candidate_expire自然到期触发,所以不可能有waiting_count不为0的情况,否则就是未考虑的情况");
}
candidate_timer_.async_wait(boost::asio::bind_executor(strand_, [this](const boost::system::error_code &error) {
if (error == boost::asio::error::operation_aborted) {
} else {
trans2C();
}
}));
RV(another_server);
}
void RV(const tuple<string, int> &server) {
BOOST_LOG_TRIVIAL(trace) << " send rpc_rv to server ";
int waiting_counts = retry_timer_.expires_from_now(boost::posix_time::milliseconds(10));
if (waiting_counts != 0) {
std::ostringstream oss;
oss << "rv retry timer is set and hooks is not zero, it should be ";
string s = oss.str();
throw logic_error(s);
} else {
retry_timer_.async_wait(boost::asio::bind_executor(strand_, [this, server](const boost::system::error_code &error) {
BOOST_LOG_TRIVIAL(trace) << "rv retry_timers expires";
if (error == boost::asio::error::operation_aborted) {
BOOST_LOG_TRIVIAL(error) << "rv retry callback error: " << error.message();
} else {
RV(server);
}
}));
}
}
private:
void cancel_all_timers() {
BOOST_LOG_TRIVIAL(trace) << " all timer canceled";
candidate_timer_.cancel();
int retry_handlers_canceled_number = retry_timer_.cancel();
BOOST_LOG_TRIVIAL(trace) << retry_handlers_canceled_number << " retry_timer is canceled by cancell_all_timers";
}
io_context &ioContext;
std::tuple<string, int> server_;
deadline_timer candidate_timer_;
deadline_timer retry_timer_;
boost::asio::strand<boost::asio::io_context::executor_type> strand_;
};
int main() {
try {
boost::asio::io_service io;
int _port = 8888;
instance raft_instance(io, "127.0.0.1", _port);
raft_instance.run();
} catch (std::exception &exception) {
BOOST_LOG_TRIVIAL(error) << " exception: " << exception.what();
}
return 0;
}
CmakeList:
cmake_minimum_required(VERSION 3.14)
project(raft)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_FLAGS " -DBOOST_LOG_DYN_LINK")
set(BOOST_ROOT /Users/ynx/boost/build)
#include(FindProtobuf)
#find_package(protobuf REQUIRED)
#if (protobuf_VERBOSE)
# message(STATUS "Using Protocol Buffers ${Protobuf_VERSION}")
#endif ()
find_package(Boost COMPONENTS log log_setup serialization REQUIED)
include_directories(${Boost_INCLUDE_DIRS})
add_executable(test_boost_timer_bug tests/boost_timer_bug.cpp
)
target_link_libraries(test_boost_timer_bug
${Boost_LOG_LIBRARY}
${Boost_LOG_SETUP_LIBRARY}
${Boost_SERIALIZATION_LIBRARY}
${Boost_WSERIALIZATION_LIBRARY}
${Boost_FILE_SYSTEM_LIBRARY}
${Boost_THREAD_LIBRARY}
${Boost_SYSTEM_LIBRARY})
Handler tracking log can show something strange (I can't paste it here for that will exceed StackOverflow's 30000 text limit. But it is easy to reproduce it, just run the code), the last cancelation before crash show that it cancels 0 handlers while the "normal" cancelations return 1.