Skip to content

Commit

Permalink
Reapply netlink retries (#591)
Browse files Browse the repository at this point in the history
* Retry failed netlink operations

The stack sends three requests to the kernel using
the netlink socket. If the kernel is e.g. busy at this point,
netlink will respond with a EBUSY error value.

The current code does not handle these errors gracefully, but instead
silently ignores them. This can lead to vsomeip stack being stalled,
and not starting certain services, e.g. service discovery.

This patch helps fix these issues by sending a retry of the messages
that fail.

* Limit netlink retries to 3, improve error logging

Only log error after retries exhausted and fix error message output,
as strerror takes positive errno.

---------

Co-authored-by: Philip Werner <[email protected]>
  • Loading branch information
hefroy and phiwer authored Jan 19, 2024
1 parent 00ab822 commit 339a2ca
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 12 deletions.
2 changes: 2 additions & 0 deletions implementation/configuration/include/internal.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@
#define VSOMEIP_MAX_TCP_RESTART_ABORTS 5
#define VSOMEIP_MAX_TCP_SENT_WAIT_TIME 10000

#define VSOMEIP_MAX_NETLINK_RETRIES 3

#define VSOMEIP_TP_MAX_SEGMENT_LENGTH_DEFAULT 1392

#define VSOMEIP_DEFAULT_BUFFER_SHRINK_THRESHOLD 5
Expand Down
2 changes: 2 additions & 0 deletions implementation/configuration/include/internal_android.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
#define VSOMEIP_MAX_TCP_RESTART_ABORTS 5
#define VSOMEIP_MAX_TCP_SENT_WAIT_TIME 10000

#define VSOMEIP_MAX_NETLINK_RETRIES 3

#define VSOMEIP_TP_MAX_SEGMENT_LENGTH_DEFAULT 1392

#define VSOMEIP_DEFAULT_BUFFER_SHRINK_THRESHOLD 5
Expand Down
20 changes: 17 additions & 3 deletions implementation/endpoints/include/netlink_connector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@

#include "../../endpoints/include/buffer.hpp"

#ifdef ANDROID
# include "../../configuration/include/internal_android.hpp"
#else
# include "../../configuration/include/internal.hpp"
#endif

namespace vsomeip_v3 {

template <typename Protocol>
Expand Down Expand Up @@ -161,9 +167,10 @@ class netlink_connector : public std::enable_shared_from_this<netlink_connector>
bool has_address(struct ifaddrmsg * ifa_struct,
size_t length,
const unsigned int address);
void send_ifa_request();
void send_ifi_request();
void send_rt_request();
void send_ifa_request(std::uint32_t _retry = 0);
void send_ifi_request(std::uint32_t _retry = 0);
void send_rt_request(std::uint32_t _retry = 0);
void handle_netlink_error(struct nlmsgerr *_error_msg);

void receive_cbk(boost::system::error_code const &_error, std::size_t _bytes);
void send_cbk(boost::system::error_code const &_error, std::size_t _bytes);
Expand All @@ -186,6 +193,13 @@ class netlink_connector : public std::enable_shared_from_this<netlink_connector>
boost::asio::ip::address address_;
boost::asio::ip::address multicast_address_;
bool is_requiring_link_;

static const std::uint32_t max_retries_ = VSOMEIP_MAX_NETLINK_RETRIES;
static const std::uint32_t retry_bit_shift_ = 8;
static const std::uint32_t request_sequence_bitmask_ = 0xFF;
static const std::uint32_t ifa_request_sequence_ = 1;
static const std::uint32_t ifi_request_sequence_ = 2;
static const std::uint32_t rt_request_sequence_ = 3;
};

} // namespace vsomeip_v3
Expand Down
59 changes: 50 additions & 9 deletions implementation/endpoints/src/netlink_connector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,9 @@ void netlink_connector::receive_cbk(boost::system::error_code const &_error,
}
case NLMSG_ERROR: {
struct nlmsgerr *errmsg = (nlmsgerr *)NLMSG_DATA(nlh);
VSOMEIP_ERROR << "netlink_connector::receive_cbk received "
"error message: " << std::dec << nlh->nlmsg_type
<< " seq " << errmsg->msg.nlmsg_seq;
if (errmsg->error != 0) {
handle_netlink_error(errmsg);
}
break;
}
case NLMSG_DONE:
Expand Down Expand Up @@ -240,7 +240,7 @@ void netlink_connector::send_cbk(boost::system::error_code const &_error, std::s
}
}

void netlink_connector::send_ifa_request() {
void netlink_connector::send_ifa_request(std::uint32_t _retry) {
typedef struct {
struct nlmsghdr nlhdr;
struct ifaddrmsg addrmsg;
Expand All @@ -250,7 +250,10 @@ void netlink_connector::send_ifa_request() {
get_address_msg.nlhdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
get_address_msg.nlhdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
get_address_msg.nlhdr.nlmsg_type = RTM_GETADDR;
get_address_msg.nlhdr.nlmsg_seq = 1;
// the sequence number has stored the request sequence and the retry count.
// request sequece is stored in the LSB (least significant byte) and
// retry is stored in the 2nd LSB.
get_address_msg.nlhdr.nlmsg_seq = ifa_request_sequence_ | (_retry << retry_bit_shift_);
if (address_.is_v4()) {
get_address_msg.addrmsg.ifa_family = AF_INET;
} else {
Expand All @@ -268,7 +271,7 @@ void netlink_connector::send_ifa_request() {
);
}

void netlink_connector::send_ifi_request() {
void netlink_connector::send_ifi_request(std::uint32_t _retry) {
typedef struct {
struct nlmsghdr nlhdr;
struct ifinfomsg infomsg;
Expand All @@ -279,7 +282,10 @@ void netlink_connector::send_ifi_request() {
get_link_msg.nlhdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
get_link_msg.nlhdr.nlmsg_type = RTM_GETLINK;
get_link_msg.infomsg.ifi_family = AF_UNSPEC;
get_link_msg.nlhdr.nlmsg_seq = 2;
// the sequence number has stored the request sequence and the retry count.
// request sequece is stored in the LSB (least significant byte) and
// retry is stored in the 2nd LSB.
get_link_msg.nlhdr.nlmsg_seq = ifi_request_sequence_ | (_retry << retry_bit_shift_);

{
std::lock_guard<std::mutex> its_lock(socket_mutex_);
Expand All @@ -295,7 +301,7 @@ void netlink_connector::send_ifi_request() {
}
}

void netlink_connector::send_rt_request() {
void netlink_connector::send_rt_request(std::uint32_t _retry) {
typedef struct {
struct nlmsghdr nlhdr;
struct rtgenmsg routemsg;
Expand All @@ -306,7 +312,10 @@ void netlink_connector::send_rt_request() {
get_route_msg.nlhdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
get_route_msg.nlhdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
get_route_msg.nlhdr.nlmsg_type = RTM_GETROUTE;
get_route_msg.nlhdr.nlmsg_seq = 3;
// the sequence number has stored the request sequence and the retry count.
// request sequece is stored in the LSB (least significant byte) and
// retry is stored in the 2nd LSB.
get_route_msg.nlhdr.nlmsg_seq = rt_request_sequence_ | (_retry << retry_bit_shift_);
if (multicast_address_.is_v6()) {
get_route_msg.routemsg.rtgen_family = AF_INET6;
} else {
Expand All @@ -327,6 +336,38 @@ void netlink_connector::send_rt_request() {
}
}

void netlink_connector::handle_netlink_error(struct nlmsgerr *_error_msg) {
// the sequence number has stored the request sequence and the retry count.
// retry is stored in the 2nd LSB.
std::uint32_t retry = _error_msg->msg.nlmsg_seq >> retry_bit_shift_;
if (retry >= max_retries_) {
VSOMEIP_ERROR << "netlink_connector::receive_cbk received "
"error message: " << strerror(-_error_msg->error)
<< " type " << std::dec << _error_msg->msg.nlmsg_type
<< " seq " << _error_msg->msg.nlmsg_seq;
return;
}

// the sequence number has stored the request sequence and the retry count.
// request sequece is stored in the LSB.
std::uint32_t request_sequence = _error_msg->msg.nlmsg_seq & request_sequence_bitmask_;
std::string request_type{};
if (_error_msg->msg.nlmsg_type == RTM_GETADDR && request_sequence == ifa_request_sequence_) {
request_type = "address request";
send_ifa_request(retry + 1);
} else if (_error_msg->msg.nlmsg_type == RTM_GETLINK && request_sequence == ifi_request_sequence_) {
request_type = "link request";
send_ifi_request(retry + 1);
} else if (_error_msg->msg.nlmsg_type == RTM_GETROUTE && request_sequence == rt_request_sequence_) {
request_type = "route request";
send_rt_request(retry + 1);
}

if (!request_type.empty()) {
VSOMEIP_INFO << "Retrying netlink " << request_type;
}
}

bool netlink_connector::has_address(struct ifaddrmsg * ifa_struct,
size_t length,
const unsigned int address) {
Expand Down

0 comments on commit 339a2ca

Please sign in to comment.