Copilot commented on code in PR #3304:
URL: https://github.com/apache/brpc/pull/3304#discussion_r3270877769
##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -81,14 +81,16 @@ static const size_t RESERVED_WR_NUM = 3;
// block size (4B)
// sq size (2B)
// rq size (2B)
+// lid size (2B)
// GID (16B)
// QP number (4B)
+// mtu type (2B)
static const char* MAGIC_STR = "RDMA";
static const size_t MAGIC_STR_LEN = 4;
-static const size_t HELLO_MSG_LEN_MIN = 40;
+static const size_t HELLO_MSG_LEN_MIN = 42;
// static const size_t HELLO_MSG_LEN_MAX = 4096;
static const size_t ACK_MSG_LEN = 4;
-static uint16_t g_rdma_hello_msg_len = 40; // In Byte
+static uint16_t g_rdma_hello_msg_len = 42; // In Byte
static uint16_t g_rdma_hello_version = 2;
Review Comment:
The handshake wire format changed
(`HELLO_MSG_LEN_MIN`/`g_rdma_hello_msg_len` increased to 42 and `mtu_type`
added) but `g_rdma_hello_version` is still 2 and the receive path still reads a
fixed `HELLO_MSG_LEN_MIN - 4` bytes before knowing the peer’s `msg_len`. This
will break interoperability with older peers (and can cause the reader to
consume bytes from the next message). Consider (a) bumping the hello version
and making negotiation accept both old/new versions, and (b) reading the
fixed-size header first to learn `msg_len`, then conditionally reading/decoding
optional fields like `mtu_type`.
##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -1232,12 +1249,27 @@ int RdmaEndpoint::AllocateResources() {
return 0;
}
-int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) {
+int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num,
uint16_t mtu_type) {
if (BAIDU_UNLIKELY(g_skip_rdma_init)) {
// For UT
return 0;
}
+ if (mtu_type == IBV_MTU_256) {
+ LOG(INFO) << "negotiated mtu is 256";
+ } else if (mtu_type == IBV_MTU_512) {
+ LOG(INFO) << "negotiated mtu is 512";
+ } else if (mtu_type == IBV_MTU_1024) {
+ LOG(INFO) << "negotiated mtu is 1024";
+ } else if (mtu_type == IBV_MTU_2048) {
+ LOG(INFO) << "negotiated mtu is 2048";
+ } else if (mtu_type == IBV_MTU_4096) {
+ LOG(INFO) << "negotiated mtu is 4096";
Review Comment:
`BringUpQp()` now logs the negotiated MTU at INFO for every connection
setup. Since QP bring-up happens per-connection, this can generate high-volume
logs in production. Consider gating these messages behind
`FLAGS_rdma_trace_verbose` (or using VLOG) and keeping INFO reserved for
one-time initialization logs.
##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -534,7 +545,9 @@ void* RdmaEndpoint::ProcessHandshakeAtClient(void* arg) {
ep->_local_window_capacity, butil::memory_order_relaxed);
ep->_state = C_BRINGUP_QP;
- if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num) <
0) {
+ // use the minimum of local mtu type and remote mtu type
+ uint16_t min_mtu_type = std::min(local_mtu_type, remote_msg.mtu_type);
+ if (ep->BringUpQp(remote_msg.lid, remote_msg.gid, remote_msg.qp_num,
min_mtu_type) < 0) {
LOG(WARNING) << "Fail to bringup QP, fallback to tcp:" <<
s->description();
Review Comment:
The new MTU negotiation path (`min_mtu_type` derived from local/remote hello
messages) changes handshake/QP bring-up behavior, but there are existing RDMA
handshake unit tests (e.g. `test/brpc_rdma_unittest.cpp`) that were written
around the previous 40-byte hello message and do not cover MTU negotiation.
Please update/add tests to validate (1) hello parsing for both old/new message
lengths (if backward compatibility is intended) and (2) that `path_mtu` is set
from the negotiated MTU during bring-up.
##########
src/brpc/rdma/rdma_helper.cpp:
##########
@@ -455,6 +457,36 @@ static ibv_context* OpenDevice(int num_total, int*
num_available_devices) {
return ret_context;
}
+static uint16_t detect_mtu(struct ibv_context* ctx, int port_num) {
+ struct ibv_port_attr port_attr;
+
+ if (ibv_query_port(ctx, port_num, &port_attr)) {
+ LOG(ERROR) << "ibv_query_port failed";
Review Comment:
`detect_mtu()` calls `ibv_query_port` directly, bypassing the
dynamically-loaded `IbvQueryPort` function pointer used elsewhere in this file.
This is inconsistent with the rest of the RDMA helper and can break the
intended indirection (and error handling style) around ibverbs symbols. Use
`IbvQueryPort(ctx, ...)` here and propagate/log the error similarly to other
port queries in this file.
##########
src/brpc/rdma/rdma_endpoint.cpp:
##########
@@ -681,6 +697,7 @@ void* RdmaEndpoint::ProcessHandshakeAtServer(void* arg) {
// Only happens in UT
local_msg.qp_num = 0;
}
+ local_msg.mtu_type = local_mtu_type;
}
memcpy(data, MAGIC_STR, 4);
local_msg.Serialize((char*)data + 4);
Review Comment:
In the server handshake, `local_msg.mtu_type` is only set in the `RDMA_ON`
branch. Since `HelloMessage` is not value-initialized, the `RDMA_OFF` branch
can still serialize and send uninitialized bytes for fields like `mtu_type`
(and potentially others), leaking stack contents over the wire. Please
value-initialize `HelloMessage` (e.g. `{}`) and/or explicitly set all
serialized fields in the `RDMA_OFF` path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]