diff --git a/src/messenger.cpp b/src/messenger.cpp index bfcd4b9b..61beacd2 100644 --- a/src/messenger.cpp +++ b/src/messenger.cpp @@ -370,12 +370,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl) #ifdef WITH_RDMA if (rdma_context) { - cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge); + cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg); if (cl->rdma_conn) { json11::Json payload = json11::Json::object { { "connect_rdma", cl->rdma_conn->addr.to_string() }, - { "rdma_max_sge", rdma_max_sge }, + { "rdma_max_msg", cl->rdma_conn->max_msg }, }; std::string payload_str = payload.dump(); op->req.show_conf.json_len = payload_str.size(); @@ -448,10 +448,10 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl) } else { - uint64_t server_max_sge = config["rdma_max_sge"].uint64_value(); - if (cl->rdma_conn->max_sge > server_max_sge) + uint64_t server_max_msg = config["rdma_max_msg"].uint64_value(); + if (cl->rdma_conn->max_msg > server_max_msg) { - cl->rdma_conn->max_sge = server_max_sge; + cl->rdma_conn->max_msg = server_max_msg; } printf("Connected to OSD %lu using RDMA\n", cl->osd_num); cl->peer_state = PEER_RDMA; @@ -507,8 +507,3 @@ bool osd_messenger_t::is_rdma_enabled() { return rdma_context != NULL; } - -uint64_t osd_messenger_t::get_rdma_max_sge() -{ - return rdma_max_sge; -} diff --git a/src/messenger.h b/src/messenger.h index 2d2abef8..a98b5aa7 100644 --- a/src/messenger.h +++ b/src/messenger.h @@ -137,7 +137,8 @@ protected: uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0; msgr_rdma_context_t *rdma_context = NULL; // FIXME: Allow to configure these options - uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32; + uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 8; + uint64_t rdma_max_msg = 1024*1024; #endif std::vector read_ready_clients; @@ -170,8 +171,7 @@ public: #ifdef WITH_RDMA bool is_rdma_enabled(); - bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge); - uint64_t get_rdma_max_sge(); + bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg); #endif protected: diff --git a/src/msgr_rdma.cpp b/src/msgr_rdma.cpp index 061a0278..e082bf32 100644 --- a/src/msgr_rdma.cpp +++ b/src/msgr_rdma.cpp @@ -166,7 +166,8 @@ cleanup: return NULL; } -msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge) +msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send, + uint32_t max_recv, uint32_t max_sge, uint32_t max_msg) { msgr_rdma_connection_t *conn = new msgr_rdma_connection_t; @@ -176,6 +177,7 @@ msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, conn->max_send = max_send; conn->max_recv = max_recv; conn->max_sge = max_sge; + conn->max_msg = max_msg; ctx->used_max_cqe += max_send+max_recv; if (ctx->used_max_cqe > ctx->max_cqe) @@ -296,17 +298,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest) return 0; } -bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge) +bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg) { // Try to connect to the peer using RDMA msgr_rdma_address_t addr; if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr)) { - if (client_max_sge > rdma_max_sge) + if (client_max_msg > rdma_max_msg) { - client_max_sge = rdma_max_sge; + client_max_msg = rdma_max_msg; } - auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge); + auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, client_max_msg); if (rdma_conn) { int r = rdma_conn->connect(&addr); @@ -363,7 +365,7 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl) while (rc->send_pos < cl->send_list.size()) { iovec & iov = cl->send_list[rc->send_pos]; - if (op_size >= RDMA_MAX_MSG || op_sge >= rc->max_sge) + if (op_size >= rc->max_msg || op_sge >= rc->max_sge) { try_send_rdma_wr(cl, sge, op_sge); op_sge = 0; @@ -373,8 +375,8 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl) break; } } - uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < RDMA_MAX_MSG - ? iov.iov_len-rc->send_buf_pos : RDMA_MAX_MSG-op_size); + uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < rc->max_msg + ? iov.iov_len-rc->send_buf_pos : rc->max_msg-op_size); sge[op_sge++] = { .addr = (uintptr_t)(iov.iov_base+rc->send_buf_pos), .length = len, @@ -417,11 +419,11 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl) auto rc = cl->rdma_conn; while (rc->cur_recv < rc->max_recv) { - void *buf = malloc_or_die(RDMA_MAX_MSG); + void *buf = malloc_or_die(rc->max_msg); rc->recv_buffers.push_back(buf); ibv_sge sge = { .addr = (uintptr_t)buf, - .length = RDMA_MAX_MSG, + .length = (uint32_t)rc->max_msg, .lkey = rc->ctx->mr->lkey, }; try_recv_rdma_wr(cl, &sge, 1); diff --git a/src/msgr_rdma.h b/src/msgr_rdma.h index 92573140..4f257707 100644 --- a/src/msgr_rdma.h +++ b/src/msgr_rdma.h @@ -6,9 +6,6 @@ #include #include -// FIXME: Allow to configure this option -#define RDMA_MAX_MSG 4194304 - struct msgr_rdma_address_t { ibv_gid gid; @@ -49,12 +46,13 @@ struct msgr_rdma_connection_t msgr_rdma_address_t addr; int max_send = 0, max_recv = 0, max_sge = 0; int cur_send = 0, cur_recv = 0; + uint64_t max_msg = 0; int send_pos = 0, send_buf_pos = 0; int recv_pos = 0, recv_buf_pos = 0; std::vector recv_buffers; ~msgr_rdma_connection_t(); - static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge); + static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge, uint32_t max_msg); int connect(msgr_rdma_address_t *dest); }; diff --git a/src/osd_secondary.cpp b/src/osd_secondary.cpp index a674ac9d..3487bedc 100644 --- a/src/osd_secondary.cpp +++ b/src/osd_secondary.cpp @@ -169,11 +169,12 @@ void osd_t::exec_show_config(osd_op_t *cur_op) if (req_json["connect_rdma"].is_string()) { // Peer is trying to connect using RDMA, try to satisfy him - bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value()); + bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_msg"].uint64_value()); if (ok) { - wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string(); - wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge(); + auto rc = msgr.clients.at(cur_op->peer_fd)->rdma_conn; + wire_config["rdma_address"] = rc->addr.to_string(); + wire_config["rdma_max_msg"] = rc->max_msg; } } }