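Negotiate max_msg instead of max_sge; make the RDMA buffer settings more conservative (rdma_max_recv lowered from 32 to 8, default maximum message size lowered from 4 MiB to 1 MiB)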

allow-etcd-address-option
Vitaliy Filippov 2021-04-29 01:39:32 +03:00
parent 6a6fd6544d
commit 483c5ab380
5 changed files with 26 additions and 30 deletions

View File

@ -370,12 +370,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
#ifdef WITH_RDMA #ifdef WITH_RDMA
if (rdma_context) if (rdma_context)
{ {
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge); cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg);
if (cl->rdma_conn) if (cl->rdma_conn)
{ {
json11::Json payload = json11::Json::object { json11::Json payload = json11::Json::object {
{ "connect_rdma", cl->rdma_conn->addr.to_string() }, { "connect_rdma", cl->rdma_conn->addr.to_string() },
{ "rdma_max_sge", rdma_max_sge }, { "rdma_max_msg", cl->rdma_conn->max_msg },
}; };
std::string payload_str = payload.dump(); std::string payload_str = payload.dump();
op->req.show_conf.json_len = payload_str.size(); op->req.show_conf.json_len = payload_str.size();
@ -448,10 +448,10 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
} }
else else
{ {
uint64_t server_max_sge = config["rdma_max_sge"].uint64_value(); uint64_t server_max_msg = config["rdma_max_msg"].uint64_value();
if (cl->rdma_conn->max_sge > server_max_sge) if (cl->rdma_conn->max_msg > server_max_msg)
{ {
cl->rdma_conn->max_sge = server_max_sge; cl->rdma_conn->max_msg = server_max_msg;
} }
printf("Connected to OSD %lu using RDMA\n", cl->osd_num); printf("Connected to OSD %lu using RDMA\n", cl->osd_num);
cl->peer_state = PEER_RDMA; cl->peer_state = PEER_RDMA;
@ -507,8 +507,3 @@ bool osd_messenger_t::is_rdma_enabled()
{ {
return rdma_context != NULL; return rdma_context != NULL;
} }
// Returns the value of the messenger's rdma_max_sge member.
uint64_t osd_messenger_t::get_rdma_max_sge()
{
return rdma_max_sge;
}

View File

@ -137,7 +137,8 @@ protected:
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0; uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL; msgr_rdma_context_t *rdma_context = NULL;
// FIXME: Allow to configure these options // FIXME: Allow to configure these options
uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32; uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 8;
uint64_t rdma_max_msg = 1024*1024;
#endif #endif
std::vector<int> read_ready_clients; std::vector<int> read_ready_clients;
@ -170,8 +171,7 @@ public:
#ifdef WITH_RDMA #ifdef WITH_RDMA
bool is_rdma_enabled(); bool is_rdma_enabled();
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge); bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg);
uint64_t get_rdma_max_sge();
#endif #endif
protected: protected:

View File

@ -166,7 +166,8 @@ cleanup:
return NULL; return NULL;
} }
msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge) msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send,
uint32_t max_recv, uint32_t max_sge, uint32_t max_msg)
{ {
msgr_rdma_connection_t *conn = new msgr_rdma_connection_t; msgr_rdma_connection_t *conn = new msgr_rdma_connection_t;
@ -176,6 +177,7 @@ msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx,
conn->max_send = max_send; conn->max_send = max_send;
conn->max_recv = max_recv; conn->max_recv = max_recv;
conn->max_sge = max_sge; conn->max_sge = max_sge;
conn->max_msg = max_msg;
ctx->used_max_cqe += max_send+max_recv; ctx->used_max_cqe += max_send+max_recv;
if (ctx->used_max_cqe > ctx->max_cqe) if (ctx->used_max_cqe > ctx->max_cqe)
@ -296,17 +298,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest)
return 0; return 0;
} }
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge) bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg)
{ {
// Try to connect to the peer using RDMA // Try to connect to the peer using RDMA
msgr_rdma_address_t addr; msgr_rdma_address_t addr;
if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr)) if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr))
{ {
if (client_max_sge > rdma_max_sge) if (client_max_msg > rdma_max_msg)
{ {
client_max_sge = rdma_max_sge; client_max_msg = rdma_max_msg;
} }
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge); auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, client_max_msg);
if (rdma_conn) if (rdma_conn)
{ {
int r = rdma_conn->connect(&addr); int r = rdma_conn->connect(&addr);
@ -363,7 +365,7 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
while (rc->send_pos < cl->send_list.size()) while (rc->send_pos < cl->send_list.size())
{ {
iovec & iov = cl->send_list[rc->send_pos]; iovec & iov = cl->send_list[rc->send_pos];
if (op_size >= RDMA_MAX_MSG || op_sge >= rc->max_sge) if (op_size >= rc->max_msg || op_sge >= rc->max_sge)
{ {
try_send_rdma_wr(cl, sge, op_sge); try_send_rdma_wr(cl, sge, op_sge);
op_sge = 0; op_sge = 0;
@ -373,8 +375,8 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
break; break;
} }
} }
uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < RDMA_MAX_MSG uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < rc->max_msg
? iov.iov_len-rc->send_buf_pos : RDMA_MAX_MSG-op_size); ? iov.iov_len-rc->send_buf_pos : rc->max_msg-op_size);
sge[op_sge++] = { sge[op_sge++] = {
.addr = (uintptr_t)(iov.iov_base+rc->send_buf_pos), .addr = (uintptr_t)(iov.iov_base+rc->send_buf_pos),
.length = len, .length = len,
@ -417,11 +419,11 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl)
auto rc = cl->rdma_conn; auto rc = cl->rdma_conn;
while (rc->cur_recv < rc->max_recv) while (rc->cur_recv < rc->max_recv)
{ {
void *buf = malloc_or_die(RDMA_MAX_MSG); void *buf = malloc_or_die(rc->max_msg);
rc->recv_buffers.push_back(buf); rc->recv_buffers.push_back(buf);
ibv_sge sge = { ibv_sge sge = {
.addr = (uintptr_t)buf, .addr = (uintptr_t)buf,
.length = RDMA_MAX_MSG, .length = (uint32_t)rc->max_msg,
.lkey = rc->ctx->mr->lkey, .lkey = rc->ctx->mr->lkey,
}; };
try_recv_rdma_wr(cl, &sge, 1); try_recv_rdma_wr(cl, &sge, 1);

View File

@ -6,9 +6,6 @@
#include <string> #include <string>
#include <vector> #include <vector>
// FIXME: Allow to configure this option
#define RDMA_MAX_MSG 4194304
struct msgr_rdma_address_t struct msgr_rdma_address_t
{ {
ibv_gid gid; ibv_gid gid;
@ -49,12 +46,13 @@ struct msgr_rdma_connection_t
msgr_rdma_address_t addr; msgr_rdma_address_t addr;
int max_send = 0, max_recv = 0, max_sge = 0; int max_send = 0, max_recv = 0, max_sge = 0;
int cur_send = 0, cur_recv = 0; int cur_send = 0, cur_recv = 0;
uint64_t max_msg = 0;
int send_pos = 0, send_buf_pos = 0; int send_pos = 0, send_buf_pos = 0;
int recv_pos = 0, recv_buf_pos = 0; int recv_pos = 0, recv_buf_pos = 0;
std::vector<void*> recv_buffers; std::vector<void*> recv_buffers;
~msgr_rdma_connection_t(); ~msgr_rdma_connection_t();
static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge); static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge, uint32_t max_msg);
int connect(msgr_rdma_address_t *dest); int connect(msgr_rdma_address_t *dest);
}; };

View File

@ -169,11 +169,12 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
if (req_json["connect_rdma"].is_string()) if (req_json["connect_rdma"].is_string())
{ {
// Peer is trying to connect using RDMA, try to satisfy him // Peer is trying to connect using RDMA, try to satisfy him
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value()); bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_msg"].uint64_value());
if (ok) if (ok)
{ {
wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string(); auto rc = msgr.clients.at(cur_op->peer_fd)->rdma_conn;
wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge(); wire_config["rdma_address"] = rc->addr.to_string();
wire_config["rdma_max_msg"] = rc->max_msg;
} }
} }
} }