Browse Source

Negotiate max_msg instead of max_sge, make buffer settings more conservative :-)

master
Vitaliy Filippov 2 months ago
parent
commit
483c5ab380
  1. 15
      src/messenger.cpp
  2. 6
      src/messenger.h
  3. 22
      src/msgr_rdma.cpp
  4. 6
      src/msgr_rdma.h
  5. 7
      src/osd_secondary.cpp

15
src/messenger.cpp

@ -370,12 +370,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
#ifdef WITH_RDMA
if (rdma_context)
{
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg);
if (cl->rdma_conn)
{
json11::Json payload = json11::Json::object {
{ "connect_rdma", cl->rdma_conn->addr.to_string() },
{ "rdma_max_sge", rdma_max_sge },
{ "rdma_max_msg", cl->rdma_conn->max_msg },
};
std::string payload_str = payload.dump();
op->req.show_conf.json_len = payload_str.size();
@ -448,10 +448,10 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
}
else
{
uint64_t server_max_sge = config["rdma_max_sge"].uint64_value();
if (cl->rdma_conn->max_sge > server_max_sge)
uint64_t server_max_msg = config["rdma_max_msg"].uint64_value();
if (cl->rdma_conn->max_msg > server_max_msg)
{
cl->rdma_conn->max_sge = server_max_sge;
cl->rdma_conn->max_msg = server_max_msg;
}
printf("Connected to OSD %lu using RDMA\n", cl->osd_num);
cl->peer_state = PEER_RDMA;
@ -507,8 +507,3 @@ bool osd_messenger_t::is_rdma_enabled()
{
return rdma_context != NULL;
}
uint64_t osd_messenger_t::get_rdma_max_sge()
{
return rdma_max_sge;
}

6
src/messenger.h

@ -137,7 +137,8 @@ protected:
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL;
// FIXME: Allow to configure these options
uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32;
uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 8;
uint64_t rdma_max_msg = 1024*1024;
#endif
std::vector<int> read_ready_clients;
@ -170,8 +171,7 @@ public:
#ifdef WITH_RDMA
bool is_rdma_enabled();
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge);
uint64_t get_rdma_max_sge();
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg);
#endif
protected:

22
src/msgr_rdma.cpp

@ -166,7 +166,8 @@ cleanup:
return NULL;
}
msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge)
msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx, uint32_t max_send,
uint32_t max_recv, uint32_t max_sge, uint32_t max_msg)
{
msgr_rdma_connection_t *conn = new msgr_rdma_connection_t;
@ -176,6 +177,7 @@ msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx,
conn->max_send = max_send;
conn->max_recv = max_recv;
conn->max_sge = max_sge;
conn->max_msg = max_msg;
ctx->used_max_cqe += max_send+max_recv;
if (ctx->used_max_cqe > ctx->max_cqe)
@ -296,17 +298,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest)
return 0;
}
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge)
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg)
{
// Try to connect to the peer using RDMA
msgr_rdma_address_t addr;
if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr))
{
if (client_max_sge > rdma_max_sge)
if (client_max_msg > rdma_max_msg)
{
client_max_sge = rdma_max_sge;
client_max_msg = rdma_max_msg;
}
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, client_max_msg);
if (rdma_conn)
{
int r = rdma_conn->connect(&addr);
@ -363,7 +365,7 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
while (rc->send_pos < cl->send_list.size())
{
iovec & iov = cl->send_list[rc->send_pos];
if (op_size >= RDMA_MAX_MSG || op_sge >= rc->max_sge)
if (op_size >= rc->max_msg || op_sge >= rc->max_sge)
{
try_send_rdma_wr(cl, sge, op_sge);
op_sge = 0;
@ -373,8 +375,8 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
break;
}
}
uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < RDMA_MAX_MSG
? iov.iov_len-rc->send_buf_pos : RDMA_MAX_MSG-op_size);
uint32_t len = (uint32_t)(op_size+iov.iov_len-rc->send_buf_pos < rc->max_msg
? iov.iov_len-rc->send_buf_pos : rc->max_msg-op_size);
sge[op_sge++] = {
.addr = (uintptr_t)(iov.iov_base+rc->send_buf_pos),
.length = len,
@ -417,11 +419,11 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl)
auto rc = cl->rdma_conn;
while (rc->cur_recv < rc->max_recv)
{
void *buf = malloc_or_die(RDMA_MAX_MSG);
void *buf = malloc_or_die(rc->max_msg);
rc->recv_buffers.push_back(buf);
ibv_sge sge = {
.addr = (uintptr_t)buf,
.length = RDMA_MAX_MSG,
.length = (uint32_t)rc->max_msg,
.lkey = rc->ctx->mr->lkey,
};
try_recv_rdma_wr(cl, &sge, 1);

6
src/msgr_rdma.h

@ -6,9 +6,6 @@
#include <string>
#include <vector>
// FIXME: Allow to configure this option
#define RDMA_MAX_MSG 4194304
struct msgr_rdma_address_t
{
ibv_gid gid;
@ -49,12 +46,13 @@ struct msgr_rdma_connection_t
msgr_rdma_address_t addr;
int max_send = 0, max_recv = 0, max_sge = 0;
int cur_send = 0, cur_recv = 0;
uint64_t max_msg = 0;
int send_pos = 0, send_buf_pos = 0;
int recv_pos = 0, recv_buf_pos = 0;
std::vector<void*> recv_buffers;
~msgr_rdma_connection_t();
static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge);
static msgr_rdma_connection_t *create(msgr_rdma_context_t *ctx, uint32_t max_send, uint32_t max_recv, uint32_t max_sge, uint32_t max_msg);
int connect(msgr_rdma_address_t *dest);
};

7
src/osd_secondary.cpp

@ -169,11 +169,12 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
if (req_json["connect_rdma"].is_string())
{
// Peer is trying to connect using RDMA, try to satisfy him
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value());
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_msg"].uint64_value());
if (ok)
{
wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string();
wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge();
auto rc = msgr.clients.at(cur_op->peer_fd)->rdma_conn;
wire_config["rdma_address"] = rc->addr.to_string();
wire_config["rdma_max_msg"] = rc->max_msg;
}
}
}

Loading…
Cancel
Save