Browse Source

Negotiate max_sge between RDMA client & server

master
Vitaliy Filippov 2 months ago
parent
commit
9e6cbc6ebc
  1. 46
      src/fio_cluster.cpp
  2. 24
      src/messenger.cpp
  3. 5
      src/messenger.h
  4. 10
      src/msgr_rdma.cpp
  5. 4
      src/osd_secondary.cpp

46
src/fio_cluster.cpp

@ -54,6 +54,7 @@ struct sec_options
int cluster_log = 0;
int trace = 0;
int use_rdma = 0;
char *rdma_device = NULL;
int rdma_port_num = 0;
int rdma_gid_index = 0;
int rdma_mtu = 0;
@ -127,7 +128,7 @@ static struct fio_option options[] = {
},
{
.name = "use_rdma",
.lname = "OSD trace",
.lname = "Use RDMA",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct sec_options, use_rdma),
.help = "Use RDMA",
@ -135,6 +136,45 @@ static struct fio_option options[] = {
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_device",
.lname = "RDMA device name",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct sec_options, rdma_device),
.help = "RDMA device name",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_port_num",
.lname = "RDMA port number",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_port_num),
.help = "RDMA port number",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_gid_index",
.lname = "RDMA gid index",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_gid_index),
.help = "RDMA gid index",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_mtu",
.lname = "RDMA path MTU",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_mtu),
.help = "RDMA path MTU",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = NULL,
},
@ -171,6 +211,10 @@ static int sec_setup(struct thread_data *td)
{ "etcd_prefix", std::string(o->etcd_prefix ? o->etcd_prefix : "/vitastor") },
{ "log_level", o->cluster_log },
{ "use_rdma", o->use_rdma },
{ "rdma_device", std::string(o->rdma_device ? o->rdma_device : "") },
{ "rdma_port_num", o->rdma_port_num },
{ "rdma_gid_index", o->rdma_gid_index },
{ "rdma_mtu", o->rdma_mtu },
};
if (!o->image)

24
src/messenger.cpp

@ -25,6 +25,8 @@ void osd_messenger_t::init()
}
else
{
rdma_max_sge = rdma_max_sge < rdma_context->attrx.orig_attr.max_sge
? rdma_max_sge : rdma_context->attrx.orig_attr.max_sge;
printf("[OSD %lu] RDMA initialized successfully\n", osd_num);
fcntl(rdma_context->channel->fd, F_SETFL, fcntl(rdma_context->channel->fd, F_GETFL, 0) | O_NONBLOCK);
tfd->set_fd_handler(rdma_context->channel->fd, false, [this](int notify_fd, int epoll_events)
@ -356,9 +358,6 @@ void osd_messenger_t::on_connect_peer(osd_num_t peer_osd, int peer_fd)
void osd_messenger_t::check_peer_config(osd_client_t *cl)
{
#ifdef WITH_RDMA
msgr_rdma_connection_t *rdma_conn = NULL;
#endif
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = cl->peer_fd;
@ -374,11 +373,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
#ifdef WITH_RDMA
if (rdma_context)
{
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge);
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
if (cl->rdma_conn)
{
json11::Json payload = json11::Json::object {
{ "connect_rdma", cl->rdma_conn->addr.to_string() },
{ "rdma_max_sge", rdma_max_sge },
};
std::string payload_str = payload.dump();
op->req.show_conf.json_len = payload_str.size();
@ -388,11 +388,7 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
}
}
#endif
op->callback = [this, cl
#ifdef WITH_RDMA
, rdma_conn
#endif
](osd_op_t *op)
op->callback = [this, cl](osd_op_t *op)
{
std::string json_err;
json11::Json config;
@ -455,6 +451,11 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
}
else
{
uint64_t server_max_sge = config["rdma_max_sge"].uint64_value();
if (cl->rdma_conn->max_sge > server_max_sge)
{
cl->rdma_conn->max_sge = server_max_sge;
}
printf("Connected to OSD %lu using RDMA\n", cl->osd_num);
cl->peer_state = PEER_RDMA;
tfd->set_fd_handler(cl->peer_fd, false, NULL);
@ -509,3 +510,8 @@ bool osd_messenger_t::is_rdma_enabled()
{
return rdma_context != NULL;
}
// Accessor for the messenger's rdma_max_sge member.
// Returns the current value unchanged; this function performs no clamping or
// validation of its own.
// NOTE(review): presumably this is the scatter/gather entry limit advertised to
// RDMA peers during the show_config exchange — confirm against the callers.
uint64_t osd_messenger_t::get_rdma_max_sge()
{
return rdma_max_sge;
}

5
src/messenger.h

@ -137,7 +137,7 @@ protected:
std::string rdma_device;
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL;
int max_rdma_sge = 128, max_rdma_send = 32, max_rdma_recv = 32;
uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32;
#endif
std::vector<int> read_ready_clients;
@ -170,7 +170,8 @@ public:
#ifdef WITH_RDMA
bool is_rdma_enabled();
bool connect_rdma(int peer_fd, std::string rdma_address);
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge);
uint64_t get_rdma_max_sge();
#endif
protected:

10
src/msgr_rdma.cpp

@ -293,13 +293,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest)
return 0;
}
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address)
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge)
{
// Try to connect to the peer using RDMA
msgr_rdma_address_t addr;
if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr))
{
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge);
if (client_max_sge > rdma_max_sge)
{
client_max_sge = rdma_max_sge;
}
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
if (rdma_conn)
{
int r = rdma_conn->connect(&addr);
@ -352,7 +356,6 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
return true;
}
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
// FIXME: rc->max_sge should be negotiated between client & server
ibv_sge sge[rc->max_sge];
while (rc->send_pos < cl->send_list.size())
{
@ -448,7 +451,6 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl)
}
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
iovec *segments = cl->recv_list.get_iovec();
// FIXME: rc->max_sge should be negotiated between client & server
ibv_sge sge[rc->max_sge];
while (rc->recv_pos < cl->recv_list.get_size())
{

4
src/osd_secondary.cpp

@ -169,11 +169,11 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
if (req_json["connect_rdma"].is_string())
{
// Peer is trying to connect using RDMA, try to satisfy him
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value());
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value());
if (ok)
{
wire_config["rdma_connected"] = true;
wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string();
wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge();
}
}
}

Loading…
Cancel
Save