Negotiate max_sge between RDMA client & server

allow-etcd-address-option
Vitaliy Filippov 2021-04-29 00:12:06 +03:00
parent ce777319c3
commit 9e6cbc6ebc
5 changed files with 71 additions and 18 deletions

View File

@ -54,6 +54,7 @@ struct sec_options
int cluster_log = 0; int cluster_log = 0;
int trace = 0; int trace = 0;
int use_rdma = 0; int use_rdma = 0;
char *rdma_device = NULL;
int rdma_port_num = 0; int rdma_port_num = 0;
int rdma_gid_index = 0; int rdma_gid_index = 0;
int rdma_mtu = 0; int rdma_mtu = 0;
@ -127,7 +128,7 @@ static struct fio_option options[] = {
}, },
{ {
.name = "use_rdma", .name = "use_rdma",
.lname = "OSD trace", .lname = "Use RDMA",
.type = FIO_OPT_BOOL, .type = FIO_OPT_BOOL,
.off1 = offsetof(struct sec_options, use_rdma), .off1 = offsetof(struct sec_options, use_rdma),
.help = "Use RDMA", .help = "Use RDMA",
@ -135,6 +136,45 @@ static struct fio_option options[] = {
.category = FIO_OPT_C_ENGINE, .category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME, .group = FIO_OPT_G_FILENAME,
}, },
{
.name = "rdma_device",
.lname = "RDMA device name",
.type = FIO_OPT_STR_STORE,
.off1 = offsetof(struct sec_options, rdma_device),
.help = "RDMA device name",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_port_num",
.lname = "RDMA port number",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_port_num),
.help = "RDMA port number",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_gid_index",
.lname = "RDMA gid index",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_gid_index),
.help = "RDMA gid index",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{
.name = "rdma_mtu",
.lname = "RDMA path MTU",
.type = FIO_OPT_INT,
.off1 = offsetof(struct sec_options, rdma_mtu),
.help = "RDMA path MTU",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_FILENAME,
},
{ {
.name = NULL, .name = NULL,
}, },
@ -171,6 +211,10 @@ static int sec_setup(struct thread_data *td)
{ "etcd_prefix", std::string(o->etcd_prefix ? o->etcd_prefix : "/vitastor") }, { "etcd_prefix", std::string(o->etcd_prefix ? o->etcd_prefix : "/vitastor") },
{ "log_level", o->cluster_log }, { "log_level", o->cluster_log },
{ "use_rdma", o->use_rdma }, { "use_rdma", o->use_rdma },
{ "rdma_device", std::string(o->rdma_device ? o->rdma_device : "") },
{ "rdma_port_num", o->rdma_port_num },
{ "rdma_gid_index", o->rdma_gid_index },
{ "rdma_mtu", o->rdma_mtu },
}; };
if (!o->image) if (!o->image)

View File

@ -25,6 +25,8 @@ void osd_messenger_t::init()
} }
else else
{ {
rdma_max_sge = rdma_max_sge < rdma_context->attrx.orig_attr.max_sge
? rdma_max_sge : rdma_context->attrx.orig_attr.max_sge;
printf("[OSD %lu] RDMA initialized successfully\n", osd_num); printf("[OSD %lu] RDMA initialized successfully\n", osd_num);
fcntl(rdma_context->channel->fd, F_SETFL, fcntl(rdma_context->channel->fd, F_GETFL, 0) | O_NONBLOCK); fcntl(rdma_context->channel->fd, F_SETFL, fcntl(rdma_context->channel->fd, F_GETFL, 0) | O_NONBLOCK);
tfd->set_fd_handler(rdma_context->channel->fd, false, [this](int notify_fd, int epoll_events) tfd->set_fd_handler(rdma_context->channel->fd, false, [this](int notify_fd, int epoll_events)
@ -356,9 +358,6 @@ void osd_messenger_t::on_connect_peer(osd_num_t peer_osd, int peer_fd)
void osd_messenger_t::check_peer_config(osd_client_t *cl) void osd_messenger_t::check_peer_config(osd_client_t *cl)
{ {
#ifdef WITH_RDMA
msgr_rdma_connection_t *rdma_conn = NULL;
#endif
osd_op_t *op = new osd_op_t(); osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT; op->op_type = OSD_OP_OUT;
op->peer_fd = cl->peer_fd; op->peer_fd = cl->peer_fd;
@ -374,11 +373,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
#ifdef WITH_RDMA #ifdef WITH_RDMA
if (rdma_context) if (rdma_context)
{ {
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge); cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
if (cl->rdma_conn) if (cl->rdma_conn)
{ {
json11::Json payload = json11::Json::object { json11::Json payload = json11::Json::object {
{ "connect_rdma", cl->rdma_conn->addr.to_string() }, { "connect_rdma", cl->rdma_conn->addr.to_string() },
{ "rdma_max_sge", rdma_max_sge },
}; };
std::string payload_str = payload.dump(); std::string payload_str = payload.dump();
op->req.show_conf.json_len = payload_str.size(); op->req.show_conf.json_len = payload_str.size();
@ -388,11 +388,7 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
} }
} }
#endif #endif
op->callback = [this, cl op->callback = [this, cl](osd_op_t *op)
#ifdef WITH_RDMA
, rdma_conn
#endif
](osd_op_t *op)
{ {
std::string json_err; std::string json_err;
json11::Json config; json11::Json config;
@ -455,6 +451,11 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
} }
else else
{ {
uint64_t server_max_sge = config["rdma_max_sge"].uint64_value();
if (cl->rdma_conn->max_sge > server_max_sge)
{
cl->rdma_conn->max_sge = server_max_sge;
}
printf("Connected to OSD %lu using RDMA\n", cl->osd_num); printf("Connected to OSD %lu using RDMA\n", cl->osd_num);
cl->peer_state = PEER_RDMA; cl->peer_state = PEER_RDMA;
tfd->set_fd_handler(cl->peer_fd, false, NULL); tfd->set_fd_handler(cl->peer_fd, false, NULL);
@ -509,3 +510,8 @@ bool osd_messenger_t::is_rdma_enabled()
{ {
return rdma_context != NULL; return rdma_context != NULL;
} }
uint64_t osd_messenger_t::get_rdma_max_sge()
{
return rdma_max_sge;
}

View File

@ -137,7 +137,7 @@ protected:
std::string rdma_device; std::string rdma_device;
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0; uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL; msgr_rdma_context_t *rdma_context = NULL;
int max_rdma_sge = 128, max_rdma_send = 32, max_rdma_recv = 32; uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32;
#endif #endif
std::vector<int> read_ready_clients; std::vector<int> read_ready_clients;
@ -170,7 +170,8 @@ public:
#ifdef WITH_RDMA #ifdef WITH_RDMA
bool is_rdma_enabled(); bool is_rdma_enabled();
bool connect_rdma(int peer_fd, std::string rdma_address); bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge);
uint64_t get_rdma_max_sge();
#endif #endif
protected: protected:

View File

@ -293,13 +293,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest)
return 0; return 0;
} }
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address) bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge)
{ {
// Try to connect to the peer using RDMA // Try to connect to the peer using RDMA
msgr_rdma_address_t addr; msgr_rdma_address_t addr;
if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr)) if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr))
{ {
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge); if (client_max_sge > rdma_max_sge)
{
client_max_sge = rdma_max_sge;
}
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
if (rdma_conn) if (rdma_conn)
{ {
int r = rdma_conn->connect(&addr); int r = rdma_conn->connect(&addr);
@ -352,7 +356,6 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
return true; return true;
} }
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity; int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
// FIXME: rc->max_sge should be negotiated between client & server
ibv_sge sge[rc->max_sge]; ibv_sge sge[rc->max_sge];
while (rc->send_pos < cl->send_list.size()) while (rc->send_pos < cl->send_list.size())
{ {
@ -448,7 +451,6 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl)
} }
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity; int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
iovec *segments = cl->recv_list.get_iovec(); iovec *segments = cl->recv_list.get_iovec();
// FIXME: rc->max_sge should be negotiated between client & server
ibv_sge sge[rc->max_sge]; ibv_sge sge[rc->max_sge];
while (rc->recv_pos < cl->recv_list.get_size()) while (rc->recv_pos < cl->recv_list.get_size())
{ {

View File

@ -169,11 +169,11 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
if (req_json["connect_rdma"].is_string()) if (req_json["connect_rdma"].is_string())
{ {
// Peer is trying to connect using RDMA, try to satisfy him // Peer is trying to connect using RDMA, try to satisfy him
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value()); bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value());
if (ok) if (ok)
{ {
wire_config["rdma_connected"] = true;
wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string(); wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string();
wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge();
} }
} }
} }