Negotiate max_sge between RDMA client & server
parent
ce777319c3
commit
9e6cbc6ebc
|
@ -54,6 +54,7 @@ struct sec_options
|
|||
int cluster_log = 0;
|
||||
int trace = 0;
|
||||
int use_rdma = 0;
|
||||
char *rdma_device = NULL;
|
||||
int rdma_port_num = 0;
|
||||
int rdma_gid_index = 0;
|
||||
int rdma_mtu = 0;
|
||||
|
@ -127,7 +128,7 @@ static struct fio_option options[] = {
|
|||
},
|
||||
{
|
||||
.name = "use_rdma",
|
||||
.lname = "OSD trace",
|
||||
.lname = "Use RDMA",
|
||||
.type = FIO_OPT_BOOL,
|
||||
.off1 = offsetof(struct sec_options, use_rdma),
|
||||
.help = "Use RDMA",
|
||||
|
@ -135,6 +136,45 @@ static struct fio_option options[] = {
|
|||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "rdma_device",
|
||||
.lname = "RDMA device name",
|
||||
.type = FIO_OPT_STR_STORE,
|
||||
.off1 = offsetof(struct sec_options, rdma_device),
|
||||
.help = "RDMA device name",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "rdma_port_num",
|
||||
.lname = "RDMA port number",
|
||||
.type = FIO_OPT_INT,
|
||||
.off1 = offsetof(struct sec_options, rdma_port_num),
|
||||
.help = "RDMA port number",
|
||||
.def = "0",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "rdma_gid_index",
|
||||
.lname = "RDMA gid index",
|
||||
.type = FIO_OPT_INT,
|
||||
.off1 = offsetof(struct sec_options, rdma_gid_index),
|
||||
.help = "RDMA gid index",
|
||||
.def = "0",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "rdma_mtu",
|
||||
.lname = "RDMA path MTU",
|
||||
.type = FIO_OPT_INT,
|
||||
.off1 = offsetof(struct sec_options, rdma_mtu),
|
||||
.help = "RDMA path MTU",
|
||||
.def = "0",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = NULL,
|
||||
},
|
||||
|
@ -171,6 +211,10 @@ static int sec_setup(struct thread_data *td)
|
|||
{ "etcd_prefix", std::string(o->etcd_prefix ? o->etcd_prefix : "/vitastor") },
|
||||
{ "log_level", o->cluster_log },
|
||||
{ "use_rdma", o->use_rdma },
|
||||
{ "rdma_device", std::string(o->rdma_device ? o->rdma_device : "") },
|
||||
{ "rdma_port_num", o->rdma_port_num },
|
||||
{ "rdma_gid_index", o->rdma_gid_index },
|
||||
{ "rdma_mtu", o->rdma_mtu },
|
||||
};
|
||||
|
||||
if (!o->image)
|
||||
|
|
|
@ -25,6 +25,8 @@ void osd_messenger_t::init()
|
|||
}
|
||||
else
|
||||
{
|
||||
rdma_max_sge = rdma_max_sge < rdma_context->attrx.orig_attr.max_sge
|
||||
? rdma_max_sge : rdma_context->attrx.orig_attr.max_sge;
|
||||
printf("[OSD %lu] RDMA initialized successfully\n", osd_num);
|
||||
fcntl(rdma_context->channel->fd, F_SETFL, fcntl(rdma_context->channel->fd, F_GETFL, 0) | O_NONBLOCK);
|
||||
tfd->set_fd_handler(rdma_context->channel->fd, false, [this](int notify_fd, int epoll_events)
|
||||
|
@ -356,9 +358,6 @@ void osd_messenger_t::on_connect_peer(osd_num_t peer_osd, int peer_fd)
|
|||
|
||||
void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
{
|
||||
#ifdef WITH_RDMA
|
||||
msgr_rdma_connection_t *rdma_conn = NULL;
|
||||
#endif
|
||||
osd_op_t *op = new osd_op_t();
|
||||
op->op_type = OSD_OP_OUT;
|
||||
op->peer_fd = cl->peer_fd;
|
||||
|
@ -374,11 +373,12 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
|||
#ifdef WITH_RDMA
|
||||
if (rdma_context)
|
||||
{
|
||||
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge);
|
||||
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
|
||||
if (cl->rdma_conn)
|
||||
{
|
||||
json11::Json payload = json11::Json::object {
|
||||
{ "connect_rdma", cl->rdma_conn->addr.to_string() },
|
||||
{ "rdma_max_sge", rdma_max_sge },
|
||||
};
|
||||
std::string payload_str = payload.dump();
|
||||
op->req.show_conf.json_len = payload_str.size();
|
||||
|
@ -388,11 +388,7 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
|||
}
|
||||
}
|
||||
#endif
|
||||
op->callback = [this, cl
|
||||
#ifdef WITH_RDMA
|
||||
, rdma_conn
|
||||
#endif
|
||||
](osd_op_t *op)
|
||||
op->callback = [this, cl](osd_op_t *op)
|
||||
{
|
||||
std::string json_err;
|
||||
json11::Json config;
|
||||
|
@ -455,6 +451,11 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
|||
}
|
||||
else
|
||||
{
|
||||
uint64_t server_max_sge = config["rdma_max_sge"].uint64_value();
|
||||
if (cl->rdma_conn->max_sge > server_max_sge)
|
||||
{
|
||||
cl->rdma_conn->max_sge = server_max_sge;
|
||||
}
|
||||
printf("Connected to OSD %lu using RDMA\n", cl->osd_num);
|
||||
cl->peer_state = PEER_RDMA;
|
||||
tfd->set_fd_handler(cl->peer_fd, false, NULL);
|
||||
|
@ -509,3 +510,8 @@ bool osd_messenger_t::is_rdma_enabled()
|
|||
{
|
||||
return rdma_context != NULL;
|
||||
}
|
||||
|
||||
// Accessor for the messenger's RDMA scatter/gather element limit.
// Returns the current value of the rdma_max_sge member as-is; this function
// performs no validation or negotiation itself.
// NOTE(review): elsewhere in this change the member appears to be lowered to
// the device's max_sge inside init(), and this getter's result is placed into
// the show_config reply under "rdma_max_sge" — confirm against the full files.
uint64_t osd_messenger_t::get_rdma_max_sge()
|
||||
{
|
||||
return rdma_max_sge;
|
||||
}
|
||||
|
|
|
@ -137,7 +137,7 @@ protected:
|
|||
std::string rdma_device;
|
||||
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
|
||||
msgr_rdma_context_t *rdma_context = NULL;
|
||||
int max_rdma_sge = 128, max_rdma_send = 32, max_rdma_recv = 32;
|
||||
uint64_t rdma_max_sge = 128, rdma_max_send = 32, rdma_max_recv = 32;
|
||||
#endif
|
||||
|
||||
std::vector<int> read_ready_clients;
|
||||
|
@ -170,7 +170,8 @@ public:
|
|||
|
||||
#ifdef WITH_RDMA
|
||||
bool is_rdma_enabled();
|
||||
bool connect_rdma(int peer_fd, std::string rdma_address);
|
||||
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge);
|
||||
uint64_t get_rdma_max_sge();
|
||||
#endif
|
||||
|
||||
protected:
|
||||
|
|
|
@ -293,13 +293,17 @@ int msgr_rdma_connection_t::connect(msgr_rdma_address_t *dest)
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address)
|
||||
bool osd_messenger_t::connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_sge)
|
||||
{
|
||||
// Try to connect to the peer using RDMA
|
||||
msgr_rdma_address_t addr;
|
||||
if (msgr_rdma_address_t::from_string(rdma_address.c_str(), &addr))
|
||||
{
|
||||
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, max_rdma_send, max_rdma_recv, max_rdma_sge);
|
||||
if (client_max_sge > rdma_max_sge)
|
||||
{
|
||||
client_max_sge = rdma_max_sge;
|
||||
}
|
||||
auto rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge);
|
||||
if (rdma_conn)
|
||||
{
|
||||
int r = rdma_conn->connect(&addr);
|
||||
|
@ -352,7 +356,6 @@ bool osd_messenger_t::try_send_rdma(osd_client_t *cl)
|
|||
return true;
|
||||
}
|
||||
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
|
||||
// FIXME: rc->max_sge should be negotiated between client & server
|
||||
ibv_sge sge[rc->max_sge];
|
||||
while (rc->send_pos < cl->send_list.size())
|
||||
{
|
||||
|
@ -448,7 +451,6 @@ bool osd_messenger_t::try_recv_rdma(osd_client_t *cl)
|
|||
}
|
||||
int op_size = 0, op_sge = 0, op_max = rc->max_sge*bs_bitmap_granularity;
|
||||
iovec *segments = cl->recv_list.get_iovec();
|
||||
// FIXME: rc->max_sge should be negotiated between client & server
|
||||
ibv_sge sge[rc->max_sge];
|
||||
while (rc->recv_pos < cl->recv_list.get_size())
|
||||
{
|
||||
|
|
|
@ -169,11 +169,11 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
|
|||
if (req_json["connect_rdma"].is_string())
|
||||
{
|
||||
// Peer is trying to connect using RDMA, try to satisfy him
|
||||
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value());
|
||||
bool ok = msgr.connect_rdma(cur_op->peer_fd, req_json["connect_rdma"].string_value(), req_json["rdma_max_sge"].uint64_value());
|
||||
if (ok)
|
||||
{
|
||||
wire_config["rdma_connected"] = true;
|
||||
wire_config["rdma_address"] = msgr.clients.at(cur_op->peer_fd)->rdma_conn->addr.to_string();
|
||||
wire_config["rdma_max_sge"] = msgr.get_rdma_max_sge();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue