Send bitmaps with primary-reads, actually read bitmaps for READ ops

rdma-zerocopy
Vitaliy Filippov 2021-01-13 00:19:04 +03:00
parent 6bf88883ac
commit 0aa2dd2890
7 changed files with 28 additions and 3 deletions

View File

@ -63,6 +63,11 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
st_cli.parse_config(config);
st_cli.load_global_config();
// Temporary implementation: discard all bitmaps
// It will be of course replaced by the implementation of snapshots
scrap_bitmap_size = 4096;
scrap_bitmap = malloc_or_die(scrap_bitmap_size);
if (ringloop)
{
consumer.loop = [this]()
@ -86,6 +91,7 @@ cluster_client_t::~cluster_client_t()
{
ringloop->unregister_consumer(&consumer);
}
free(scrap_bitmap);
}
void cluster_client_t::continue_ops(bool up_retry)
@ -681,6 +687,8 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
.offset = part->offset,
.len = part->len,
} },
.bitmap = scrap_bitmap,
.bitmap_len = scrap_bitmap_size,
.callback = [this, part](osd_op_t *op_part)
{
handle_op_part(part);

View File

@ -77,6 +77,9 @@ class cluster_client_t
std::set<osd_num_t> dirty_osds;
uint64_t dirty_bytes = 0, dirty_ops = 0;
void *scrap_bitmap = NULL;
unsigned scrap_bitmap_size = 0;
bool pgs_loaded = false;
ring_consumer_t consumer;
std::vector<std::function<void(void)>> on_ready_hooks;

View File

@ -162,6 +162,7 @@ struct osd_op_t
blockstore_op_t *bs_op = NULL;
void *buf = NULL;
void *bitmap = NULL;
unsigned bitmap_len = 0;
unsigned bmp_data = 0;
void *rmw_buf = NULL;
osd_primary_op_data_t* op_data = NULL;

View File

@ -278,7 +278,9 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
{
// Read data. In this case we assume that the buffer is preallocated by the caller (!)
assert(op->iov.count > 0);
if (op->reply.hdr.retval != (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len))
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len);
if (op->reply.hdr.retval != (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len) ||
bmp_len > op->bitmap_len)
{
// Check reply length to not overflow the buffer
printf("Client %d read reply of different length\n", cl->peer_fd);
@ -286,11 +288,15 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
stop_client(cl->peer_fd);
return false;
}
if (bmp_len > 0)
{
cl->recv_list.push_back(op->bitmap, bmp_len);
}
cl->recv_list.append(op->iov);
delete cl->read_op;
cl->read_op = op;
cl->read_state = CL_READ_REPLY_DATA;
cl->read_remaining = op->reply.hdr.retval;
cl->read_remaining = op->reply.hdr.retval + (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len);
}
else if (op->reply.hdr.opcode == OSD_OP_SEC_LIST && op->reply.hdr.retval > 0)
{

View File

@ -73,6 +73,7 @@ struct __attribute__((__packed__)) osd_op_secondary_rw_t
uint32_t len;
// bitmap/attribute length - bitmap comes after header, but before data
uint32_t attr_len;
uint32_t pad0;
};
struct __attribute__((__packed__)) osd_reply_secondary_rw_t
@ -82,6 +83,7 @@ struct __attribute__((__packed__)) osd_reply_secondary_rw_t
uint64_t version;
// for reads: bitmap/attribute length (just to double-check)
uint32_t attr_len;
uint32_t pad0;
};
// delete object on the secondary OSD
@ -158,7 +160,6 @@ struct __attribute__((__packed__)) osd_reply_secondary_list_t
};
// read or write to the primary OSD (must be within individual stripe)
// FIXME: allow to return used block bitmap (required for snapshots)
struct __attribute__((__packed__)) osd_op_rw_t
{
osd_op_header_t header;
@ -173,6 +174,9 @@ struct __attribute__((__packed__)) osd_op_rw_t
struct __attribute__((__packed__)) osd_reply_rw_t
{
osd_reply_header_t header;
// for reads: bitmap length
uint32_t bitmap_len;
uint32_t pad0;
};
// sync to the primary OSD

View File

@ -179,6 +179,8 @@ resume_2:
}
else
{
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * entry_attr_size;
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
}
finish_op(cur_op, cur_op->req.rw.len);

View File

@ -161,6 +161,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
subops[i].op_type = OSD_OP_OUT;
subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
subops[i].bitmap = stripes[stripe_num].bmp_buf;
subops[i].bitmap_len = entry_attr_size;
subops[i].req.sec_rw = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,