Inline bitmaps

Handy for zero-copy RDMA tests (removes 4-byte s/g entries)
rdma-zerocopy
Vitaliy Filippov 2021-04-25 19:49:36 +03:00
parent ce777319c3
commit 8faf8f7b58
6 changed files with 71 additions and 29 deletions

View File

@ -207,20 +207,26 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl)
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
{ {
if (cur_op->req.sec_rw.attr_len > 0) if (cur_op->req.sec_rw.bitmap_len > 0)
{ {
if (cur_op->req.sec_rw.attr_len > sizeof(unsigned)) if (cur_op->req.sec_rw.bitmap_len > sizeof(void*))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.attr_len); cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.bitmap_len);
else else
cur_op->bitmap = &cur_op->bmp_data; cur_op->bitmap = &cur_op->bmp_data;
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.attr_len); if (cur_op->req.sec_rw.bitmap_len <= 8)
memcpy(cur_op->bitmap, &cur_op->req.sec_rw.bitmap, cur_op->req.sec_rw.bitmap_len);
else
{
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
cl->read_remaining += cur_op->req.sec_rw.bitmap_len;
}
} }
if (cur_op->req.sec_rw.len > 0) if (cur_op->req.sec_rw.len > 0)
{ {
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len); cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len); cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len);
cl->read_remaining += cur_op->req.sec_rw.len;
} }
cl->read_remaining = cur_op->req.sec_rw.len + cur_op->req.sec_rw.attr_len;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK) cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
@ -295,7 +301,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ) if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ)
{ {
// Read data. In this case we assume that the buffer is preallocated by the caller (!) // Read data. In this case we assume that the buffer is preallocated by the caller (!)
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len); unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.bitmap_len : op->reply.rw.bitmap_len);
unsigned expected_size = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len); unsigned expected_size = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len);
if (op->reply.hdr.retval >= 0 && (op->reply.hdr.retval != expected_size || bmp_len > op->bitmap_len)) if (op->reply.hdr.retval >= 0 && (op->reply.hdr.retval != expected_size || bmp_len > op->bitmap_len))
{ {
@ -309,14 +315,24 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
if (op->reply.hdr.retval >= 0 && bmp_len > 0) if (op->reply.hdr.retval >= 0 && bmp_len > 0)
{ {
assert(op->bitmap); assert(op->bitmap);
cl->recv_list.push_back(op->bitmap, bmp_len); if (bmp_len <= 8)
{
memcpy(op->bitmap, (op->reply.hdr.opcode == OSD_OP_SEC_READ
? &op->reply.sec_rw.bitmap
: &op->reply.rw.bitmap), bmp_len);
}
else
{
cl->recv_list.push_back(op->bitmap, bmp_len);
cl->read_remaining += bmp_len;
}
} }
if (op->reply.hdr.retval > 0) if (op->reply.hdr.retval > 0)
{ {
assert(op->iov.count > 0); assert(op->iov.count > 0);
cl->recv_list.append(op->iov); cl->recv_list.append(op->iov);
cl->read_remaining += op->reply.hdr.retval;
} }
cl->read_remaining = op->reply.hdr.retval + bmp_len;
if (cl->read_remaining == 0) if (cl->read_remaining == 0)
{ {
goto reuse; goto reuse;

View File

@ -50,23 +50,37 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
// Bitmap // Bitmap
if (cur_op->op_type == OSD_OP_IN && if (cur_op->op_type == OSD_OP_IN &&
cur_op->req.hdr.opcode == OSD_OP_SEC_READ && cur_op->req.hdr.opcode == OSD_OP_SEC_READ &&
cur_op->reply.sec_rw.attr_len > 0) cur_op->reply.sec_rw.bitmap_len > 0)
{ {
to_send_list.push_back((iovec){ if (cur_op->reply.sec_rw.bitmap_len <= 8)
.iov_base = cur_op->bitmap, {
.iov_len = cur_op->reply.sec_rw.attr_len, memcpy(&cur_op->reply.sec_rw.bitmap, cur_op->bitmap, cur_op->reply.sec_rw.bitmap_len);
}); }
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 }); else
{
to_send_list.push_back((iovec){
.iov_base = cur_op->bitmap,
.iov_len = cur_op->reply.sec_rw.bitmap_len,
});
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
}
} }
else if (cur_op->op_type == OSD_OP_OUT && else if (cur_op->op_type == OSD_OP_OUT &&
(cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) && (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) &&
cur_op->req.sec_rw.attr_len > 0) cur_op->req.sec_rw.bitmap_len > 0)
{ {
to_send_list.push_back((iovec){ if (cur_op->req.sec_rw.bitmap_len <= 8)
.iov_base = cur_op->bitmap, {
.iov_len = cur_op->req.sec_rw.attr_len, memcpy(&cur_op->req.sec_rw.bitmap, cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
}); }
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 }); else
{
to_send_list.push_back((iovec){
.iov_base = cur_op->bitmap,
.iov_len = cur_op->req.sec_rw.attr_len,
});
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
}
} }
// Operation data // Operation data
if ((cur_op->op_type == OSD_OP_IN if ((cur_op->op_type == OSD_OP_IN

View File

@ -35,7 +35,7 @@
#define MEM_ALIGNMENT 512 #define MEM_ALIGNMENT 512
#endif #endif
#define OSD_RW_MAX 64*1024*1024 #define OSD_RW_MAX 64*1024*1024
#define OSD_PROTOCOL_VERSION 1 #define OSD_PROTOCOL_VERSION 2
// common request and reply headers // common request and reply headers
struct __attribute__((__packed__)) osd_op_header_t struct __attribute__((__packed__)) osd_op_header_t
@ -74,8 +74,10 @@ struct __attribute__((__packed__)) osd_op_sec_rw_t
// length // length
uint32_t len; uint32_t len;
// bitmap/attribute length - bitmap comes after header, but before data // bitmap/attribute length - bitmap comes after header, but before data
uint32_t attr_len; uint32_t bitmap_len;
uint32_t pad0; uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
}; };
struct __attribute__((__packed__)) osd_reply_sec_rw_t struct __attribute__((__packed__)) osd_reply_sec_rw_t
@ -84,8 +86,10 @@ struct __attribute__((__packed__)) osd_reply_sec_rw_t
// for reads and writes: assigned or read version number // for reads and writes: assigned or read version number
uint64_t version; uint64_t version;
// for reads: bitmap/attribute length (just to double-check) // for reads: bitmap/attribute length (just to double-check)
uint32_t attr_len; uint32_t bitmap_len;
uint32_t pad0; uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
}; };
// delete object on the secondary OSD // delete object on the secondary OSD
@ -199,6 +203,8 @@ struct __attribute__((__packed__)) osd_reply_rw_t
// for reads: bitmap length // for reads: bitmap length
uint32_t bitmap_len; uint32_t bitmap_len;
uint32_t pad0; uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
}; };
// sync to the primary OSD // sync to the primary OSD

View File

@ -235,7 +235,10 @@ resume_2:
{ {
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size); reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
} }
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len); if (cur_op->reply.rw.bitmap_len <= 8)
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
else
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
for (int role = 0; role < op_data->pg_size; role++) for (int role = 0; role < op_data->pg_size; role++)
{ {
if (stripes[role].req_end != 0) if (stripes[role].req_end != 0)
@ -250,7 +253,10 @@ resume_2:
} }
else else
{ {
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len); if (cur_op->reply.rw.bitmap_len <= 8)
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
else
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len); cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
} }
finish_op(cur_op, cur_op->req.rw.len); finish_op(cur_op, cur_op->req.rw.len);

View File

@ -200,7 +200,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
.version = op_version, .version = op_version,
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, .len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
.attr_len = wr ? clean_entry_bitmap_size : 0, .bitmap_len = wr ? clean_entry_bitmap_size : 0,
}; };
#ifdef OSD_DEBUG #ifdef OSD_DEBUG
printf( printf(

View File

@ -20,9 +20,9 @@ void osd_t::secondary_op_callback(osd_op_t *op)
if (op->req.hdr.opcode == OSD_OP_SEC_READ) if (op->req.hdr.opcode == OSD_OP_SEC_READ)
{ {
if (op->bs_op->retval >= 0) if (op->bs_op->retval >= 0)
op->reply.sec_rw.attr_len = clean_entry_bitmap_size; op->reply.sec_rw.bitmap_len = clean_entry_bitmap_size;
else else
op->reply.sec_rw.attr_len = 0; op->reply.sec_rw.bitmap_len = 0;
if (op->bs_op->retval > 0) if (op->bs_op->retval > 0)
op->iov.push_back(op->buf, op->bs_op->retval); op->iov.push_back(op->buf, op->bs_op->retval);
} }
@ -81,7 +81,7 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ) if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
{ {
// Allocate memory for the read operation // Allocate memory for the read operation
if (clean_entry_bitmap_size > sizeof(unsigned)) if (clean_entry_bitmap_size > sizeof(void*))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size); cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
else else
cur_op->bitmap = &cur_op->bmp_data; cur_op->bitmap = &cur_op->bmp_data;