Inline bitmaps

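Handy for zero-copy RDMA tests: bitmaps of up to 8 bytes are now carried inline in the request/reply header, which removes the tiny (4-byte) scatter/gather entries; longer bitmaps are still sent as a separate buffer.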
rdma-zerocopy
Vitaliy Filippov 2021-04-25 19:49:36 +03:00
parent ce777319c3
commit 8faf8f7b58
6 changed files with 71 additions and 29 deletions

View File

@ -207,20 +207,26 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl)
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
{
if (cur_op->req.sec_rw.attr_len > 0)
if (cur_op->req.sec_rw.bitmap_len > 0)
{
if (cur_op->req.sec_rw.attr_len > sizeof(unsigned))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.attr_len);
if (cur_op->req.sec_rw.bitmap_len > sizeof(void*))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.bitmap_len);
else
cur_op->bitmap = &cur_op->bmp_data;
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.attr_len);
if (cur_op->req.sec_rw.bitmap_len <= 8)
memcpy(cur_op->bitmap, &cur_op->req.sec_rw.bitmap, cur_op->req.sec_rw.bitmap_len);
else
{
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
cl->read_remaining += cur_op->req.sec_rw.bitmap_len;
}
}
if (cur_op->req.sec_rw.len > 0)
{
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len);
cl->read_remaining += cur_op->req.sec_rw.len;
}
cl->read_remaining = cur_op->req.sec_rw.len + cur_op->req.sec_rw.attr_len;
}
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
@ -295,7 +301,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ)
{
// Read data. In this case we assume that the buffer is preallocated by the caller (!)
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len);
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.bitmap_len : op->reply.rw.bitmap_len);
unsigned expected_size = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len);
if (op->reply.hdr.retval >= 0 && (op->reply.hdr.retval != expected_size || bmp_len > op->bitmap_len))
{
@ -309,14 +315,24 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
if (op->reply.hdr.retval >= 0 && bmp_len > 0)
{
assert(op->bitmap);
if (bmp_len <= 8)
{
memcpy(op->bitmap, (op->reply.hdr.opcode == OSD_OP_SEC_READ
? &op->reply.sec_rw.bitmap
: &op->reply.rw.bitmap), bmp_len);
}
else
{
cl->recv_list.push_back(op->bitmap, bmp_len);
cl->read_remaining += bmp_len;
}
}
if (op->reply.hdr.retval > 0)
{
assert(op->iov.count > 0);
cl->recv_list.append(op->iov);
cl->read_remaining += op->reply.hdr.retval;
}
cl->read_remaining = op->reply.hdr.retval + bmp_len;
if (cl->read_remaining == 0)
{
goto reuse;

View File

@ -50,17 +50,30 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
// Bitmap
if (cur_op->op_type == OSD_OP_IN &&
cur_op->req.hdr.opcode == OSD_OP_SEC_READ &&
cur_op->reply.sec_rw.attr_len > 0)
cur_op->reply.sec_rw.bitmap_len > 0)
{
if (cur_op->reply.sec_rw.bitmap_len <= 8)
{
memcpy(&cur_op->reply.sec_rw.bitmap, cur_op->bitmap, cur_op->reply.sec_rw.bitmap_len);
}
else
{
to_send_list.push_back((iovec){
.iov_base = cur_op->bitmap,
.iov_len = cur_op->reply.sec_rw.attr_len,
.iov_len = cur_op->reply.sec_rw.bitmap_len,
});
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
}
}
else if (cur_op->op_type == OSD_OP_OUT &&
(cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) &&
cur_op->req.sec_rw.attr_len > 0)
cur_op->req.sec_rw.bitmap_len > 0)
{
if (cur_op->req.sec_rw.bitmap_len <= 8)
{
memcpy(&cur_op->req.sec_rw.bitmap, cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
}
else
{
to_send_list.push_back((iovec){
.iov_base = cur_op->bitmap,
@ -68,6 +81,7 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
});
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
}
}
// Operation data
if ((cur_op->op_type == OSD_OP_IN
? (cur_op->req.hdr.opcode == OSD_OP_READ ||

View File

@ -35,7 +35,7 @@
#define MEM_ALIGNMENT 512
#endif
#define OSD_RW_MAX 64*1024*1024
#define OSD_PROTOCOL_VERSION 1
#define OSD_PROTOCOL_VERSION 2
// common request and reply headers
struct __attribute__((__packed__)) osd_op_header_t
@ -74,8 +74,10 @@ struct __attribute__((__packed__)) osd_op_sec_rw_t
// length
uint32_t len;
// bitmap/attribute length - bitmap comes after header, but before data
uint32_t attr_len;
uint32_t bitmap_len;
uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
};
struct __attribute__((__packed__)) osd_reply_sec_rw_t
@ -84,8 +86,10 @@ struct __attribute__((__packed__)) osd_reply_sec_rw_t
// for reads and writes: assigned or read version number
uint64_t version;
// for reads: bitmap/attribute length (just to double-check)
uint32_t attr_len;
uint32_t bitmap_len;
uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
};
// delete object on the secondary OSD
@ -199,6 +203,8 @@ struct __attribute__((__packed__)) osd_reply_rw_t
// for reads: bitmap length
uint32_t bitmap_len;
uint32_t pad0;
// inline bitmap (when it's no longer than 8 bytes)
uint64_t bitmap;
};
// sync to the primary OSD

View File

@ -235,6 +235,9 @@ resume_2:
{
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
}
if (cur_op->reply.rw.bitmap_len <= 8)
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
else
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
for (int role = 0; role < op_data->pg_size; role++)
{
@ -250,6 +253,9 @@ resume_2:
}
else
{
if (cur_op->reply.rw.bitmap_len <= 8)
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
else
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
}

View File

@ -200,7 +200,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
.version = op_version,
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
.attr_len = wr ? clean_entry_bitmap_size : 0,
.bitmap_len = wr ? clean_entry_bitmap_size : 0,
};
#ifdef OSD_DEBUG
printf(

View File

@ -20,9 +20,9 @@ void osd_t::secondary_op_callback(osd_op_t *op)
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
{
if (op->bs_op->retval >= 0)
op->reply.sec_rw.attr_len = clean_entry_bitmap_size;
op->reply.sec_rw.bitmap_len = clean_entry_bitmap_size;
else
op->reply.sec_rw.attr_len = 0;
op->reply.sec_rw.bitmap_len = 0;
if (op->bs_op->retval > 0)
op->iov.push_back(op->buf, op->bs_op->retval);
}
@ -81,7 +81,7 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
{
// Allocate memory for the read operation
if (clean_entry_bitmap_size > sizeof(unsigned))
if (clean_entry_bitmap_size > sizeof(void*))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
else
cur_op->bitmap = &cur_op->bmp_data;