From 860ac24762e93269fff5719bc19ac90ab0a2c90b Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Mon, 11 Jan 2021 02:01:42 +0300 Subject: [PATCH] Add "external" bitmap support to the secondary OSD protocol --- src/blockstore.h | 1 + src/blockstore_open.cpp | 2 +- src/cluster_client.h | 2 -- src/messenger.h | 1 + src/msgr_op.h | 1 + src/msgr_receive.cpp | 24 ++++++++++++++++++++---- src/msgr_send.cpp | 12 ++++++++++++ src/osd.cpp | 14 ++++++++++---- src/osd.h | 2 +- src/osd_ops.h | 4 ++++ src/osd_secondary.cpp | 22 +++++++++++++++++++--- 11 files changed, 70 insertions(+), 15 deletions(-) diff --git a/src/blockstore.h b/src/blockstore.h index 6d5f0f5f3..b99f6f0ed 100644 --- a/src/blockstore.h +++ b/src/blockstore.h @@ -27,6 +27,7 @@ #define DEFAULT_ORDER 17 #define MIN_BLOCK_SIZE 4*1024 #define MAX_BLOCK_SIZE 128*1024*1024 +#define DEFAULT_BITMAP_GRANULARITY 4096 #define BS_OP_MIN 1 #define BS_OP_READ 1 diff --git a/src/blockstore_open.cpp b/src/blockstore_open.cpp index eaebeaf98..8531c13c3 100644 --- a/src/blockstore_open.cpp +++ b/src/blockstore_open.cpp @@ -131,7 +131,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config) } if (!bitmap_granularity) { - bitmap_granularity = 4096; + bitmap_granularity = DEFAULT_BITMAP_GRANULARITY; } else if (bitmap_granularity % disk_alignment) { diff --git a/src/cluster_client.h b/src/cluster_client.h index 83fdf323d..fc00b3631 100644 --- a/src/cluster_client.h +++ b/src/cluster_client.h @@ -8,8 +8,6 @@ #define MIN_BLOCK_SIZE 4*1024 #define MAX_BLOCK_SIZE 128*1024*1024 -#define DEFAULT_DISK_ALIGNMENT 4096 -#define DEFAULT_BITMAP_GRANULARITY 4096 #define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024 #define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024 diff --git a/src/messenger.h b/src/messenger.h index 119eb34bd..34466d4fd 100644 --- a/src/messenger.h +++ b/src/messenger.h @@ -31,6 +31,7 @@ #define DEFAULT_PEER_CONNECT_INTERVAL 5 #define DEFAULT_PEER_CONNECT_TIMEOUT 5 #define DEFAULT_OSD_PING_TIMEOUT 5 +#define 
DEFAULT_BITMAP_GRANULARITY 4096 struct osd_client_t { diff --git a/src/msgr_op.h b/src/msgr_op.h index be16909a8..966122976 100644 --- a/src/msgr_op.h +++ b/src/msgr_op.h @@ -161,6 +161,7 @@ struct osd_op_t osd_any_reply_t reply; blockstore_op_t *bs_op = NULL; void *buf = NULL; + void *bitmap = NULL; void *rmw_buf = NULL; osd_primary_op_data_t* op_data = NULL; std::function callback; diff --git a/src/msgr_receive.cpp b/src/msgr_receive.cpp index 121c9a90a..20fd085a6 100644 --- a/src/msgr_receive.cpp +++ b/src/msgr_receive.cpp @@ -202,22 +202,36 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl) osd_op_t *cur_op = cl->read_op; if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ) { - if (cur_op->req.sec_rw.len > 0) - cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len); cl->read_remaining = 0; } else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) { + if (cur_op->req.sec_rw.attr_len > 0) + { + if (cur_op->req.sec_rw.attr_len > sizeof(void*)) + { + cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.attr_len); + cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.attr_len); + } + else + cl->recv_list.push_back(&cur_op->bitmap, cur_op->req.sec_rw.attr_len); + } if (cur_op->req.sec_rw.len > 0) + { cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len); - cl->read_remaining = cur_op->req.sec_rw.len; + cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len); + } + cl->read_remaining = cur_op->req.sec_rw.len + cur_op->req.sec_rw.attr_len; } else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK) { if (cur_op->req.sec_stab.len > 0) + { cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_stab.len); + cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_stab.len); + } cl->read_remaining = cur_op->req.sec_stab.len; } else if (cur_op->req.hdr.opcode == OSD_OP_READ) @@ -227,13 +241,15 @@ void 
osd_messenger_t::handle_op_hdr(osd_client_t *cl) else if (cur_op->req.hdr.opcode == OSD_OP_WRITE) { if (cur_op->req.rw.len > 0) + { cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.rw.len); + cl->recv_list.push_back(cur_op->buf, cur_op->req.rw.len); + } cl->read_remaining = cur_op->req.rw.len; } if (cl->read_remaining > 0) { // Read data - cl->recv_list.push_back(cur_op->buf, cl->read_remaining); cl->read_state = CL_READ_DATA; } else diff --git a/src/msgr_send.cpp b/src/msgr_send.cpp index 0fae89697..c3cdefdf6 100644 --- a/src/msgr_send.cpp +++ b/src/msgr_send.cpp @@ -59,6 +59,18 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op) cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)) && cur_op->iov.count > 0) { + to_outbox.push_back(NULL); + // Bitmap: its length is in the reply for SEC_READ and in the request for SEC_WRITE/SEC_WRITE_STABLE + if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ && cur_op->reply.sec_rw.attr_len > 0 || + (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) && + cur_op->req.sec_rw.attr_len > 0) + { + to_send_list.push_back((iovec){ + .iov_base = ((cur_op->req.hdr.opcode == OSD_OP_SEC_READ ? cur_op->reply.sec_rw.attr_len : cur_op->req.sec_rw.attr_len) > sizeof(void*) ?
cur_op->bitmap : &cur_op->bitmap), + .iov_len = (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ? cur_op->reply.sec_rw.attr_len : cur_op->req.sec_rw.attr_len), + }); + to_outbox.push_back(NULL); + } for (int i = 0; i < cur_op->iov.count; i++) { assert(cur_op->iov.buf[i].iov_base); diff --git a/src/osd.cpp b/src/osd.cpp index 14ecafb2c..601aa4347 100644 --- a/src/osd.cpp +++ b/src/osd.cpp @@ -12,7 +12,16 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop) { - config["entry_attr_size"] = "0"; + bs_block_size = strtoull(config["block_size"].c_str(), NULL, 10); + bs_bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10); + if (!bs_block_size) + bs_block_size = DEFAULT_BLOCK_SIZE; + if (!bs_bitmap_granularity) + bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY; + + // Force external bitmap size (config values are strings, so it is stored as decimal text, not as a raw integer) + entry_attr_size = bs_block_size / bs_bitmap_granularity / 8; + config["entry_attr_size"] = std::to_string(entry_attr_size); this->config = config; this->ringloop = ringloop; @@ -20,9 +29,6 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop) // FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config this->bs = new blockstore_t(config, ringloop); - this->bs_block_size = bs->get_block_size(); - this->bs_bitmap_granularity = bs->get_bitmap_granularity(); - parse_config(config); epmgr = new epoll_manager_t(ringloop); diff --git a/src/osd.h b/src/osd.h index 720b9cdc3..bef90f595 100644 --- a/src/osd.h +++ b/src/osd.h @@ -115,7 +115,7 @@ class osd_t bool stopping = false; int inflight_ops = 0; blockstore_t *bs; - uint32_t bs_block_size, bs_bitmap_granularity; + uint32_t bs_block_size, bs_bitmap_granularity, entry_attr_size; ring_loop_t *ringloop; timerfd_manager_t *tfd = NULL; epoll_manager_t *epmgr = NULL; diff --git a/src/osd_ops.h b/src/osd_ops.h index f429ecdf0..fece585f1 100644 --- a/src/osd_ops.h +++ b/src/osd_ops.h @@ -71,6 +71,8 @@ struct __attribute__((__packed__)) osd_op_secondary_rw_t uint32_t offset; // length uint32_t len; + // bitmap/attribute length - bitmap comes
after header, but before data + uint32_t attr_len; }; struct __attribute__((__packed__)) osd_reply_secondary_rw_t @@ -78,6 +80,8 @@ struct __attribute__((__packed__)) osd_reply_secondary_rw_t osd_reply_header_t header; // for reads and writes: assigned or read version number uint64_t version; + // for reads: bitmap/attribute length (just to double-check) + uint32_t attr_len; }; // delete object on the secondary OSD diff --git a/src/osd_secondary.cpp b/src/osd_secondary.cpp index b370dc391..e25850db4 100644 --- a/src/osd_secondary.cpp +++ b/src/osd_secondary.cpp @@ -17,10 +17,17 @@ void osd_t::secondary_op_callback(osd_op_t *op) { op->reply.sec_del.version = op->bs_op->version; } - if (op->req.hdr.opcode == OSD_OP_SEC_READ && - op->bs_op->retval > 0) + if (op->req.hdr.opcode == OSD_OP_SEC_READ) { - op->iov.push_back(op->buf, op->bs_op->retval); + if (entry_attr_size > 0) + { + op->reply.sec_rw.attr_len = entry_attr_size; + op->iov.push_back((entry_attr_size > sizeof(void*) ? op->bitmap : &op->bs_op->bitmap), entry_attr_size); + } + if (op->bs_op->retval > 0) + { + op->iov.push_back(op->buf, op->bs_op->retval); + } } else if (op->req.hdr.opcode == OSD_OP_SEC_LIST) { @@ -55,11 +62,20 @@ void osd_t::exec_secondary(osd_op_t *cur_op) cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) { + if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ) + { + // Allocate memory for the read operation + if (entry_attr_size > sizeof(void*)) + cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(entry_attr_size); + if (cur_op->req.sec_rw.len > 0) + cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len); + } cur_op->bs_op->oid = cur_op->req.sec_rw.oid; cur_op->bs_op->version = cur_op->req.sec_rw.version; cur_op->bs_op->offset = cur_op->req.sec_rw.offset; cur_op->bs_op->len = cur_op->req.sec_rw.len; cur_op->bs_op->buf = cur_op->buf; + cur_op->bs_op->bitmap = cur_op->bitmap; #ifdef OSD_STUB cur_op->bs_op->retval = cur_op->bs_op->len; #endif