Switch back to clean_entry_bitmap_size instead of entry_attr_size because of the changed bitmap handling

rdma-zerocopy
Vitaliy Filippov 2021-02-07 16:26:08 +03:00
parent d0c2e31312
commit ab39ce2bbb
14 changed files with 54 additions and 64 deletions

View File

@ -65,9 +65,8 @@ Input:
- offset, len = offset and length within object. length may be zero, in that case - offset, len = offset and length within object. length may be zero, in that case
read operation only returns the version / write operation only bumps the version read operation only returns the version / write operation only bumps the version
- buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0. - buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0.
- bitmap = pointer to <entry_attr_size> bytes long (usually very short) arbitrary data - bitmap = pointer to the new 'external' object bitmap data. The part of it that corresponds to the
stored for each object in the metadata area. write request is copied bitwise into the metadata area and stored there.
Called "bitmap" because it's used for the "external bitmap" in Vitastor.
Output: Output:
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC) - retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)

View File

@ -428,7 +428,7 @@ resume_1:
{ {
new_clean_bitmap = (bs->inmemory_meta new_clean_bitmap = (bs->inmemory_meta
? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry) ? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
: bs->clean_bitmap + (clean_loc >> bs->block_order)*(bs->clean_entry_bitmap_size + bs->entry_attr_size)); : bs->clean_bitmap + (clean_loc >> bs->block_order)*(2*bs->clean_entry_bitmap_size));
if (clean_init_bitmap) if (clean_init_bitmap)
{ {
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size); memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
@ -510,11 +510,11 @@ resume_1:
{ {
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size); memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
} }
if (bs->entry_attr_size) // copy latest external bitmap/attributes
if (bs->clean_entry_bitmap_size)
{ {
// copy latest external bitmap/attributes void *bmp_ptr = bs->clean_entry_bitmap_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
void *bmp_ptr = bs->entry_attr_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap; memcpy((void*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->clean_entry_bitmap_size);
memcpy((void*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->entry_attr_size);
} }
} }
await_sqe(6); await_sqe(6);

View File

@ -218,7 +218,7 @@ class blockstore_impl_t
uint32_t block_order; uint32_t block_order;
uint64_t block_count; uint64_t block_count;
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, entry_attr_size = 0; uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
int meta_fd; int meta_fd;
int data_fd; int data_fd;

View File

@ -98,9 +98,9 @@ void blockstore_init_meta::handle_entries(void* entries, unsigned count, int blo
for (unsigned i = 0; i < count; i++) for (unsigned i = 0; i < count; i++)
{ {
clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size); clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size);
if (!bs->inmemory_meta && (bs->clean_entry_bitmap_size || bs->entry_attr_size)) if (!bs->inmemory_meta && bs->clean_entry_bitmap_size)
{ {
memcpy(bs->clean_bitmap + (done_cnt+i)*(bs->clean_entry_bitmap_size + bs->entry_attr_size), &entry->bitmap, (bs->clean_entry_bitmap_size + bs->entry_attr_size)); memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->clean_entry_bitmap_size, &entry->bitmap, 2*bs->clean_entry_bitmap_size);
} }
if (entry->oid.inode > 0) if (entry->oid.inode > 0)
{ {
@ -550,9 +550,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.version = je->small_write.version, .version = je->small_write.version,
}; };
void *bmp = (void*)je + sizeof(journal_entry_small_write); void *bmp = (void*)je + sizeof(journal_entry_small_write);
if (bs->entry_attr_size <= sizeof(void*)) if (bs->clean_entry_bitmap_size <= sizeof(void*))
{ {
memcpy(&bmp, bmp, bs->entry_attr_size); memcpy(&bmp, bmp, bs->clean_entry_bitmap_size);
} }
else if (!bs->journal.inmemory) else if (!bs->journal.inmemory)
{ {
@ -560,8 +560,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
// will result in a lot of small allocations for entry bitmaps. This can // will result in a lot of small allocations for entry bitmaps. This can
// only be fixed by using a patched map with dynamic entry size, but not // only be fixed by using a patched map with dynamic entry size, but not
// the btree_map, because it doesn't keep iterators valid all the time. // the btree_map, because it doesn't keep iterators valid all the time.
void *bmp_cp = malloc_or_die(bs->entry_attr_size); void *bmp_cp = malloc_or_die(bs->clean_entry_bitmap_size);
memcpy(bmp_cp, bmp, bs->entry_attr_size); memcpy(bmp_cp, bmp, bs->clean_entry_bitmap_size);
bmp = bmp_cp; bmp = bmp_cp;
} }
bs->dirty_db.emplace(ov, (dirty_entry){ bs->dirty_db.emplace(ov, (dirty_entry){
@ -630,9 +630,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.version = je->big_write.version, .version = je->big_write.version,
}; };
void *bmp = (void*)je + sizeof(journal_entry_big_write); void *bmp = (void*)je + sizeof(journal_entry_big_write);
if (bs->entry_attr_size <= sizeof(void*)) if (bs->clean_entry_bitmap_size <= sizeof(void*))
{ {
memcpy(&bmp, bmp, bs->entry_attr_size); memcpy(&bmp, bmp, bs->clean_entry_bitmap_size);
} }
else if (!bs->journal.inmemory) else if (!bs->journal.inmemory)
{ {
@ -640,8 +640,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
// will result in a lot of small allocations for entry bitmaps. This can // will result in a lot of small allocations for entry bitmaps. This can
// only be fixed by using a patched map with dynamic entry size, but not // only be fixed by using a patched map with dynamic entry size, but not
// the btree_map, because it doesn't keep iterators valid all the time. // the btree_map, because it doesn't keep iterators valid all the time.
void *bmp_cp = malloc_or_die(bs->entry_attr_size); void *bmp_cp = malloc_or_die(bs->clean_entry_bitmap_size);
memcpy(bmp_cp, bmp, bs->entry_attr_size); memcpy(bmp_cp, bmp, bs->clean_entry_bitmap_size);
bmp = bmp_cp; bmp = bmp_cp;
} }
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){ auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){

View File

@ -62,7 +62,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10); cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
meta_device = config["meta_device"]; meta_device = config["meta_device"];
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10); meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
entry_attr_size = strtoull(config["entry_attr_size"].c_str(), NULL, 10);
block_size = strtoull(config["block_size"].c_str(), NULL, 10); block_size = strtoull(config["block_size"].c_str(), NULL, 10);
inmemory_meta = config["inmemory_metadata"] != "false"; inmemory_meta = config["inmemory_metadata"] != "false";
journal_device = config["journal_device"]; journal_device = config["journal_device"];
@ -153,11 +152,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
{ {
throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size)); throw std::runtime_error("meta_offset must be a multiple of meta_block_size = "+std::to_string(meta_block_size));
} }
// FIXME: Due to the recent changes in entry_attr handling rename it back to bitmap
if (entry_attr_size > meta_block_size/2)
{
throw std::runtime_error("entry_attr_size is too big");
}
if (journal.offset % journal_block_size) if (journal.offset % journal_block_size)
{ {
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size)); throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
@ -188,7 +182,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
} }
// init some fields // init some fields
clean_entry_bitmap_size = block_size / bitmap_granularity / 8; clean_entry_bitmap_size = block_size / bitmap_granularity / 8;
clean_entry_size = sizeof(clean_disk_entry) + clean_entry_bitmap_size + entry_attr_size; clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
journal.block_size = journal_block_size; journal.block_size = journal_block_size;
journal.next_free = journal_block_size; journal.next_free = journal_block_size;
journal.used_start = journal_block_size; journal.used_start = journal_block_size;
@ -253,9 +247,9 @@ void blockstore_impl_t::calc_lengths()
if (!metadata_buffer) if (!metadata_buffer)
throw std::runtime_error("Failed to allocate memory for the metadata"); throw std::runtime_error("Failed to allocate memory for the metadata");
} }
else if (clean_entry_bitmap_size || entry_attr_size) else if (clean_entry_bitmap_size)
{ {
clean_bitmap = (uint8_t*)malloc(block_count * (clean_entry_bitmap_size + entry_attr_size)); clean_bitmap = (uint8_t*)malloc(block_count * 2*clean_entry_bitmap_size);
if (!clean_bitmap) if (!clean_bitmap)
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap"); throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
} }

View File

@ -105,7 +105,7 @@ uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offse
clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset); clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset);
} }
else else
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*(clean_entry_bitmap_size + entry_attr_size) + offset); clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*2*clean_entry_bitmap_size + offset);
return clean_entry_bitmap; return clean_entry_bitmap;
} }
@ -151,8 +151,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
result_version = dirty_it->first.version; result_version = dirty_it->first.version;
if (read_op->bitmap) if (read_op->bitmap)
{ {
void *bmp_ptr = (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap); void *bmp_ptr = (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap);
memcpy(read_op->bitmap, bmp_ptr, entry_attr_size); memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
} }
} }
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len, if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
@ -178,7 +178,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
if (read_op->bitmap) if (read_op->bitmap)
{ {
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size); void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
memcpy(read_op->bitmap, bmp_ptr, entry_attr_size); memcpy(read_op->bitmap, bmp_ptr, clean_entry_bitmap_size);
} }
} }
if (fulfilled < read_op->len) if (fulfilled < read_op->len)

View File

@ -268,7 +268,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
{ {
journal.used_sectors.erase(dirty_it->second.journal_sector); journal.used_sectors.erase(dirty_it->second.journal_sector);
} }
if (entry_attr_size > sizeof(void*)) if (clean_entry_bitmap_size > sizeof(void*))
{ {
free(dirty_it->second.bitmap); free(dirty_it->second.bitmap);
dirty_it->second.bitmap = NULL; dirty_it->second.bitmap = NULL;

View File

@ -10,9 +10,9 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
bool wait_big = false, wait_del = false; bool wait_big = false, wait_del = false;
void *bmp = NULL; void *bmp = NULL;
uint64_t version = 1; uint64_t version = 1;
if (!is_del && entry_attr_size > sizeof(void*)) if (!is_del && clean_entry_bitmap_size > sizeof(void*))
{ {
bmp = calloc_or_die(1, entry_attr_size); bmp = calloc_or_die(1, clean_entry_bitmap_size);
} }
if (dirty_db.size() > 0) if (dirty_db.size() > 0)
{ {
@ -30,8 +30,8 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
? !IS_SYNCED(dirty_it->second.state) ? !IS_SYNCED(dirty_it->second.state)
: ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG); : ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_BIG);
if (entry_attr_size > sizeof(void*)) if (clean_entry_bitmap_size > sizeof(void*))
memcpy(bmp, dirty_it->second.bitmap, entry_attr_size); memcpy(bmp, dirty_it->second.bitmap, clean_entry_bitmap_size);
else else
bmp = dirty_it->second.bitmap; bmp = dirty_it->second.bitmap;
} }
@ -43,7 +43,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
{ {
version = clean_it->second.version + 1; version = clean_it->second.version + 1;
void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size); void *bmp_ptr = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
memcpy((entry_attr_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, entry_attr_size); memcpy((clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp), bmp_ptr, clean_entry_bitmap_size);
} }
else else
{ {
@ -83,7 +83,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
{ {
// Invalid version requested // Invalid version requested
op->retval = -EEXIST; op->retval = -EEXIST;
if (!is_del && entry_attr_size > sizeof(void*)) if (!is_del && clean_entry_bitmap_size > sizeof(void*))
{ {
free(bmp); free(bmp);
} }
@ -127,7 +127,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
if (op->bitmap) if (op->bitmap)
{ {
// Only allow to overwrite part of the object bitmap respective to the write's offset/len // Only allow to overwrite part of the object bitmap respective to the write's offset/len
uint8_t *bmp_ptr = (uint8_t*)(entry_attr_size > sizeof(void*) ? bmp : &bmp); uint8_t *bmp_ptr = (uint8_t*)(clean_entry_bitmap_size > sizeof(void*) ? bmp : &bmp);
uint32_t bit = op->offset/bitmap_granularity; uint32_t bit = op->offset/bitmap_granularity;
uint32_t bits_left = op->len/bitmap_granularity; uint32_t bits_left = op->len/bitmap_granularity;
while (!(bit % 8) && bits_left > 8) while (!(bit % 8) && bits_left > 8)
@ -166,7 +166,7 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
{ {
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid) while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
{ {
if (entry_attr_size > sizeof(void*)) if (clean_entry_bitmap_size > sizeof(void*))
free(dirty_it->second.bitmap); free(dirty_it->second.bitmap);
dirty_db.erase(dirty_it++); dirty_db.erase(dirty_it++);
} }
@ -345,7 +345,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
// Then pre-fill journal entry // Then pre-fill journal entry
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry( journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE, journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
sizeof(journal_entry_small_write) + entry_attr_size sizeof(journal_entry_small_write) + clean_entry_bitmap_size
); );
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset; dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++; journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
@ -364,7 +364,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
je->len = op->len; je->len = op->len;
je->data_offset = journal.next_free; je->data_offset = journal.next_free;
je->crc32_data = crc32c(0, op->buf, op->len); je->crc32_data = crc32c(0, op->buf, op->len);
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size); memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
je->crc32 = je_crc32((journal_entry*)je); je->crc32 = je_crc32((journal_entry*)je);
journal.crc32_last = je->crc32; journal.crc32_last = je->crc32;
if (immediate_commit != IMMEDIATE_NONE) if (immediate_commit != IMMEDIATE_NONE)
@ -437,7 +437,7 @@ resume_2:
BS_SUBMIT_GET_SQE_DECL(sqe); BS_SUBMIT_GET_SQE_DECL(sqe);
je = (journal_entry_big_write*)prefill_single_journal_entry( je = (journal_entry_big_write*)prefill_single_journal_entry(
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE, journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
sizeof(journal_entry_big_write) + entry_attr_size sizeof(journal_entry_big_write) + clean_entry_bitmap_size
); );
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset; dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++; journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
@ -453,7 +453,7 @@ resume_2:
je->offset = op->offset; je->offset = op->offset;
je->len = op->len; je->len = op->len;
je->location = dirty_it->second.location; je->location = dirty_it->second.location;
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size); memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
je->crc32 = je_crc32((journal_entry*)je); je->crc32 = je_crc32((journal_entry*)je);
journal.crc32_last = je->crc32; journal.crc32_last = je->crc32;
prepare_journal_sector_write(journal, journal.cur_sector, sqe, prepare_journal_sector_write(journal, journal.cur_sector, sqe,

View File

@ -18,10 +18,7 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
bs_block_size = DEFAULT_BLOCK_SIZE; bs_block_size = DEFAULT_BLOCK_SIZE;
if (!bs_bitmap_granularity) if (!bs_bitmap_granularity)
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY; bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
clean_entry_bitmap_size = bs_block_size / bs_bitmap_granularity / 8;
// Force external bitmap size
entry_attr_size = bs_block_size / bs_bitmap_granularity / 8;
config["entry_attr_size"] = std::to_string(entry_attr_size);
this->config = config; this->config = config;
this->ringloop = ringloop; this->ringloop = ringloop;

View File

@ -126,7 +126,7 @@ class osd_t
bool stopping = false; bool stopping = false;
int inflight_ops = 0; int inflight_ops = 0;
blockstore_t *bs; blockstore_t *bs;
uint32_t bs_block_size, bs_bitmap_granularity, entry_attr_size; uint32_t bs_block_size, bs_bitmap_granularity, clean_entry_bitmap_size;
ring_loop_t *ringloop; ring_loop_t *ringloop;
timerfd_manager_t *tfd = NULL; timerfd_manager_t *tfd = NULL;
epoll_manager_t *epmgr = NULL; epoll_manager_t *epmgr = NULL;

View File

@ -53,7 +53,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
} }
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size); int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die( osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
1, sizeof(osd_primary_op_data_t) + (entry_attr_size + sizeof(osd_rmw_stripe_t)) * stripe_count 1, sizeof(osd_primary_op_data_t) + (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) * stripe_count
); );
op_data->pg_num = pg_num; op_data->pg_num = pg_num;
op_data->oid = oid; op_data->oid = oid;
@ -65,7 +65,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation // Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
for (int i = 0; i < stripe_count; i++) for (int i = 0; i < stripe_count; i++)
{ {
op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + entry_attr_size*i; op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + clean_entry_bitmap_size*i;
} }
pg_it->second.inflight++; pg_it->second.inflight++;
return true; return true;
@ -154,18 +154,18 @@ resume_2:
finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO); finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO);
return; return;
} }
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * entry_attr_size; cur_op->reply.rw.bitmap_len = op_data->pg_data_size * clean_entry_bitmap_size;
if (op_data->degraded) if (op_data->degraded)
{ {
// Reconstruct missing stripes // Reconstruct missing stripes
osd_rmw_stripe_t *stripes = op_data->stripes; osd_rmw_stripe_t *stripes = op_data->stripes;
if (op_data->scheme == POOL_SCHEME_XOR) if (op_data->scheme == POOL_SCHEME_XOR)
{ {
reconstruct_stripes_xor(stripes, op_data->pg_size, entry_attr_size); reconstruct_stripes_xor(stripes, op_data->pg_size, clean_entry_bitmap_size);
} }
else if (op_data->scheme == POOL_SCHEME_JERASURE) else if (op_data->scheme == POOL_SCHEME_JERASURE)
{ {
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, entry_attr_size); reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
} }
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len); cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
for (int role = 0; role < op_data->pg_size; role++) for (int role = 0; role < op_data->pg_size; role++)

View File

@ -155,7 +155,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin); clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin);
subops[i].op_type = (uint64_t)cur_op; subops[i].op_type = (uint64_t)cur_op;
subops[i].bitmap = stripes[stripe_num].bmp_buf; subops[i].bitmap = stripes[stripe_num].bmp_buf;
subops[i].bitmap_len = entry_attr_size; subops[i].bitmap_len = clean_entry_bitmap_size;
subops[i].bs_op = new blockstore_op_t({ subops[i].bs_op = new blockstore_op_t({
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ), .opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
.callback = [subop = &subops[i], this](blockstore_op_t *bs_subop) .callback = [subop = &subops[i], this](blockstore_op_t *bs_subop)
@ -186,7 +186,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
subops[i].op_type = OSD_OP_OUT; subops[i].op_type = OSD_OP_OUT;
subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num); subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
subops[i].bitmap = stripes[stripe_num].bmp_buf; subops[i].bitmap = stripes[stripe_num].bmp_buf;
subops[i].bitmap_len = entry_attr_size; subops[i].bitmap_len = clean_entry_bitmap_size;
subops[i].req.sec_rw = { subops[i].req.sec_rw = {
.header = { .header = {
.magic = SECONDARY_OSD_OP_MAGIC, .magic = SECONDARY_OSD_OP_MAGIC,
@ -200,7 +200,7 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
.version = op_version, .version = op_version,
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, .len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
.attr_len = wr ? entry_attr_size : 0, .attr_len = wr ? clean_entry_bitmap_size : 0,
}; };
#ifdef OSD_DEBUG #ifdef OSD_DEBUG
printf( printf(

View File

@ -78,7 +78,7 @@ resume_1:
else else
{ {
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set, cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, entry_attr_size); pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
if (!cur_op->rmw_buf) if (!cur_op->rmw_buf)
{ {
// Refuse partial overwrite of an incomplete object // Refuse partial overwrite of an incomplete object
@ -123,11 +123,11 @@ resume_3:
// Recover missing stripes, calculate parity // Recover missing stripes, calculate parity
if (pg.scheme == POOL_SCHEME_XOR) if (pg.scheme == POOL_SCHEME_XOR)
{ {
calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, entry_attr_size); calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
} }
else if (pg.scheme == POOL_SCHEME_JERASURE) else if (pg.scheme == POOL_SCHEME_JERASURE)
{ {
calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, entry_attr_size); calc_rmw_parity_jerasure(op_data->stripes, pg.pg_size, op_data->pg_data_size, op_data->prev_set, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
} }
} }
// Send writes // Send writes

View File

@ -20,7 +20,7 @@ void osd_t::secondary_op_callback(osd_op_t *op)
if (op->req.hdr.opcode == OSD_OP_SEC_READ) if (op->req.hdr.opcode == OSD_OP_SEC_READ)
{ {
if (op->bs_op->retval >= 0) if (op->bs_op->retval >= 0)
op->reply.sec_rw.attr_len = entry_attr_size; op->reply.sec_rw.attr_len = clean_entry_bitmap_size;
else else
op->reply.sec_rw.attr_len = 0; op->reply.sec_rw.attr_len = 0;
if (op->bs_op->retval > 0) if (op->bs_op->retval > 0)
@ -62,8 +62,8 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ) if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
{ {
// Allocate memory for the read operation // Allocate memory for the read operation
if (entry_attr_size > sizeof(unsigned)) if (clean_entry_bitmap_size > sizeof(unsigned))
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(entry_attr_size); cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
else else
cur_op->bitmap = &cur_op->bmp_data; cur_op->bitmap = &cur_op->bmp_data;
if (cur_op->req.sec_rw.len > 0) if (cur_op->req.sec_rw.len > 0)