Add "external" bitmap support to blockstore
parent
95c29b9dc3
commit
6107a4d07b
|
@ -64,6 +64,10 @@ Input:
|
||||||
- offset, len = offset and length within object. length may be zero, in that case
|
- offset, len = offset and length within object. length may be zero, in that case
|
||||||
read operation only returns the version / write operation only bumps the version
|
read operation only returns the version / write operation only bumps the version
|
||||||
- buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0.
|
- buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0.
|
||||||
|
- bitmap = arbitrary data, <entry_attr_size> bytes long, stored for each object in the metadata area.
|
||||||
|
when <entry_attr_size> fits into pointer size (<= sizeof(void*)), the data itself should be passed as this field's value.
|
||||||
|
when it doesn't fit, this field should be a pointer to that piece of data.
|
||||||
|
named "bitmap" because it's used for the "external bitmap" in Vitastor.
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
||||||
|
@ -141,6 +145,7 @@ struct blockstore_op_t
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
void *buf;
|
void *buf;
|
||||||
|
void *bitmap;
|
||||||
int retval;
|
int retval;
|
||||||
|
|
||||||
uint8_t private_data[BS_OP_PRIVATE_DATA_SIZE];
|
uint8_t private_data[BS_OP_PRIVATE_DATA_SIZE];
|
||||||
|
|
|
@ -428,7 +428,7 @@ resume_1:
|
||||||
{
|
{
|
||||||
new_clean_bitmap = (bs->inmemory_meta
|
new_clean_bitmap = (bs->inmemory_meta
|
||||||
? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
|
? meta_new.buf + meta_new.pos*bs->clean_entry_size + sizeof(clean_disk_entry)
|
||||||
: bs->clean_bitmap + (clean_loc >> bs->block_order)*bs->clean_entry_bitmap_size);
|
: bs->clean_bitmap + (clean_loc >> bs->block_order)*(bs->clean_entry_bitmap_size + bs->entry_attr_size));
|
||||||
if (clean_init_bitmap)
|
if (clean_init_bitmap)
|
||||||
{
|
{
|
||||||
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
|
memset(new_clean_bitmap, 0, bs->clean_entry_bitmap_size);
|
||||||
|
@ -473,6 +473,7 @@ resume_1:
|
||||||
wait_state = 5;
|
wait_state = 5;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
// zero out old metadata entry
|
||||||
memset(meta_old.buf + meta_old.pos*bs->clean_entry_size, 0, bs->clean_entry_size);
|
memset(meta_old.buf + meta_old.pos*bs->clean_entry_size, 0, bs->clean_entry_size);
|
||||||
await_sqe(15);
|
await_sqe(15);
|
||||||
data->iov = (struct iovec){ meta_old.buf, bs->meta_block_size };
|
data->iov = (struct iovec){ meta_old.buf, bs->meta_block_size };
|
||||||
|
@ -509,6 +510,12 @@ resume_1:
|
||||||
{
|
{
|
||||||
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
|
memcpy(&new_entry->bitmap, new_clean_bitmap, bs->clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
|
if (bs->entry_attr_size)
|
||||||
|
{
|
||||||
|
// copy latest external bitmap/attributes
|
||||||
|
void *bmp_ptr = bs->entry_attr_size > sizeof(void*) ? dirty_end->second.bitmap : &dirty_end->second.bitmap;
|
||||||
|
memcpy((void*)(new_entry+1) + bs->clean_entry_bitmap_size, bmp_ptr, bs->entry_attr_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
await_sqe(6);
|
await_sqe(6);
|
||||||
data->iov = (struct iovec){ meta_new.buf, bs->meta_block_size };
|
data->iov = (struct iovec){ meta_new.buf, bs->meta_block_size };
|
||||||
|
|
|
@ -77,7 +77,8 @@
|
||||||
|
|
||||||
#include "blockstore_journal.h"
|
#include "blockstore_journal.h"
|
||||||
|
|
||||||
// 24 bytes + block bitmap per "clean" entry on disk with fixed metadata tables
|
// 32 bytes = 24 bytes + block bitmap (4 bytes by default) + external attributes (also bitmap, 4 bytes by default)
|
||||||
|
// per "clean" entry on disk with fixed metadata tables
|
||||||
// FIXME: maybe add crc32's to metadata
|
// FIXME: maybe add crc32's to metadata
|
||||||
struct __attribute__((__packed__)) clean_disk_entry
|
struct __attribute__((__packed__)) clean_disk_entry
|
||||||
{
|
{
|
||||||
|
@ -93,7 +94,7 @@ struct __attribute__((__packed__)) clean_entry
|
||||||
uint64_t location;
|
uint64_t location;
|
||||||
};
|
};
|
||||||
|
|
||||||
// 56 = 24 + 32 bytes per dirty entry in memory (obj_ver_id => dirty_entry)
|
// 64 = 24 + 40 bytes per dirty entry in memory (obj_ver_id => dirty_entry)
|
||||||
struct __attribute__((__packed__)) dirty_entry
|
struct __attribute__((__packed__)) dirty_entry
|
||||||
{
|
{
|
||||||
uint32_t state;
|
uint32_t state;
|
||||||
|
@ -102,6 +103,7 @@ struct __attribute__((__packed__)) dirty_entry
|
||||||
uint32_t offset; // data offset within object (stripe)
|
uint32_t offset; // data offset within object (stripe)
|
||||||
uint32_t len; // data length
|
uint32_t len; // data length
|
||||||
uint64_t journal_sector; // journal sector used for this entry
|
uint64_t journal_sector; // journal sector used for this entry
|
||||||
|
void* bitmap; // either external bitmap itself when it fits, or a pointer to it when it doesn't
|
||||||
};
|
};
|
||||||
|
|
||||||
// - Sync must be submitted after previous writes/deletes (not before!)
|
// - Sync must be submitted after previous writes/deletes (not before!)
|
||||||
|
@ -216,7 +218,7 @@ class blockstore_impl_t
|
||||||
|
|
||||||
uint32_t block_order;
|
uint32_t block_order;
|
||||||
uint64_t block_count;
|
uint64_t block_count;
|
||||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
|
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, entry_attr_size = 0;
|
||||||
|
|
||||||
int meta_fd;
|
int meta_fd;
|
||||||
int data_fd;
|
int data_fd;
|
||||||
|
@ -250,6 +252,7 @@ class blockstore_impl_t
|
||||||
void open_data();
|
void open_data();
|
||||||
void open_meta();
|
void open_meta();
|
||||||
void open_journal();
|
void open_journal();
|
||||||
|
uint8_t* get_clean_entry_bitmap(uint64_t block_loc, int offset);
|
||||||
|
|
||||||
// Asynchronous init
|
// Asynchronous init
|
||||||
int initialized;
|
int initialized;
|
||||||
|
|
|
@ -98,9 +98,9 @@ void blockstore_init_meta::handle_entries(void* entries, unsigned count, int blo
|
||||||
for (unsigned i = 0; i < count; i++)
|
for (unsigned i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size);
|
clean_disk_entry *entry = (clean_disk_entry*)(entries + i*bs->clean_entry_size);
|
||||||
if (!bs->inmemory_meta && bs->clean_entry_bitmap_size)
|
if (!bs->inmemory_meta && (bs->clean_entry_bitmap_size || bs->entry_attr_size))
|
||||||
{
|
{
|
||||||
memcpy(bs->clean_bitmap + (done_cnt+i)*bs->clean_entry_bitmap_size, &entry->bitmap, bs->clean_entry_bitmap_size);
|
memcpy(bs->clean_bitmap + (done_cnt+i)*(bs->clean_entry_bitmap_size + bs->entry_attr_size), &entry->bitmap, (bs->clean_entry_bitmap_size + bs->entry_attr_size));
|
||||||
}
|
}
|
||||||
if (entry->oid.inode > 0)
|
if (entry->oid.inode > 0)
|
||||||
{
|
{
|
||||||
|
@ -545,6 +545,21 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.oid = je->small_write.oid,
|
.oid = je->small_write.oid,
|
||||||
.version = je->small_write.version,
|
.version = je->small_write.version,
|
||||||
};
|
};
|
||||||
|
void *bmp = (void*)je + sizeof(journal_entry_small_write);
|
||||||
|
if (bs->entry_attr_size <= sizeof(void*))
|
||||||
|
{
|
||||||
|
memcpy(&bmp, bmp, bs->entry_attr_size);
|
||||||
|
}
|
||||||
|
else if (!bs->journal.inmemory)
|
||||||
|
{
|
||||||
|
// FIXME Using large blockstore objects and not keeping journal in memory
|
||||||
|
// will result in a lot of small allocations for entry bitmaps. This can
|
||||||
|
// only be fixed by using a patched map with dynamic entry size, but not
|
||||||
|
// the btree_map, because it doesn't keep iterators valid all the time.
|
||||||
|
void *bmp_cp = malloc_or_die(bs->entry_attr_size);
|
||||||
|
memcpy(bmp_cp, bmp, bs->entry_attr_size);
|
||||||
|
bmp = bmp_cp;
|
||||||
|
}
|
||||||
bs->dirty_db.emplace(ov, (dirty_entry){
|
bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
|
.state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
|
@ -552,6 +567,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.offset = je->small_write.offset,
|
.offset = je->small_write.offset,
|
||||||
.len = je->small_write.len,
|
.len = je->small_write.len,
|
||||||
.journal_sector = proc_pos,
|
.journal_sector = proc_pos,
|
||||||
|
.bitmap = bmp,
|
||||||
});
|
});
|
||||||
bs->journal.used_sectors[proc_pos]++;
|
bs->journal.used_sectors[proc_pos]++;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
@ -609,6 +625,21 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.oid = je->big_write.oid,
|
.oid = je->big_write.oid,
|
||||||
.version = je->big_write.version,
|
.version = je->big_write.version,
|
||||||
};
|
};
|
||||||
|
void *bmp = (void*)je + sizeof(journal_entry_big_write);
|
||||||
|
if (bs->entry_attr_size <= sizeof(void*))
|
||||||
|
{
|
||||||
|
memcpy(&bmp, bmp, bs->entry_attr_size);
|
||||||
|
}
|
||||||
|
else if (!bs->journal.inmemory)
|
||||||
|
{
|
||||||
|
// FIXME Using large blockstore objects and not keeping journal in memory
|
||||||
|
// will result in a lot of small allocations for entry bitmaps. This can
|
||||||
|
// only be fixed by using a patched map with dynamic entry size, but not
|
||||||
|
// the btree_map, because it doesn't keep iterators valid all the time.
|
||||||
|
void *bmp_cp = malloc_or_die(bs->entry_attr_size);
|
||||||
|
memcpy(bmp_cp, bmp, bs->entry_attr_size);
|
||||||
|
bmp = bmp_cp;
|
||||||
|
}
|
||||||
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
|
||||||
.state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
|
.state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
|
@ -616,6 +647,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
||||||
.offset = je->big_write.offset,
|
.offset = je->big_write.offset,
|
||||||
.len = je->big_write.len,
|
.len = je->big_write.len,
|
||||||
.journal_sector = proc_pos,
|
.journal_sector = proc_pos,
|
||||||
|
.bitmap = bmp,
|
||||||
}).first;
|
}).first;
|
||||||
if (bs->data_alloc->get(je->big_write.location >> bs->block_order))
|
if (bs->data_alloc->get(je->big_write.location >> bs->block_order))
|
||||||
{
|
{
|
||||||
|
|
|
@ -54,6 +54,9 @@ struct __attribute__((__packed__)) journal_entry_small_write
|
||||||
// data_offset is its offset within journal
|
// data_offset is its offset within journal
|
||||||
uint64_t data_offset;
|
uint64_t data_offset;
|
||||||
uint32_t crc32_data;
|
uint32_t crc32_data;
|
||||||
|
// small_write and big_write entries are followed by the "external" bitmap
|
||||||
|
// its size is dynamic and included in journal entry's <size> field
|
||||||
|
uint8_t bitmap[];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) journal_entry_big_write
|
struct __attribute__((__packed__)) journal_entry_big_write
|
||||||
|
@ -68,6 +71,9 @@ struct __attribute__((__packed__)) journal_entry_big_write
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
uint64_t location;
|
uint64_t location;
|
||||||
|
// small_write and big_write entries are followed by the "external" bitmap
|
||||||
|
// its size is dynamic and included in journal entry's <size> field
|
||||||
|
uint8_t bitmap[];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) journal_entry_stable
|
struct __attribute__((__packed__)) journal_entry_stable
|
||||||
|
|
|
@ -62,6 +62,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
cfg_data_size = strtoull(config["data_size"].c_str(), NULL, 10);
|
||||||
meta_device = config["meta_device"];
|
meta_device = config["meta_device"];
|
||||||
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
meta_offset = strtoull(config["meta_offset"].c_str(), NULL, 10);
|
||||||
|
entry_attr_size = strtoull(config["entry_attr_size"].c_str(), NULL, 10);
|
||||||
block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
||||||
inmemory_meta = config["inmemory_metadata"] != "false";
|
inmemory_meta = config["inmemory_metadata"] != "false";
|
||||||
journal_device = config["journal_device"];
|
journal_device = config["journal_device"];
|
||||||
|
@ -106,7 +107,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
}
|
}
|
||||||
else if (disk_alignment % MEM_ALIGNMENT)
|
else if (disk_alignment % MEM_ALIGNMENT)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("disk_alingment must be a multiple of "+std::to_string(MEM_ALIGNMENT));
|
throw std::runtime_error("disk_alignment must be a multiple of "+std::to_string(MEM_ALIGNMENT));
|
||||||
}
|
}
|
||||||
if (!journal_block_size)
|
if (!journal_block_size)
|
||||||
{
|
{
|
||||||
|
@ -182,7 +183,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
}
|
}
|
||||||
// init some fields
|
// init some fields
|
||||||
clean_entry_bitmap_size = block_size / bitmap_granularity / 8;
|
clean_entry_bitmap_size = block_size / bitmap_granularity / 8;
|
||||||
clean_entry_size = sizeof(clean_disk_entry) + clean_entry_bitmap_size;
|
clean_entry_size = sizeof(clean_disk_entry) + clean_entry_bitmap_size + entry_attr_size;
|
||||||
journal.block_size = journal_block_size;
|
journal.block_size = journal_block_size;
|
||||||
journal.next_free = journal_block_size;
|
journal.next_free = journal_block_size;
|
||||||
journal.used_start = journal_block_size;
|
journal.used_start = journal_block_size;
|
||||||
|
@ -247,9 +248,9 @@ void blockstore_impl_t::calc_lengths()
|
||||||
if (!metadata_buffer)
|
if (!metadata_buffer)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata");
|
throw std::runtime_error("Failed to allocate memory for the metadata");
|
||||||
}
|
}
|
||||||
else if (clean_entry_bitmap_size)
|
else if (clean_entry_bitmap_size || entry_attr_size)
|
||||||
{
|
{
|
||||||
clean_bitmap = (uint8_t*)malloc(block_count * clean_entry_bitmap_size);
|
clean_bitmap = (uint8_t*)malloc(block_count * (clean_entry_bitmap_size + entry_attr_size));
|
||||||
if (!clean_bitmap)
|
if (!clean_bitmap)
|
||||||
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
throw std::runtime_error("Failed to allocate memory for the metadata sparse write bitmap");
|
||||||
}
|
}
|
||||||
|
|
|
@ -94,6 +94,21 @@ endwhile:
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offset) // Returns a pointer <offset> bytes into the per-entry bitmap area of the clean entry for <block_loc>; dequeue_read passes 0 for the block bitmap and clean_entry_bitmap_size for the external attributes that follow it
|
||||||
|
{
|
||||||
|
uint8_t *clean_entry_bitmap;
|
||||||
|
uint64_t meta_loc = block_loc >> block_order; // index of the entry: data location shifted right by block_order
|
||||||
|
if (inmemory_meta)
|
||||||
|
{
|
||||||
|
uint64_t sector = (meta_loc / (meta_block_size / clean_entry_size)) * meta_block_size; // byte offset of the metadata block holding this entry (meta_block_size / clean_entry_size entries per block)
|
||||||
|
uint64_t pos = (meta_loc % (meta_block_size / clean_entry_size)); // index of the entry inside that metadata block
|
||||||
|
clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry) + offset); // skip the fixed clean_disk_entry header, then apply the caller's offset
|
||||||
|
}
|
||||||
|
else
|
||||||
|
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*(clean_entry_bitmap_size + entry_attr_size) + offset); // metadata is not in memory: use the separate clean_bitmap array, one (clean_entry_bitmap_size + entry_attr_size)-byte slot per entry
|
||||||
|
return clean_entry_bitmap;
|
||||||
|
}
|
||||||
|
|
||||||
int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
{
|
{
|
||||||
auto clean_it = clean_db.find(read_op->oid);
|
auto clean_it = clean_db.find(read_op->oid);
|
||||||
|
@ -134,6 +149,10 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
if (!result_version)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
result_version = dirty_it->first.version;
|
result_version = dirty_it->first.version;
|
||||||
|
if (entry_attr_size <= sizeof(void*))
|
||||||
|
read_op->bitmap = dirty_it->second.bitmap;
|
||||||
|
else if (read_op->bitmap)
|
||||||
|
memcpy(read_op->bitmap, dirty_it->second.bitmap, entry_attr_size);
|
||||||
}
|
}
|
||||||
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
||||||
dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset)))
|
dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset)))
|
||||||
|
@ -155,6 +174,11 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
if (!result_version)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
result_version = clean_it->second.version;
|
result_version = clean_it->second.version;
|
||||||
|
void *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, clean_entry_bitmap_size);
|
||||||
|
if (entry_attr_size <= sizeof(void*))
|
||||||
|
memcpy(&read_op->bitmap, clean_entry_bitmap, entry_attr_size);
|
||||||
|
else if (read_op->bitmap)
|
||||||
|
memcpy(read_op->bitmap, clean_entry_bitmap, entry_attr_size);
|
||||||
}
|
}
|
||||||
if (fulfilled < read_op->len)
|
if (fulfilled < read_op->len)
|
||||||
{
|
{
|
||||||
|
@ -169,18 +193,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uint64_t meta_loc = clean_it->second.location >> block_order;
|
uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, 0);
|
||||||
uint8_t *clean_entry_bitmap;
|
|
||||||
if (inmemory_meta)
|
|
||||||
{
|
|
||||||
uint64_t sector = (meta_loc / (meta_block_size / clean_entry_size)) * meta_block_size;
|
|
||||||
uint64_t pos = (meta_loc % (meta_block_size / clean_entry_size));
|
|
||||||
clean_entry_bitmap = (uint8_t*)(metadata_buffer + sector + pos*clean_entry_size + sizeof(clean_disk_entry));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
clean_entry_bitmap = (uint8_t*)(clean_bitmap + meta_loc*clean_entry_bitmap_size);
|
|
||||||
}
|
|
||||||
uint64_t bmp_start = 0, bmp_end = 0, bmp_size = block_size/bitmap_granularity;
|
uint64_t bmp_start = 0, bmp_end = 0, bmp_size = block_size/bitmap_granularity;
|
||||||
while (bmp_start < bmp_size)
|
while (bmp_start < bmp_size)
|
||||||
{
|
{
|
||||||
|
|
|
@ -268,6 +268,11 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||||
{
|
{
|
||||||
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
journal.used_sectors.erase(dirty_it->second.journal_sector);
|
||||||
}
|
}
|
||||||
|
if (entry_attr_size > sizeof(void*))
|
||||||
|
{
|
||||||
|
free(dirty_it->second.bitmap);
|
||||||
|
dirty_it->second.bitmap = NULL;
|
||||||
|
}
|
||||||
if (dirty_it == dirty_start)
|
if (dirty_it == dirty_start)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -95,6 +95,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
#endif
|
#endif
|
||||||
// FIXME No strict need to add it into dirty_db here, it's just left
|
// FIXME No strict need to add it into dirty_db here, it's just left
|
||||||
// from the previous implementation where reads waited for writes
|
// from the previous implementation where reads waited for writes
|
||||||
|
void *bmp = NULL;
|
||||||
uint32_t state;
|
uint32_t state;
|
||||||
if (is_del)
|
if (is_del)
|
||||||
state = BS_ST_DELETE | BS_ST_IN_FLIGHT;
|
state = BS_ST_DELETE | BS_ST_IN_FLIGHT;
|
||||||
|
@ -109,6 +110,14 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
state |= BS_ST_IN_FLIGHT;
|
state |= BS_ST_IN_FLIGHT;
|
||||||
if (op->opcode == BS_OP_WRITE_STABLE)
|
if (op->opcode == BS_OP_WRITE_STABLE)
|
||||||
state |= BS_ST_INSTANT;
|
state |= BS_ST_INSTANT;
|
||||||
|
if (entry_attr_size > sizeof(void*))
|
||||||
|
{
|
||||||
|
bmp = calloc_or_die(1, entry_attr_size);
|
||||||
|
if (op->bitmap)
|
||||||
|
memcpy(bmp, op->bitmap, entry_attr_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bmp = op->bitmap;
|
||||||
}
|
}
|
||||||
dirty_db.emplace((obj_ver_id){
|
dirty_db.emplace((obj_ver_id){
|
||||||
.oid = op->oid,
|
.oid = op->oid,
|
||||||
|
@ -120,6 +129,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
.offset = is_del ? 0 : op->offset,
|
.offset = is_del ? 0 : op->offset,
|
||||||
.len = is_del ? 0 : op->len,
|
.len = is_del ? 0 : op->len,
|
||||||
.journal_sector = 0,
|
.journal_sector = 0,
|
||||||
|
.bitmap = bmp,
|
||||||
});
|
});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -128,6 +138,8 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
|
||||||
{
|
{
|
||||||
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
||||||
{
|
{
|
||||||
|
if (entry_attr_size > sizeof(void*))
|
||||||
|
free(dirty_it->second.bitmap);
|
||||||
dirty_db.erase(dirty_it++);
|
dirty_db.erase(dirty_it++);
|
||||||
}
|
}
|
||||||
bool found = false;
|
bool found = false;
|
||||||
|
@ -305,7 +317,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
// Then pre-fill journal entry
|
// Then pre-fill journal entry
|
||||||
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
journal_entry_small_write *je = (journal_entry_small_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
|
||||||
sizeof(journal_entry_small_write)
|
sizeof(journal_entry_small_write) + entry_attr_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -324,6 +336,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->data_offset = journal.next_free;
|
je->data_offset = journal.next_free;
|
||||||
je->crc32_data = crc32c(0, op->buf, op->len);
|
je->crc32_data = crc32c(0, op->buf, op->len);
|
||||||
|
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
if (immediate_commit != IMMEDIATE_NONE)
|
if (immediate_commit != IMMEDIATE_NONE)
|
||||||
|
@ -396,7 +409,7 @@ resume_2:
|
||||||
BS_SUBMIT_GET_SQE_DECL(sqe);
|
BS_SUBMIT_GET_SQE_DECL(sqe);
|
||||||
je = (journal_entry_big_write*)prefill_single_journal_entry(
|
je = (journal_entry_big_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write)
|
sizeof(journal_entry_big_write) + entry_attr_size
|
||||||
);
|
);
|
||||||
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
|
||||||
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
|
||||||
|
@ -412,6 +425,7 @@ resume_2:
|
||||||
je->offset = op->offset;
|
je->offset = op->offset;
|
||||||
je->len = op->len;
|
je->len = op->len;
|
||||||
je->location = dirty_it->second.location;
|
je->location = dirty_it->second.location;
|
||||||
|
memcpy((void*)(je+1), (entry_attr_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), entry_attr_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
prepare_journal_sector_write(journal, journal.cur_sector, sqe,
|
prepare_journal_sector_write(journal, journal.cur_sector, sqe,
|
||||||
|
|
|
@ -215,7 +215,7 @@ int* get_jerasure_decoding_matrix(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
||||||
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
auto dec_it = matrix->decodings.find((reed_sol_erased_t){ .data = erased, .size = pg_size });
|
||||||
if (dec_it == matrix->decodings.end())
|
if (dec_it == matrix->decodings.end())
|
||||||
{
|
{
|
||||||
int *dm_ids = (int*)malloc(sizeof(int)*(pg_minsize + pg_minsize*pg_minsize + pg_size));
|
int *dm_ids = (int*)malloc_or_die(sizeof(int)*(pg_minsize + pg_minsize*pg_minsize + pg_size));
|
||||||
int *decoding_matrix = dm_ids + pg_minsize;
|
int *decoding_matrix = dm_ids + pg_minsize;
|
||||||
if (!dm_ids)
|
if (!dm_ids)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
|
|
Loading…
Reference in New Issue