Fix resizer

rm-left-on-dead
Vitaliy Filippov 2022-07-06 02:24:17 +03:00
parent b10656ca09
commit cf487c95aa
2 changed files with 65 additions and 25 deletions

View File

@ -10,6 +10,7 @@
#define JOURNAL_MAGIC 0x4A33
#define JOURNAL_VERSION 1
#define JOURNAL_BUFFER_SIZE 4*1024*1024
#define JOURNAL_ENTRY_HEADER_SIZE 16
// We reserve some extra space for future stabilize requests during writes
// FIXME: This value should be dynamic i.e. Blockstore ideally shouldn't allow

View File

@ -85,6 +85,8 @@ struct disk_tool_t
/**** Methods ****/
~disk_tool_t();
void dump_journal_entry(int num, journal_entry *je);
int process_journal(std::function<int(void*)> block_fn);
int process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn);
@ -203,6 +205,15 @@ int main(int argc, char *argv[])
return 0;
}
disk_tool_t::~disk_tool_t()
{
if (data_alloc)
{
delete data_alloc;
data_alloc = NULL;
}
}
int disk_tool_t::dump_journal()
{
if (dsk.journal_block_size < DIRECT_IO_ALIGNMENT || (dsk.journal_block_size % DIRECT_IO_ALIGNMENT) ||
@ -211,14 +222,14 @@ int disk_tool_t::dump_journal()
fprintf(stderr, "Invalid journal block size\n");
return 1;
}
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
if (dsk.journal_fd < 0)
{
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
return 1;
}
if (all)
{
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
if (dsk.journal_fd < 0)
{
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
return 1;
}
void *journal_buf = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
journal_pos = 0;
while (journal_pos < dsk.journal_len)
@ -248,6 +259,8 @@ int disk_tool_t::dump_journal()
}
}
free(journal_buf);
close(dsk.journal_fd);
dsk.journal_fd = -1;
}
else
{
@ -260,13 +273,17 @@ int disk_tool_t::dump_journal()
return r;
});
}
close(dsk.journal_fd);
dsk.journal_fd = -1;
return 0;
}
int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
{
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
if (dsk.journal_fd < 0)
{
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
return 1;
}
void *data = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
journal_pos = 0;
int r = pread(dsk.journal_fd, data, dsk.journal_block_size, dsk.journal_offset+journal_pos);
@ -275,12 +292,15 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
if (je->magic != JOURNAL_MAGIC || je->type != JE_START || je_crc32(je) != je->crc32)
{
fprintf(stderr, "offset %08lx: journal superblock is invalid\n", journal_pos);
return 1;
r = 1;
}
else
{
started = false;
crc32_last = 0;
block_fn(data);
started = false;
crc32_last = 0;
journal_pos = je->start.journal_start;
while (1)
{
@ -293,8 +313,10 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
break;
}
}
close(dsk.journal_fd);
dsk.journal_fd = -1;
free(data);
return 0;
return r;
}
int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn)
@ -303,11 +325,11 @@ int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journa
journal_pos += dsk.journal_block_size;
int entry = 0;
bool wrapped = false;
while (pos < dsk.journal_block_size)
while (pos <= dsk.journal_block_size-JOURNAL_ENTRY_HEADER_SIZE)
{
journal_entry *je = (journal_entry*)((uint8_t*)buf + pos);
if (je->magic != JOURNAL_MAGIC || je->type < JE_MIN || je->type > JE_MAX ||
!all && started && je->crc32_prev != crc32_last)
!all && started && je->crc32_prev != crc32_last || pos > dsk.journal_block_size-je->size)
{
break;
}
@ -455,9 +477,9 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
meta_pos += read_len;
for (uint64_t blk = 0; blk < read_len; blk += dsk.meta_block_size)
{
for (uint64_t ioff = 0; ioff < dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
for (uint64_t ioff = 0; ioff <= dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
{
clean_disk_entry *entry = (clean_disk_entry*)(data + blk + ioff);
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)data + blk + ioff);
if (entry->oid.inode)
{
record_fn(block_num, entry, entry->bitmap);
@ -482,7 +504,7 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
{
for (uint64_t ioff = 0; ioff < dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
{
clean_disk_entry *entry = (clean_disk_entry*)(data + blk + ioff);
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)data + blk + ioff);
if (entry->oid.inode)
{
record_fn(block_num, entry, NULL);
@ -512,13 +534,13 @@ void disk_tool_t::dump_meta_header(blockstore_meta_header_v1_t *hdr)
if (hdr)
{
printf(
"{\"version\":\"0.6\",\"dsk.meta_block_size\":%u,\"dsk.data_block_size\":%u,\"dsk.bitmap_granularity\":%u,\"entries\":[\n",
"{\"version\":\"0.6\",\"meta_block_size\":%u,\"data_block_size\":%u,\"bitmap_granularity\":%u,\"entries\":[\n",
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity
);
}
else
{
printf("{\"version\":\"0.5\",\"dsk.meta_block_size\":%lu,\"entries\":[\n", dsk.meta_block_size);
printf("{\"version\":\"0.5\",\"meta_block_size\":%lu,\"entries\":[\n", dsk.meta_block_size);
}
first = true;
}
@ -561,6 +583,8 @@ int disk_tool_t::resize_data()
if (r != 0)
return r;
// Check parameters and fill allocator
fprintf(stderr, "Reading metadata\n");
data_alloc = new allocator((new_data_len < dsk.data_len ? dsk.data_len : new_data_len) / dsk.data_block_size);
r = process_meta(
[this](blockstore_meta_header_v1_t *hdr)
{
@ -573,6 +597,7 @@ int disk_tool_t::resize_data()
);
if (r != 0)
return r;
fprintf(stderr, "Reading journal\n");
r = process_journal([this](void *buf)
{
return process_journal_block(buf, [this](int num, journal_entry *je)
@ -590,17 +615,21 @@ int disk_tool_t::resize_data()
if (r != 0)
return r;
// Copy data blocks into new places
fprintf(stderr, "Moving data blocks\n");
r = resize_copy_data();
if (r != 0)
return r;
// Rewrite journal
fprintf(stderr, "Writing new journal\n");
r = resize_rewrite_journal();
if (r != 0)
return r;
// Rewrite metadata
fprintf(stderr, "Writing new metadata\n");
r = resize_rewrite_meta();
if (r != 0)
return r;
fprintf(stderr, "Done\n");
return 0;
}
@ -673,8 +702,8 @@ void disk_tool_t::resize_init(blockstore_meta_header_v1_t *hdr)
fprintf(stderr, "Data alignment mismatch\n");
exit(1);
}
data_idx_diff = (new_data_offset-dsk.data_offset) / dsk.data_block_size;
free_first = new_data_offset > dsk.data_offset ? data_idx_diff : 0;
data_idx_diff = ((int64_t)(dsk.data_offset-new_data_offset)) / dsk.data_block_size;
free_first = new_data_offset > dsk.data_offset ? (new_data_offset-dsk.data_offset) / dsk.data_block_size : 0;
free_last = (new_data_offset+new_data_len < dsk.data_offset+dsk.data_len)
? (dsk.data_offset+dsk.data_len-new_data_offset-new_data_len) / dsk.data_block_size
: 0;
@ -737,6 +766,7 @@ int disk_tool_t::resize_remap_blocks()
fprintf(stderr, "Not enough space to move data\n");
return 1;
}
data_alloc->set(new_loc, true);
data_remap[p.first] = new_loc;
}
return 0;
@ -754,13 +784,14 @@ int disk_tool_t::resize_copy_data()
{
fprintf(stderr, "Failed to open data device %s: %s\n", dsk.data_device.c_str(), strerror(errno));
delete ringloop;
ringloop = NULL;
return 1;
}
moving_blocks = new resizer_data_moving_t[iodepth];
moving_blocks[0].buf = memalign_or_die(MEM_ALIGNMENT, iodepth*dsk.data_block_size);
for (int i = 1; i < iodepth; i++)
{
moving_blocks[i].buf = moving_blocks[0].buf + i*dsk.data_block_size;
moving_blocks[i].buf = (uint8_t*)moving_blocks[0].buf + i*dsk.data_block_size;
}
remap_active = 1;
remap_it = data_remap.begin();
@ -845,6 +876,7 @@ int disk_tool_t::resize_copy_data()
close(dsk.data_fd);
dsk.data_fd = -1;
delete ringloop;
ringloop = NULL;
return 0;
}
@ -855,23 +887,26 @@ int disk_tool_t::resize_rewrite_journal()
new_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len);
new_journal_ptr = new_buf;
new_journal_data = new_journal_ptr + dsk.journal_block_size;
new_journal_in_pos = 0;
memset(new_buf, 0, new_journal_len);
process_journal([this](void *buf)
{
return process_journal_block(buf, [this](int num, journal_entry *je)
{
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
if (je->type == JE_START)
{
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
*((journal_entry_start*)ne) = (journal_entry_start){
.magic = JOURNAL_MAGIC,
.type = JE_START,
.size = sizeof(ne->start),
.size = sizeof(journal_entry_start),
.journal_start = dsk.journal_block_size,
.version = JOURNAL_VERSION,
};
ne->crc32 = je_crc32(ne);
new_journal_ptr += dsk.journal_block_size;
new_journal_data = new_journal_ptr+dsk.journal_block_size;
new_journal_in_pos = 0;
}
else
{
@ -883,7 +918,7 @@ int disk_tool_t::resize_rewrite_journal()
fprintf(stderr, "Error: live entries don't fit to the new journal\n");
exit(1);
}
new_journal_data += dsk.journal_block_size;
new_journal_data = new_journal_ptr+dsk.journal_block_size;
new_journal_in_pos = 0;
if (dsk.journal_block_size < je->size)
{
@ -891,6 +926,7 @@ int disk_tool_t::resize_rewrite_journal()
exit(1);
}
}
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
memcpy(ne, je, je->size);
ne->crc32_prev = new_crc32_prev;
if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT)
@ -901,7 +937,7 @@ int disk_tool_t::resize_rewrite_journal()
{
ne->big_write.location = remap_it->second * dsk.data_block_size;
}
ne->big_write.location += data_idx_diff;
ne->big_write.location += data_idx_diff * dsk.data_block_size;
}
else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT)
{
@ -957,7 +993,10 @@ int disk_tool_t::resize_rewrite_meta()
if (remap_it != data_remap.end())
block_num = remap_it->second;
if (block_num < free_first || block_num >= total_blocks-free_last)
return;
{
fprintf(stderr, "BUG: remapped block not in range\n");
exit(1);
}
block_num += data_idx_diff;
clean_disk_entry *new_entry = (clean_disk_entry*)(new_buf + dsk.meta_block_size +
dsk.meta_block_size*(block_num / new_entries_per_block) +