Fix resizer
parent
b10656ca09
commit
cf487c95aa
|
@ -10,6 +10,7 @@
|
||||||
// Magic value expected in the `magic` field of every journal entry
#define JOURNAL_MAGIC 0x4A33
// Version number stored in the journal start entry
#define JOURNAL_VERSION 1
// 4 MiB. Parenthesized so the macro expands safely inside larger
// expressions (e.g. `x / JOURNAL_BUFFER_SIZE` or `x % JOURNAL_BUFFER_SIZE`)
#define JOURNAL_BUFFER_SIZE (4*1024*1024)
// Minimum number of bytes that must remain in a journal block for the
// scanner to look for another entry there
// NOTE(review): presumably the size of the common journal entry header - confirm
#define JOURNAL_ENTRY_HEADER_SIZE 16
|
||||||
|
|
||||||
// We reserve some extra space for future stabilize requests during writes
|
// We reserve some extra space for future stabilize requests during writes
|
||||||
// FIXME: This value should be dynamic i.e. Blockstore ideally shouldn't allow
|
// FIXME: This value should be dynamic i.e. Blockstore ideally shouldn't allow
|
||||||
|
|
|
@ -85,6 +85,8 @@ struct disk_tool_t
|
||||||
|
|
||||||
/**** Methods ****/
|
/**** Methods ****/
|
||||||
|
|
||||||
|
~disk_tool_t();
|
||||||
|
|
||||||
void dump_journal_entry(int num, journal_entry *je);
|
void dump_journal_entry(int num, journal_entry *je);
|
||||||
int process_journal(std::function<int(void*)> block_fn);
|
int process_journal(std::function<int(void*)> block_fn);
|
||||||
int process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn);
|
int process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn);
|
||||||
|
@ -203,6 +205,15 @@ int main(int argc, char *argv[])
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Destructor: frees the allocator object held in data_alloc (if any)
// and clears the pointer.
disk_tool_t::~disk_tool_t()
{
    // delete on a null pointer is a no-op, so no explicit guard is needed
    delete data_alloc;
    data_alloc = NULL;
}
|
||||||
|
|
||||||
int disk_tool_t::dump_journal()
|
int disk_tool_t::dump_journal()
|
||||||
{
|
{
|
||||||
if (dsk.journal_block_size < DIRECT_IO_ALIGNMENT || (dsk.journal_block_size % DIRECT_IO_ALIGNMENT) ||
|
if (dsk.journal_block_size < DIRECT_IO_ALIGNMENT || (dsk.journal_block_size % DIRECT_IO_ALIGNMENT) ||
|
||||||
|
@ -211,14 +222,14 @@ int disk_tool_t::dump_journal()
|
||||||
fprintf(stderr, "Invalid journal block size\n");
|
fprintf(stderr, "Invalid journal block size\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
|
|
||||||
if (dsk.journal_fd < 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (all)
|
if (all)
|
||||||
{
|
{
|
||||||
|
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
|
||||||
|
if (dsk.journal_fd < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
void *journal_buf = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
|
void *journal_buf = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
|
||||||
journal_pos = 0;
|
journal_pos = 0;
|
||||||
while (journal_pos < dsk.journal_len)
|
while (journal_pos < dsk.journal_len)
|
||||||
|
@ -248,6 +259,8 @@ int disk_tool_t::dump_journal()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(journal_buf);
|
free(journal_buf);
|
||||||
|
close(dsk.journal_fd);
|
||||||
|
dsk.journal_fd = -1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -260,13 +273,17 @@ int disk_tool_t::dump_journal()
|
||||||
return r;
|
return r;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
close(dsk.journal_fd);
|
|
||||||
dsk.journal_fd = -1;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
||||||
{
|
{
|
||||||
|
dsk.journal_fd = open(dsk.journal_device.c_str(), O_DIRECT|O_RDONLY);
|
||||||
|
if (dsk.journal_fd < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to open journal device %s: %s\n", dsk.journal_device.c_str(), strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
void *data = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
|
void *data = memalign_or_die(MEM_ALIGNMENT, dsk.journal_block_size);
|
||||||
journal_pos = 0;
|
journal_pos = 0;
|
||||||
int r = pread(dsk.journal_fd, data, dsk.journal_block_size, dsk.journal_offset+journal_pos);
|
int r = pread(dsk.journal_fd, data, dsk.journal_block_size, dsk.journal_offset+journal_pos);
|
||||||
|
@ -275,12 +292,15 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
||||||
if (je->magic != JOURNAL_MAGIC || je->type != JE_START || je_crc32(je) != je->crc32)
|
if (je->magic != JOURNAL_MAGIC || je->type != JE_START || je_crc32(je) != je->crc32)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "offset %08lx: journal superblock is invalid\n", journal_pos);
|
fprintf(stderr, "offset %08lx: journal superblock is invalid\n", journal_pos);
|
||||||
return 1;
|
r = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
started = false;
|
||||||
|
crc32_last = 0;
|
||||||
block_fn(data);
|
block_fn(data);
|
||||||
started = false;
|
started = false;
|
||||||
|
crc32_last = 0;
|
||||||
journal_pos = je->start.journal_start;
|
journal_pos = je->start.journal_start;
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
|
@ -293,8 +313,10 @@ int disk_tool_t::process_journal(std::function<int(void*)> block_fn)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
close(dsk.journal_fd);
|
||||||
|
dsk.journal_fd = -1;
|
||||||
free(data);
|
free(data);
|
||||||
return 0;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn)
|
int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journal_entry*)> iter_fn)
|
||||||
|
@ -303,11 +325,11 @@ int disk_tool_t::process_journal_block(void *buf, std::function<void(int, journa
|
||||||
journal_pos += dsk.journal_block_size;
|
journal_pos += dsk.journal_block_size;
|
||||||
int entry = 0;
|
int entry = 0;
|
||||||
bool wrapped = false;
|
bool wrapped = false;
|
||||||
while (pos < dsk.journal_block_size)
|
while (pos <= dsk.journal_block_size-JOURNAL_ENTRY_HEADER_SIZE)
|
||||||
{
|
{
|
||||||
journal_entry *je = (journal_entry*)((uint8_t*)buf + pos);
|
journal_entry *je = (journal_entry*)((uint8_t*)buf + pos);
|
||||||
if (je->magic != JOURNAL_MAGIC || je->type < JE_MIN || je->type > JE_MAX ||
|
if (je->magic != JOURNAL_MAGIC || je->type < JE_MIN || je->type > JE_MAX ||
|
||||||
!all && started && je->crc32_prev != crc32_last)
|
!all && started && je->crc32_prev != crc32_last || pos > dsk.journal_block_size-je->size)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -455,9 +477,9 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
|
||||||
meta_pos += read_len;
|
meta_pos += read_len;
|
||||||
for (uint64_t blk = 0; blk < read_len; blk += dsk.meta_block_size)
|
for (uint64_t blk = 0; blk < read_len; blk += dsk.meta_block_size)
|
||||||
{
|
{
|
||||||
for (uint64_t ioff = 0; ioff < dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
|
for (uint64_t ioff = 0; ioff <= dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
|
||||||
{
|
{
|
||||||
clean_disk_entry *entry = (clean_disk_entry*)(data + blk + ioff);
|
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)data + blk + ioff);
|
||||||
if (entry->oid.inode)
|
if (entry->oid.inode)
|
||||||
{
|
{
|
||||||
record_fn(block_num, entry, entry->bitmap);
|
record_fn(block_num, entry, entry->bitmap);
|
||||||
|
@ -482,7 +504,7 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
|
||||||
{
|
{
|
||||||
for (uint64_t ioff = 0; ioff < dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
|
for (uint64_t ioff = 0; ioff < dsk.meta_block_size-dsk.clean_entry_size; ioff += dsk.clean_entry_size, block_num++)
|
||||||
{
|
{
|
||||||
clean_disk_entry *entry = (clean_disk_entry*)(data + blk + ioff);
|
clean_disk_entry *entry = (clean_disk_entry*)((uint8_t*)data + blk + ioff);
|
||||||
if (entry->oid.inode)
|
if (entry->oid.inode)
|
||||||
{
|
{
|
||||||
record_fn(block_num, entry, NULL);
|
record_fn(block_num, entry, NULL);
|
||||||
|
@ -512,13 +534,13 @@ void disk_tool_t::dump_meta_header(blockstore_meta_header_v1_t *hdr)
|
||||||
if (hdr)
|
if (hdr)
|
||||||
{
|
{
|
||||||
printf(
|
printf(
|
||||||
"{\"version\":\"0.6\",\"dsk.meta_block_size\":%u,\"dsk.data_block_size\":%u,\"dsk.bitmap_granularity\":%u,\"entries\":[\n",
|
"{\"version\":\"0.6\",\"meta_block_size\":%u,\"data_block_size\":%u,\"bitmap_granularity\":%u,\"entries\":[\n",
|
||||||
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity
|
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("{\"version\":\"0.5\",\"dsk.meta_block_size\":%lu,\"entries\":[\n", dsk.meta_block_size);
|
printf("{\"version\":\"0.5\",\"meta_block_size\":%lu,\"entries\":[\n", dsk.meta_block_size);
|
||||||
}
|
}
|
||||||
first = true;
|
first = true;
|
||||||
}
|
}
|
||||||
|
@ -561,6 +583,8 @@ int disk_tool_t::resize_data()
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
// Check parameters and fill allocator
|
// Check parameters and fill allocator
|
||||||
|
fprintf(stderr, "Reading metadata\n");
|
||||||
|
data_alloc = new allocator((new_data_len < dsk.data_len ? dsk.data_len : new_data_len) / dsk.data_block_size);
|
||||||
r = process_meta(
|
r = process_meta(
|
||||||
[this](blockstore_meta_header_v1_t *hdr)
|
[this](blockstore_meta_header_v1_t *hdr)
|
||||||
{
|
{
|
||||||
|
@ -573,6 +597,7 @@ int disk_tool_t::resize_data()
|
||||||
);
|
);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
|
fprintf(stderr, "Reading journal\n");
|
||||||
r = process_journal([this](void *buf)
|
r = process_journal([this](void *buf)
|
||||||
{
|
{
|
||||||
return process_journal_block(buf, [this](int num, journal_entry *je)
|
return process_journal_block(buf, [this](int num, journal_entry *je)
|
||||||
|
@ -590,17 +615,21 @@ int disk_tool_t::resize_data()
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
// Copy data blocks into new places
|
// Copy data blocks into new places
|
||||||
|
fprintf(stderr, "Moving data blocks\n");
|
||||||
r = resize_copy_data();
|
r = resize_copy_data();
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
// Rewrite journal
|
// Rewrite journal
|
||||||
|
fprintf(stderr, "Writing new journal\n");
|
||||||
r = resize_rewrite_journal();
|
r = resize_rewrite_journal();
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
// Rewrite metadata
|
// Rewrite metadata
|
||||||
|
fprintf(stderr, "Writing new metadata\n");
|
||||||
r = resize_rewrite_meta();
|
r = resize_rewrite_meta();
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
return r;
|
return r;
|
||||||
|
fprintf(stderr, "Done\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -673,8 +702,8 @@ void disk_tool_t::resize_init(blockstore_meta_header_v1_t *hdr)
|
||||||
fprintf(stderr, "Data alignment mismatch\n");
|
fprintf(stderr, "Data alignment mismatch\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
data_idx_diff = (new_data_offset-dsk.data_offset) / dsk.data_block_size;
|
data_idx_diff = ((int64_t)(dsk.data_offset-new_data_offset)) / dsk.data_block_size;
|
||||||
free_first = new_data_offset > dsk.data_offset ? data_idx_diff : 0;
|
free_first = new_data_offset > dsk.data_offset ? (new_data_offset-dsk.data_offset) / dsk.data_block_size : 0;
|
||||||
free_last = (new_data_offset+new_data_len < dsk.data_offset+dsk.data_len)
|
free_last = (new_data_offset+new_data_len < dsk.data_offset+dsk.data_len)
|
||||||
? (dsk.data_offset+dsk.data_len-new_data_offset-new_data_len) / dsk.data_block_size
|
? (dsk.data_offset+dsk.data_len-new_data_offset-new_data_len) / dsk.data_block_size
|
||||||
: 0;
|
: 0;
|
||||||
|
@ -737,6 +766,7 @@ int disk_tool_t::resize_remap_blocks()
|
||||||
fprintf(stderr, "Not enough space to move data\n");
|
fprintf(stderr, "Not enough space to move data\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
data_alloc->set(new_loc, true);
|
||||||
data_remap[p.first] = new_loc;
|
data_remap[p.first] = new_loc;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -754,13 +784,14 @@ int disk_tool_t::resize_copy_data()
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Failed to open data device %s: %s\n", dsk.data_device.c_str(), strerror(errno));
|
fprintf(stderr, "Failed to open data device %s: %s\n", dsk.data_device.c_str(), strerror(errno));
|
||||||
delete ringloop;
|
delete ringloop;
|
||||||
|
ringloop = NULL;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
moving_blocks = new resizer_data_moving_t[iodepth];
|
moving_blocks = new resizer_data_moving_t[iodepth];
|
||||||
moving_blocks[0].buf = memalign_or_die(MEM_ALIGNMENT, iodepth*dsk.data_block_size);
|
moving_blocks[0].buf = memalign_or_die(MEM_ALIGNMENT, iodepth*dsk.data_block_size);
|
||||||
for (int i = 1; i < iodepth; i++)
|
for (int i = 1; i < iodepth; i++)
|
||||||
{
|
{
|
||||||
moving_blocks[i].buf = moving_blocks[0].buf + i*dsk.data_block_size;
|
moving_blocks[i].buf = (uint8_t*)moving_blocks[0].buf + i*dsk.data_block_size;
|
||||||
}
|
}
|
||||||
remap_active = 1;
|
remap_active = 1;
|
||||||
remap_it = data_remap.begin();
|
remap_it = data_remap.begin();
|
||||||
|
@ -845,6 +876,7 @@ int disk_tool_t::resize_copy_data()
|
||||||
close(dsk.data_fd);
|
close(dsk.data_fd);
|
||||||
dsk.data_fd = -1;
|
dsk.data_fd = -1;
|
||||||
delete ringloop;
|
delete ringloop;
|
||||||
|
ringloop = NULL;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -855,23 +887,26 @@ int disk_tool_t::resize_rewrite_journal()
|
||||||
new_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len);
|
new_buf = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, new_journal_len);
|
||||||
new_journal_ptr = new_buf;
|
new_journal_ptr = new_buf;
|
||||||
new_journal_data = new_journal_ptr + dsk.journal_block_size;
|
new_journal_data = new_journal_ptr + dsk.journal_block_size;
|
||||||
|
new_journal_in_pos = 0;
|
||||||
memset(new_buf, 0, new_journal_len);
|
memset(new_buf, 0, new_journal_len);
|
||||||
process_journal([this](void *buf)
|
process_journal([this](void *buf)
|
||||||
{
|
{
|
||||||
return process_journal_block(buf, [this](int num, journal_entry *je)
|
return process_journal_block(buf, [this](int num, journal_entry *je)
|
||||||
{
|
{
|
||||||
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
|
|
||||||
if (je->type == JE_START)
|
if (je->type == JE_START)
|
||||||
{
|
{
|
||||||
|
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
|
||||||
*((journal_entry_start*)ne) = (journal_entry_start){
|
*((journal_entry_start*)ne) = (journal_entry_start){
|
||||||
.magic = JOURNAL_MAGIC,
|
.magic = JOURNAL_MAGIC,
|
||||||
.type = JE_START,
|
.type = JE_START,
|
||||||
.size = sizeof(ne->start),
|
.size = sizeof(journal_entry_start),
|
||||||
.journal_start = dsk.journal_block_size,
|
.journal_start = dsk.journal_block_size,
|
||||||
.version = JOURNAL_VERSION,
|
.version = JOURNAL_VERSION,
|
||||||
};
|
};
|
||||||
ne->crc32 = je_crc32(ne);
|
ne->crc32 = je_crc32(ne);
|
||||||
new_journal_ptr += dsk.journal_block_size;
|
new_journal_ptr += dsk.journal_block_size;
|
||||||
|
new_journal_data = new_journal_ptr+dsk.journal_block_size;
|
||||||
|
new_journal_in_pos = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -883,7 +918,7 @@ int disk_tool_t::resize_rewrite_journal()
|
||||||
fprintf(stderr, "Error: live entries don't fit to the new journal\n");
|
fprintf(stderr, "Error: live entries don't fit to the new journal\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
new_journal_data += dsk.journal_block_size;
|
new_journal_data = new_journal_ptr+dsk.journal_block_size;
|
||||||
new_journal_in_pos = 0;
|
new_journal_in_pos = 0;
|
||||||
if (dsk.journal_block_size < je->size)
|
if (dsk.journal_block_size < je->size)
|
||||||
{
|
{
|
||||||
|
@ -891,6 +926,7 @@ int disk_tool_t::resize_rewrite_journal()
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
journal_entry *ne = (journal_entry*)(new_journal_ptr + new_journal_in_pos);
|
||||||
memcpy(ne, je, je->size);
|
memcpy(ne, je, je->size);
|
||||||
ne->crc32_prev = new_crc32_prev;
|
ne->crc32_prev = new_crc32_prev;
|
||||||
if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT)
|
if (je->type == JE_BIG_WRITE || je->type == JE_BIG_WRITE_INSTANT)
|
||||||
|
@ -901,7 +937,7 @@ int disk_tool_t::resize_rewrite_journal()
|
||||||
{
|
{
|
||||||
ne->big_write.location = remap_it->second * dsk.data_block_size;
|
ne->big_write.location = remap_it->second * dsk.data_block_size;
|
||||||
}
|
}
|
||||||
ne->big_write.location += data_idx_diff;
|
ne->big_write.location += data_idx_diff * dsk.data_block_size;
|
||||||
}
|
}
|
||||||
else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT)
|
else if (je->type == JE_SMALL_WRITE || je->type == JE_SMALL_WRITE_INSTANT)
|
||||||
{
|
{
|
||||||
|
@ -957,7 +993,10 @@ int disk_tool_t::resize_rewrite_meta()
|
||||||
if (remap_it != data_remap.end())
|
if (remap_it != data_remap.end())
|
||||||
block_num = remap_it->second;
|
block_num = remap_it->second;
|
||||||
if (block_num < free_first || block_num >= total_blocks-free_last)
|
if (block_num < free_first || block_num >= total_blocks-free_last)
|
||||||
return;
|
{
|
||||||
|
fprintf(stderr, "BUG: remapped block not in range\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
block_num += data_idx_diff;
|
block_num += data_idx_diff;
|
||||||
clean_disk_entry *new_entry = (clean_disk_entry*)(new_buf + dsk.meta_block_size +
|
clean_disk_entry *new_entry = (clean_disk_entry*)(new_buf + dsk.meta_block_size +
|
||||||
dsk.meta_block_size*(block_num / new_entries_per_block) +
|
dsk.meta_block_size*(block_num / new_entries_per_block) +
|
||||||
|
|
Loading…
Reference in New Issue