Use 4K disk alignment and 4K journal/metadata block sizes by default in blockstore, use MEM_ALIGNMENT in OSD code

trace-sqes
Vitaliy Filippov 2020-04-14 19:19:56 +03:00
parent 37b27c3025
commit f3a7ccff50
8 changed files with 23 additions and 19 deletions

View File

@ -15,7 +15,9 @@
// Memory alignment for direct I/O (usually 512 bytes) // Memory alignment for direct I/O (usually 512 bytes)
// All other alignments must be a multiple of this one // All other alignments must be a multiple of this one
#ifndef MEM_ALIGNMENT
#define MEM_ALIGNMENT 512 #define MEM_ALIGNMENT 512
#endif
// Default block size is 128 KB, current allowed range is 4K - 128M // Default block size is 128 KB, current allowed range is 4K - 128M
#define DEFAULT_ORDER 17 #define DEFAULT_ORDER 17

View File

@ -186,11 +186,11 @@ class blockstore_impl_t
uint64_t data_offset; uint64_t data_offset;
uint64_t cfg_journal_size, cfg_data_size; uint64_t cfg_journal_size, cfg_data_size;
// Required write alignment and journal/metadata/data areas' location alignment // Required write alignment and journal/metadata/data areas' location alignment
uint32_t disk_alignment = 512; uint32_t disk_alignment = 4096;
// Journal block size - minimum_io_size of the journal device is the best choice // Journal block size - minimum_io_size of the journal device is the best choice
uint64_t journal_block_size = 512; uint64_t journal_block_size = 4096;
// Metadata block size - minimum_io_size of the metadata device is the best choice // Metadata block size - minimum_io_size of the metadata device is the best choice
uint64_t meta_block_size = 512; uint64_t meta_block_size = 4096;
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment // Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
uint64_t bitmap_granularity = 4096; uint64_t bitmap_granularity = 4096;
bool readonly = false; bool readonly = false;

View File

@ -137,7 +137,7 @@ struct journal_t
bool inmemory = false; bool inmemory = false;
void *buffer = NULL; void *buffer = NULL;
uint64_t block_size = 512; uint64_t block_size;
uint64_t offset, len; uint64_t offset, len;
// Next free block offset // Next free block offset
uint64_t next_free = 0; uint64_t next_free = 0;

View File

@ -75,7 +75,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
} }
if (!disk_alignment) if (!disk_alignment)
{ {
disk_alignment = 512; disk_alignment = 4096;
} }
else if (disk_alignment % MEM_ALIGNMENT) else if (disk_alignment % MEM_ALIGNMENT)
{ {
@ -83,7 +83,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
} }
if (!journal_block_size) if (!journal_block_size)
{ {
journal_block_size = 512; journal_block_size = 4096;
} }
else if (journal_block_size % MEM_ALIGNMENT) else if (journal_block_size % MEM_ALIGNMENT)
{ {
@ -91,7 +91,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
} }
if (!meta_block_size) if (!meta_block_size)
{ {
meta_block_size = 512; meta_block_size = 4096;
} }
else if (meta_block_size % MEM_ALIGNMENT) else if (meta_block_size % MEM_ALIGNMENT)
{ {

View File

@ -408,9 +408,9 @@ void osd_t::exec_op(osd_op_t *cur_op)
if (cur_op->req.hdr.magic != SECONDARY_OSD_OP_MAGIC || if (cur_op->req.hdr.magic != SECONDARY_OSD_OP_MAGIC ||
cur_op->req.hdr.opcode < OSD_OP_MIN || cur_op->req.hdr.opcode > OSD_OP_MAX || cur_op->req.hdr.opcode < OSD_OP_MIN || cur_op->req.hdr.opcode > OSD_OP_MAX ||
(cur_op->req.hdr.opcode == OSD_OP_SECONDARY_READ || cur_op->req.hdr.opcode == OSD_OP_SECONDARY_WRITE) && (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_READ || cur_op->req.hdr.opcode == OSD_OP_SECONDARY_WRITE) &&
(cur_op->req.sec_rw.len > OSD_RW_MAX || cur_op->req.sec_rw.len % OSD_RW_ALIGN || cur_op->req.sec_rw.offset % OSD_RW_ALIGN) || (cur_op->req.sec_rw.len > OSD_RW_MAX || cur_op->req.sec_rw.len % bs_disk_alignment || cur_op->req.sec_rw.offset % bs_disk_alignment) ||
(cur_op->req.hdr.opcode == OSD_OP_READ || cur_op->req.hdr.opcode == OSD_OP_WRITE) && (cur_op->req.hdr.opcode == OSD_OP_READ || cur_op->req.hdr.opcode == OSD_OP_WRITE) &&
(cur_op->req.rw.len > OSD_RW_MAX || cur_op->req.rw.len % OSD_RW_ALIGN || cur_op->req.rw.offset % OSD_RW_ALIGN)) (cur_op->req.rw.len > OSD_RW_MAX || cur_op->req.rw.len % bs_disk_alignment || cur_op->req.rw.offset % bs_disk_alignment))
{ {
// Bad command // Bad command
finish_op(cur_op, -EINVAL); finish_op(cur_op, -EINVAL);

View File

@ -24,7 +24,9 @@
#define OSD_OP_SYNC 12 #define OSD_OP_SYNC 12
#define OSD_OP_MAX 12 #define OSD_OP_MAX 12
// Alignment & limit for read/write operations // Alignment & limit for read/write operations
#define OSD_RW_ALIGN 512 #ifndef MEM_ALIGNMENT
#define MEM_ALIGNMENT 512
#endif
#define OSD_RW_MAX 64*1024*1024 #define OSD_RW_MAX 64*1024*1024
// common request and reply headers // common request and reply headers

View File

@ -158,32 +158,32 @@ void osd_t::handle_op_hdr(osd_client_t *cl)
if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_READ) if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_READ)
{ {
if (cur_op->req.sec_rw.len > 0) if (cur_op->req.sec_rw.len > 0)
cur_op->buf = memalign(512, cur_op->req.sec_rw.len); cur_op->buf = memalign(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
cl->read_remaining = 0; cl->read_remaining = 0;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_WRITE) else if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_WRITE)
{ {
if (cur_op->req.sec_rw.len > 0) if (cur_op->req.sec_rw.len > 0)
cur_op->buf = memalign(512, cur_op->req.sec_rw.len); cur_op->buf = memalign(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
cl->read_remaining = cur_op->req.sec_rw.len; cl->read_remaining = cur_op->req.sec_rw.len;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_STABILIZE || else if (cur_op->req.hdr.opcode == OSD_OP_SECONDARY_STABILIZE ||
cur_op->req.hdr.opcode == OSD_OP_SECONDARY_ROLLBACK) cur_op->req.hdr.opcode == OSD_OP_SECONDARY_ROLLBACK)
{ {
if (cur_op->req.sec_stab.len > 0) if (cur_op->req.sec_stab.len > 0)
cur_op->buf = memalign(512, cur_op->req.sec_stab.len); cur_op->buf = memalign(MEM_ALIGNMENT, cur_op->req.sec_stab.len);
cl->read_remaining = cur_op->req.sec_stab.len; cl->read_remaining = cur_op->req.sec_stab.len;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_READ) else if (cur_op->req.hdr.opcode == OSD_OP_READ)
{ {
if (cur_op->req.rw.len > 0) if (cur_op->req.rw.len > 0)
cur_op->buf = memalign(512, cur_op->req.rw.len); cur_op->buf = memalign(MEM_ALIGNMENT, cur_op->req.rw.len);
cl->read_remaining = 0; cl->read_remaining = 0;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_WRITE) else if (cur_op->req.hdr.opcode == OSD_OP_WRITE)
{ {
if (cur_op->req.rw.len > 0) if (cur_op->req.rw.len > 0)
cur_op->buf = memalign(512, cur_op->req.rw.len); cur_op->buf = memalign(MEM_ALIGNMENT, cur_op->req.rw.len);
cl->read_remaining = cur_op->req.rw.len; cl->read_remaining = cur_op->req.rw.len;
} }
if (cl->read_remaining > 0) if (cl->read_remaining > 0)
@ -227,7 +227,7 @@ void osd_t::handle_reply_hdr(osd_client_t *cl)
else if (op->reply.hdr.opcode == OSD_OP_SECONDARY_LIST && else if (op->reply.hdr.opcode == OSD_OP_SECONDARY_LIST &&
op->reply.hdr.retval > 0) op->reply.hdr.retval > 0)
{ {
op->buf = memalign(512, sizeof(obj_ver_id) * op->reply.hdr.retval); op->buf = memalign(MEM_ALIGNMENT, sizeof(obj_ver_id) * op->reply.hdr.retval);
cl->read_state = CL_READ_REPLY_DATA; cl->read_state = CL_READ_REPLY_DATA;
cl->read_reply_id = op->req.hdr.id; cl->read_reply_id = op->req.hdr.id;
cl->read_buf = op->buf; cl->read_buf = op->buf;

View File

@ -181,7 +181,7 @@ uint64_t test_write(int connect_fd, uint64_t inode, uint64_t stripe, uint64_t ve
op.sec_rw.version = version; op.sec_rw.version = version;
op.sec_rw.offset = 0; op.sec_rw.offset = 0;
op.sec_rw.len = 128*1024; op.sec_rw.len = 128*1024;
void *data = memalign(512, op.sec_rw.len); void *data = memalign(MEM_ALIGNMENT, op.sec_rw.len);
for (int i = 0; i < (op.sec_rw.len)/sizeof(uint64_t); i++) for (int i = 0; i < (op.sec_rw.len)/sizeof(uint64_t); i++)
((uint64_t*)data)[i] = pattern; ((uint64_t*)data)[i] = pattern;
write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE); write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE);
@ -216,7 +216,7 @@ void* test_primary_read(int connect_fd, uint64_t inode, uint64_t offset, uint64_
op.rw.inode = inode; op.rw.inode = inode;
op.rw.offset = offset; op.rw.offset = offset;
op.rw.len = len; op.rw.len = len;
void *data = memalign(512, len); void *data = memalign(MEM_ALIGNMENT, len);
write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE); write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE);
int r = read_blocking(connect_fd, reply.buf, OSD_PACKET_SIZE); int r = read_blocking(connect_fd, reply.buf, OSD_PACKET_SIZE);
if (!check_reply(r, op, reply, len)) if (!check_reply(r, op, reply, len))
@ -244,7 +244,7 @@ void test_primary_write(int connect_fd, uint64_t inode, uint64_t offset, uint64_
op.rw.inode = inode; op.rw.inode = inode;
op.rw.offset = offset; op.rw.offset = offset;
op.rw.len = len; op.rw.len = len;
void *data = memalign(512, len); void *data = memalign(MEM_ALIGNMENT, len);
set_pattern(data, len, pattern); set_pattern(data, len, pattern);
write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE); write_blocking(connect_fd, op.buf, OSD_PACKET_SIZE);
write_blocking(connect_fd, data, len); write_blocking(connect_fd, data, len);