diff --git a/src/osd_primary.cpp b/src/osd_primary.cpp index c7da0fa9..9c10bf29 100644 --- a/src/osd_primary.cpp +++ b/src/osd_primary.cpp @@ -51,9 +51,9 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op) finish_op(cur_op, -EINVAL); return false; } + int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size); osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die( - 1, sizeof(osd_primary_op_data_t) + entry_attr_size + - sizeof(osd_rmw_stripe_t) * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size) + 1, sizeof(osd_primary_op_data_t) + (entry_attr_size + sizeof(osd_rmw_stripe_t)) * stripe_count ); op_data->pg_num = pg_num; op_data->oid = oid; @@ -62,6 +62,11 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op) op_data->pg_data_size = pg_data_size; cur_op->op_data = op_data; split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes); + // Allocate bitmaps along with stripes to avoid extra allocations and fragmentation + for (int i = 0; i < stripe_count; i++) + { + op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + entry_attr_size*i; + } pg_it->second.inflight++; return true; } @@ -117,7 +122,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op) if (pg.state == PG_ACTIVE || op_data->scheme == POOL_SCHEME_REPLICATED) { // Fast happy-path - cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0, entry_attr_size); + cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0); submit_primary_subops(SUBMIT_READ, op_data->target_ver, (op_data->scheme == POOL_SCHEME_REPLICATED ? pg.pg_size : op_data->pg_data_size), pg.cur_set.data(), cur_op); op_data->st = 1; @@ -135,7 +140,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op) op_data->pg_size = pg.pg_size; op_data->scheme = pg.scheme; op_data->degraded = 1; - cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0, entry_attr_size); + cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0); submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.pg_size, cur_set, cur_op); op_data->st = 1; } diff --git a/src/osd_rmw.cpp b/src/osd_rmw.cpp index 0000d83f..27e297d7 100644 --- a/src/osd_rmw.cpp +++ b/src/osd_rmw.cpp @@ -309,7 +309,7 @@ int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg return 0; } -void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size, uint32_t bitmap_size) +void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size) { // Calculate buffer size uint64_t buf_size = add_size; @@ -321,7 +321,7 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad } } // Allocate buffer - void *buf = memalign_or_die(MEM_ALIGNMENT, buf_size + bitmap_size*read_pg_size); + void *buf = memalign_or_die(MEM_ALIGNMENT, buf_size); uint64_t buf_pos = add_size; for (int role = 0; role < read_pg_size; role++) { @@ -331,16 +331,6 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad buf_pos += stripes[role].read_end - stripes[role].read_start; } } - // Bitmaps are allocated in the end so data buffers remain aligned - // FIXME: Don't allocate bitmaps here because it probably increases memory fragmentation - if (bitmap_size > 0) - { - for (int role = 0; role < read_pg_size; role++) - { - stripes[role].bmp_buf = buf + buf_pos; - buf_pos += bitmap_size; - } - } return buf; } @@ -446,7 +436,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ } } // Allocate read buffers - void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start), bitmap_size); + void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start)); // Position write buffers uint64_t buf_pos = 0, in_pos = 0; for (int role = 0; role < pg_size; role++) diff --git a/src/osd_rmw.h b/src/osd_rmw.h index 931c5cb1..0e175d4f 100644 --- a/src/osd_rmw.h +++ b/src/osd_rmw.h @@ -35,7 +35,7 @@ void reconstruct_stripes_xor(osd_rmw_stripe_t *stripes, int pg_size, uint32_t bi int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg_minsize, int pg_size); -void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size, uint32_t bitmap_size); +void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size); void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, diff --git a/src/osd_rmw_test.cpp b/src/osd_rmw_test.cpp index 64b278c7..02b158cc 100644 --- a/src/osd_rmw_test.cpp +++ b/src/osd_rmw_test.cpp @@ -127,10 +127,13 @@ void test1() void test4() { const uint32_t bmp = 4; + unsigned bitmaps[3] = { 0 }; osd_num_t osd_set[3] = { 1, 0, 3 }; osd_rmw_stripe_t stripes[3] = { 0 }; // Test 4.1 split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes); + for (int i = 0; i < 3; i++) + stripes[i].bmp_buf = bitmaps+i; void* write_buf = malloc(8192); void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024, bmp); assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024); @@ -652,7 +655,7 @@ void test13() assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024); assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024); assert(stripes[3].read_start == 0 && stripes[3].read_end == 128*1024); - void *read_buf = alloc_read_buffer(stripes, 4, 0, 0); + void *read_buf = alloc_read_buffer(stripes, 4, 0); assert(read_buf); assert(stripes[0].read_buf == read_buf); assert(stripes[1].read_buf == read_buf+128*1024); @@ -683,7 +686,7 @@ void test13() assert(stripes[1].read_start == 0 && stripes[1].read_end == 0); assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024); assert(stripes[3].read_start == 0 && stripes[3].read_end == 128*1024); - read_buf = alloc_read_buffer(stripes, 4, 0, 0); + read_buf = alloc_read_buffer(stripes, 4, 0); assert(read_buf); assert(stripes[0].read_buf == read_buf); assert(stripes[1].read_buf == NULL); @@ -723,6 +726,7 @@ void test14() osd_num_t osd_set[3] = { 1, 2, 0 }; osd_num_t write_osd_set[3] = { 1, 2, 3 }; osd_rmw_stripe_t stripes[3] = { 0 }; + unsigned bitmaps[3] = { 0 }; // Test 13.0 void *write_buf = malloc_or_die(8192); split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes); @@ -731,6 +735,8 @@ void test14() assert(stripes[2].req_start == 0 && stripes[2].req_end == 0); // Test 13.1 void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024, bmp); + for (int i = 0; i < 3; i++) + stripes[i].bmp_buf = bitmaps+i; assert(rmw_buf); assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024-4096); assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024); @@ -777,7 +783,9 @@ void test14() assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024); assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024); assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024); - void *read_buf = alloc_read_buffer(stripes, 3, 0, bmp); + void *read_buf = alloc_read_buffer(stripes, 3, 0); + for (int i = 0; i < 3; i++) + stripes[i].bmp_buf = bitmaps+i; assert(read_buf); assert(stripes[0].read_buf == read_buf); assert(stripes[1].read_buf == read_buf+128*1024);