Allocate bitmaps along with stripes to avoid memory fragmentation

rdma-zerocopy
Vitaliy Filippov 2021-01-12 01:09:59 +03:00
parent 004f265393
commit 6bf88883ac
4 changed files with 24 additions and 21 deletions

View File

@ -51,9 +51,9 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
finish_op(cur_op, -EINVAL);
return false;
}
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
1, sizeof(osd_primary_op_data_t) + entry_attr_size +
sizeof(osd_rmw_stripe_t) * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size)
1, sizeof(osd_primary_op_data_t) + (entry_attr_size + sizeof(osd_rmw_stripe_t)) * stripe_count
);
op_data->pg_num = pg_num;
op_data->oid = oid;
@ -62,6 +62,11 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
op_data->pg_data_size = pg_data_size;
cur_op->op_data = op_data;
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
for (int i = 0; i < stripe_count; i++)
{
op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + entry_attr_size*i;
}
pg_it->second.inflight++;
return true;
}
@ -117,7 +122,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
if (pg.state == PG_ACTIVE || op_data->scheme == POOL_SCHEME_REPLICATED)
{
// Fast happy-path
cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0, entry_attr_size);
cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0);
submit_primary_subops(SUBMIT_READ, op_data->target_ver,
(op_data->scheme == POOL_SCHEME_REPLICATED ? pg.pg_size : op_data->pg_data_size), pg.cur_set.data(), cur_op);
op_data->st = 1;
@ -135,7 +140,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
op_data->pg_size = pg.pg_size;
op_data->scheme = pg.scheme;
op_data->degraded = 1;
cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0, entry_attr_size);
cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0);
submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.pg_size, cur_set, cur_op);
op_data->st = 1;
}

View File

@ -309,7 +309,7 @@ int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg
return 0;
}
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size, uint32_t bitmap_size)
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size)
{
// Calculate buffer size
uint64_t buf_size = add_size;
@ -321,7 +321,7 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
}
}
// Allocate buffer
void *buf = memalign_or_die(MEM_ALIGNMENT, buf_size + bitmap_size*read_pg_size);
void *buf = memalign_or_die(MEM_ALIGNMENT, buf_size);
uint64_t buf_pos = add_size;
for (int role = 0; role < read_pg_size; role++)
{
@ -331,16 +331,6 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
buf_pos += stripes[role].read_end - stripes[role].read_start;
}
}
// Bitmaps are allocated in the end so data buffers remain aligned
// FIXME: Don't allocate bitmaps here because it probably increases memory fragmentation
if (bitmap_size > 0)
{
for (int role = 0; role < read_pg_size; role++)
{
stripes[role].bmp_buf = buf + buf_pos;
buf_pos += bitmap_size;
}
}
return buf;
}
@ -446,7 +436,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
}
}
// Allocate read buffers
void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start), bitmap_size);
void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start));
// Position write buffers
uint64_t buf_pos = 0, in_pos = 0;
for (int role = 0; role < pg_size; role++)

View File

@ -35,7 +35,7 @@ void reconstruct_stripes_xor(osd_rmw_stripe_t *stripes, int pg_size, uint32_t bi
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int pg_minsize, int pg_size);
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size, uint32_t bitmap_size);
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size);
void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set,
uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set,

View File

@ -127,10 +127,13 @@ void test1()
void test4()
{
const uint32_t bmp = 4;
unsigned bitmaps[3] = { 0 };
osd_num_t osd_set[3] = { 1, 0, 3 };
osd_rmw_stripe_t stripes[3] = { 0 };
// Test 4.1
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
for (int i = 0; i < 3; i++)
stripes[i].bmp_buf = bitmaps+i;
void* write_buf = malloc(8192);
void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024, bmp);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
@ -652,7 +655,7 @@ void test13()
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
assert(stripes[3].read_start == 0 && stripes[3].read_end == 128*1024);
void *read_buf = alloc_read_buffer(stripes, 4, 0, 0);
void *read_buf = alloc_read_buffer(stripes, 4, 0);
assert(read_buf);
assert(stripes[0].read_buf == read_buf);
assert(stripes[1].read_buf == read_buf+128*1024);
@ -683,7 +686,7 @@ void test13()
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
assert(stripes[3].read_start == 0 && stripes[3].read_end == 128*1024);
read_buf = alloc_read_buffer(stripes, 4, 0, 0);
read_buf = alloc_read_buffer(stripes, 4, 0);
assert(read_buf);
assert(stripes[0].read_buf == read_buf);
assert(stripes[1].read_buf == NULL);
@ -723,6 +726,7 @@ void test14()
osd_num_t osd_set[3] = { 1, 2, 0 };
osd_num_t write_osd_set[3] = { 1, 2, 3 };
osd_rmw_stripe_t stripes[3] = { 0 };
unsigned bitmaps[3] = { 0 };
// Test 13.0
void *write_buf = malloc_or_die(8192);
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
@ -731,6 +735,8 @@ void test14()
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
// Test 13.1
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024, bmp);
for (int i = 0; i < 3; i++)
stripes[i].bmp_buf = bitmaps+i;
assert(rmw_buf);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024-4096);
assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024);
@ -777,7 +783,9 @@ void test14()
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
void *read_buf = alloc_read_buffer(stripes, 3, 0, bmp);
void *read_buf = alloc_read_buffer(stripes, 3, 0);
for (int i = 0; i < 3; i++)
stripes[i].bmp_buf = bitmaps+i;
assert(read_buf);
assert(stripes[0].read_buf == read_buf);
assert(stripes[1].read_buf == read_buf+128*1024);