Allow to overwrite incomplete objects or parts of objects to recover them

Vitaliy Filippov 2020-10-24 02:14:41 +03:00
parent 660c2412fb
commit e8ac08be14
4 changed files with 139 additions and 4 deletions

View File

@ -23,6 +23,9 @@ osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS)
stub_osd: stub_osd.o rw_blocking.o
g++ $(CXXFLAGS) -o $@ stub_osd.o rw_blocking.o -ltcmalloc_minimal
osd_rmw_test: osd_rmw_test.o
g++ $(CXXFLAGS) -o $@ osd_rmw_test.o
STUB_URING_OSD_OBJS := stub_uring_osd.o epoll_manager.o messenger.o msgr_send.o msgr_receive.o ringloop.o timerfd_manager.o json11.o
stub_uring_osd: $(STUB_URING_OSD_OBJS)
g++ $(CXXFLAGS) -o $@ -ltcmalloc_minimal $(STUB_URING_OSD_OBJS) -luring

View File

@ -239,6 +239,12 @@ resume_1:
{
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size);
if (!cur_op->rmw_buf)
{
// Refuse partial overwrite of an incomplete object
cur_op->reply.hdr.retval = -EINVAL;
goto continue_others;
}
}
// Read required blocks
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op);
@ -361,10 +367,12 @@ resume_9:
remove_object_from_state(op_data->oid, op_data->object_state, pg);
pg.clean_count++;
}
cur_op->reply.hdr.retval = cur_op->req.rw.len;
continue_others:
// Remove version override
pg.ver_override.erase(op_data->oid);
object_id oid = op_data->oid;
finish_op(cur_op, cur_op->req.rw.len);
finish_op(cur_op, cur_op->reply.hdr.retval);
// Continue other write operations to the same object
auto next_it = pg.write_queue.find(oid);
auto this_it = next_it;

View File

@ -208,7 +208,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
{
pg_cursize = 0;
// Object is degraded/misplaced and will be moved to <write_osd_set>
for (int role = 0; role < pg_size; role++)
for (int role = 0; role < pg_minsize; role++)
{
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0)
{
@ -228,6 +228,20 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
pg_cursize++;
}
}
for (int role = pg_minsize; role < pg_size; role++)
{
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0)
{
for (int r2 = 0; r2 < pg_minsize; r2++)
{
cover_read(0, chunk_size, stripes[r2]);
}
}
if (read_osd_set[role] != 0)
{
pg_cursize++;
}
}
}
if (pg_cursize < pg_size)
{
@ -251,8 +265,8 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
}
if (found < pg_minsize)
{
// FIXME Object is incomplete - refuse partial overwrite
assert(0);
// Object is incomplete - refuse partial overwrite
return NULL;
}
}
}

View File

@ -13,6 +13,8 @@ void test6();
void test7();
void test8();
void test9();
void test10();
void test11();
/***
@ -91,6 +93,28 @@ Cases:
}
+ check write0 buffer
10. full overwrite/recovery case:
calc_rmw(offset=0, len=256K, read_osd_set=[1,0,0], write_osd_set=[1,2,3])
= {
read: [ [ 0, 0 ], [ 0, 0 ], [ 0, 0 ] ],
write: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ],
input buffer: [ write0, write1 ],
rmw buffer: [ write2 ],
}
then, after calc_rmw_parity_xor(): all the same
+ check write2 buffer
10. partial recovery case:
calc_rmw(offset=128K, len=128K, read_osd_set=[1,0,0], write_osd_set=[1,2,3])
= {
read: [ [ 0, 128K ], [ 0, 0 ], [ 0, 0 ] ],
write: [ [ 0, 0 ], [ 0, 128K ], [ 0, 128K ] ],
input buffer: [ write1 ],
rmw buffer: [ write2, read0 ],
}
then, after calc_rmw_parity_xor(): all the same
+ check write2 buffer
***/
int main(int narg, char *args[])
@ -109,6 +133,10 @@ int main(int narg, char *args[])
test8();
// Test 9
test9();
// Test 10
test10();
// Test 11
test11();
// End
printf("all ok\n");
return 0;
@ -361,3 +389,85 @@ void test9()
check_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
free(rmw_buf);
}
void test10()
{
osd_num_t osd_set[3] = { 1, 0, 0 };
osd_num_t write_osd_set[3] = { 1, 2, 3 };
osd_rmw_stripe_t stripes[3] = { 0 };
// Test 10.0
split_stripes(2, 128*1024, 0, 256*1024, stripes);
assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024);
assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024);
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
// Test 10.1
void *write_buf = malloc(256*1024);
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024);
assert(rmw_buf);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 0);
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == NULL);
assert(stripes[1].read_buf == NULL);
assert(stripes[2].read_buf == NULL);
assert(stripes[0].write_buf == write_buf);
assert(stripes[1].write_buf == write_buf+128*1024);
assert(stripes[2].write_buf == rmw_buf);
// Test 10.2
set_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
set_pattern(stripes[1].write_buf, 128*1024, PATTERN2);
calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].write_buf == write_buf);
assert(stripes[1].write_buf == write_buf+128*1024);
assert(stripes[2].write_buf == rmw_buf);
check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2);
free(rmw_buf);
free(write_buf);
}
void test11()
{
osd_num_t osd_set[3] = { 1, 0, 0 };
osd_num_t write_osd_set[3] = { 1, 2, 3 };
osd_rmw_stripe_t stripes[3] = { 0 };
// Test 11.0
split_stripes(2, 128*1024, 128*1024, 256*1024, stripes);
assert(stripes[0].req_start == 0 && stripes[0].req_end == 0);
assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024);
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
// Test 11.1
void *write_buf = malloc(256*1024);
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024);
assert(rmw_buf);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == rmw_buf+128*1024);
assert(stripes[1].read_buf == NULL);
assert(stripes[2].read_buf == NULL);
assert(stripes[0].write_buf == NULL);
assert(stripes[1].write_buf == write_buf);
assert(stripes[2].write_buf == rmw_buf);
// Test 11.2
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1);
set_pattern(stripes[1].write_buf, 128*1024, PATTERN2);
calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].write_buf == NULL);
assert(stripes[1].write_buf == write_buf);
assert(stripes[2].write_buf == rmw_buf);
check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2);
free(rmw_buf);
free(write_buf);
}