diff --git a/Makefile b/Makefile index aa2ca0b9..e4c493ef 100644 --- a/Makefile +++ b/Makefile @@ -23,6 +23,9 @@ osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS) stub_osd: stub_osd.o rw_blocking.o g++ $(CXXFLAGS) -o $@ stub_osd.o rw_blocking.o -ltcmalloc_minimal +osd_rmw_test: osd_rmw_test.o + g++ $(CXXFLAGS) -o $@ osd_rmw_test.o + STUB_URING_OSD_OBJS := stub_uring_osd.o epoll_manager.o messenger.o msgr_send.o msgr_receive.o ringloop.o timerfd_manager.o json11.o stub_uring_osd: $(STUB_URING_OSD_OBJS) g++ $(CXXFLAGS) -o $@ -ltcmalloc_minimal $(STUB_URING_OSD_OBJS) -luring diff --git a/osd_primary.cpp b/osd_primary.cpp index 9e0846f3..381eed1e 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -239,6 +239,12 @@ resume_1: { cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set, pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size); + if (!cur_op->rmw_buf) + { + // Refuse partial overwrite of an incomplete object + cur_op->reply.hdr.retval = -EINVAL; + goto continue_others; + } } // Read required blocks submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op); @@ -361,10 +367,12 @@ resume_9: remove_object_from_state(op_data->oid, op_data->object_state, pg); pg.clean_count++; } + cur_op->reply.hdr.retval = cur_op->req.rw.len; +continue_others: // Remove version override pg.ver_override.erase(op_data->oid); object_id oid = op_data->oid; - finish_op(cur_op, cur_op->req.rw.len); + finish_op(cur_op, cur_op->reply.hdr.retval); // Continue other write operations to the same object auto next_it = pg.write_queue.find(oid); auto this_it = next_it; diff --git a/osd_rmw.cpp b/osd_rmw.cpp index bbf66604..886de5fd 100644 --- a/osd_rmw.cpp +++ b/osd_rmw.cpp @@ -208,7 +208,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ { pg_cursize = 0; // Object is degraded/misplaced and will be moved to - for (int role = 0; role < pg_size; role++) + for (int role = 0; role < pg_minsize; role++) { if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0) { @@ -228,6 +228,20 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ pg_cursize++; } } + for (int role = pg_minsize; role < pg_size; role++) + { + if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0) + { + for (int r2 = 0; r2 < pg_minsize; r2++) + { + cover_read(0, chunk_size, stripes[r2]); + } + } + if (read_osd_set[role] != 0) + { + pg_cursize++; + } + } } if (pg_cursize < pg_size) { @@ -251,8 +265,8 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ } if (found < pg_minsize) { - // FIXME Object is incomplete - refuse partial overwrite - assert(0); + // Object is incomplete - refuse partial overwrite + return NULL; } } } diff --git a/osd_rmw_test.cpp b/osd_rmw_test.cpp index b5c0f894..84f9dc19 100644 --- a/osd_rmw_test.cpp +++ b/osd_rmw_test.cpp @@ -13,6 +13,8 @@ void test6(); void test7(); void test8(); void test9(); +void test10(); +void test11(); /*** @@ -91,6 +93,28 @@ Cases: } + check write0 buffer +10. full overwrite/recovery case: + calc_rmw(offset=0, len=256K, read_osd_set=[1,0,0], write_osd_set=[1,2,3]) + = { + read: [ [ 0, 0 ], [ 0, 0 ], [ 0, 0 ] ], + write: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ], + input buffer: [ write0, write1 ], + rmw buffer: [ write2 ], + } + then, after calc_rmw_parity_xor(): all the same + + check write2 buffer + +10. partial recovery case: + calc_rmw(offset=128K, len=128K, read_osd_set=[1,0,0], write_osd_set=[1,2,3]) + = { + read: [ [ 0, 128K ], [ 0, 0 ], [ 0, 0 ] ], + write: [ [ 0, 0 ], [ 0, 128K ], [ 0, 128K ] ], + input buffer: [ write1 ], + rmw buffer: [ write2, read0 ], + } + then, after calc_rmw_parity_xor(): all the same + + check write2 buffer + ***/ int main(int narg, char *args[]) @@ -109,6 +133,10 @@ int main(int narg, char *args[]) test8(); // Test 9 test9(); + // Test 10 + test10(); + // Test 11 + test11(); // End printf("all ok\n"); return 0; @@ -361,3 +389,85 @@ void test9() check_pattern(stripes[0].write_buf, 128*1024, PATTERN1); free(rmw_buf); } + +void test10() +{ + osd_num_t osd_set[3] = { 1, 0, 0 }; + osd_num_t write_osd_set[3] = { 1, 2, 3 }; + osd_rmw_stripe_t stripes[3] = { 0 }; + // Test 10.0 + split_stripes(2, 128*1024, 0, 256*1024, stripes); + assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024); + assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024); + assert(stripes[2].req_start == 0 && stripes[2].req_end == 0); + // Test 10.1 + void *write_buf = malloc(256*1024); + void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024); + assert(rmw_buf); + assert(stripes[0].read_start == 0 && stripes[0].read_end == 0); + assert(stripes[1].read_start == 0 && stripes[1].read_end == 0); + assert(stripes[2].read_start == 0 && stripes[2].read_end == 0); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[0].read_buf == NULL); + assert(stripes[1].read_buf == NULL); + assert(stripes[2].read_buf == NULL); + assert(stripes[0].write_buf == write_buf); + assert(stripes[1].write_buf == write_buf+128*1024); + assert(stripes[2].write_buf == rmw_buf); + // Test 10.2 + set_pattern(stripes[0].write_buf, 128*1024, PATTERN1); + set_pattern(stripes[1].write_buf, 128*1024, PATTERN2); + calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[0].write_buf == write_buf); + assert(stripes[1].write_buf == write_buf+128*1024); + assert(stripes[2].write_buf == rmw_buf); + check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2); + free(rmw_buf); + free(write_buf); +} + +void test11() +{ + osd_num_t osd_set[3] = { 1, 0, 0 }; + osd_num_t write_osd_set[3] = { 1, 2, 3 }; + osd_rmw_stripe_t stripes[3] = { 0 }; + // Test 11.0 + split_stripes(2, 128*1024, 128*1024, 256*1024, stripes); + assert(stripes[0].req_start == 0 && stripes[0].req_end == 0); + assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024); + assert(stripes[2].req_start == 0 && stripes[2].req_end == 0); + // Test 11.1 + void *write_buf = malloc(256*1024); + void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024); + assert(rmw_buf); + assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024); + assert(stripes[1].read_start == 0 && stripes[1].read_end == 0); + assert(stripes[2].read_start == 0 && stripes[2].read_end == 0); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 0); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[0].read_buf == rmw_buf+128*1024); + assert(stripes[1].read_buf == NULL); + assert(stripes[2].read_buf == NULL); + assert(stripes[0].write_buf == NULL); + assert(stripes[1].write_buf == write_buf); + assert(stripes[2].write_buf == rmw_buf); + // Test 11.2 + set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); + set_pattern(stripes[1].write_buf, 128*1024, PATTERN2); + calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 0); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[0].write_buf == NULL); + assert(stripes[1].write_buf == write_buf); + assert(stripes[2].write_buf == rmw_buf); + check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2); + free(rmw_buf); + free(write_buf); +}