diff --git a/osd_flush.cpp b/osd_flush.cpp index 657ff5eb..9b16f0bb 100644 --- a/osd_flush.cpp +++ b/osd_flush.cpp @@ -204,6 +204,22 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op) } } } + for (auto pg_it = pgs.begin(); pg_it != pgs.end(); pg_it++) + { + if ((pg_it->second.state & (PG_ACTIVE | PG_HAS_MISPLACED)) == (PG_ACTIVE | PG_HAS_MISPLACED)) + { + for (auto obj_it = pg_it->second.misplaced_objects.begin(); obj_it != pg_it->second.misplaced_objects.end(); obj_it++) + { + if (recovery_ops.find(obj_it->first) == recovery_ops.end()) + { + op.degraded = false; + op.pg_num = pg_it->first; + op.oid = obj_it->first; + return true; + } + } + } + } return false; } @@ -266,15 +282,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op) pg->print_state(); } } - if (st->state == OBJ_DEGRADED) - { - pg->clean_count++; - } - else - { - assert(st->state == (OBJ_DEGRADED|OBJ_MISPLACED)); - pg->misplaced_objects[op->oid] = change_osd_set(st, pg); - } + pg->clean_count++; st->object_count--; if (!st->object_count) { @@ -305,58 +313,3 @@ bool osd_t::continue_recovery() } return true; } - -// This is likely not needed at all, because we'll always recover objects to the clean state -pg_osd_set_state_t* osd_t::change_osd_set(pg_osd_set_state_t *st, pg_t *pg) -{ - pg_osd_set_state_t *new_st; - pg_osd_set_t new_set(st->osd_set); - for (uint64_t role = 0; role < pg->pg_size; role++) - { - if (pg->cur_set[role] != 0) - { - // Maintain order (outdated -> role -> osd_num) - int added = 0; - for (int j = 0; j < new_set.size(); j++) - { - if (new_set[j].role == role && new_set[j].osd_num == pg->cur_set[role]) - { - if (new_set[j].outdated) - { - if (!added) - new_set[j].outdated = false; - else - { - new_set.erase(new_set.begin()+j); - j--; - } - } - break; - } - else if (!added && (new_set[j].outdated || new_set[j].role > role || - new_set[j].role == role && new_set[j].osd_num > pg->cur_set[role])) - { - new_set.insert(new_set.begin()+j, (pg_obj_loc_t){ - .role = role, - .osd_num = pg->cur_set[role], - .outdated = false, - }); - added = 1; - } - } - } - } - auto st_it = pg->state_dict.find(new_set); - if (st_it != pg->state_dict.end()) - { - st_it = pg->state_dict.emplace(new_set, (pg_osd_set_state_t){ - .read_target = pg->cur_set, - .osd_set = new_set, - .state = OBJ_MISPLACED, - .object_count = 0, - }).first; - } - new_st = &st_it->second; - new_st->object_count++; - return new_st; -} diff --git a/osd_primary.cpp b/osd_primary.cpp index 138fcff2..090acd89 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -31,8 +31,9 @@ struct osd_primary_op_data_t int degraded = 0, pg_size, pg_minsize; osd_rmw_stripe_t *stripes; osd_op_t *subops = NULL; - void *recovery_buf = NULL; uint64_t *prev_set = NULL; + uint64_t object_state = 0; + // for sync. oops, requires freeing std::vector *unstable_write_osds = NULL; pg_num_t *dirty_pgs = NULL; @@ -117,27 +118,32 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op) return true; } -uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def) +uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def, uint64_t &object_state) { if (!(pg.state & (PG_HAS_INCOMPLETE | PG_HAS_DEGRADED | PG_HAS_MISPLACED))) { + object_state = 0; return def; } auto st_it = pg.incomplete_objects.find(oid); if (st_it != pg.incomplete_objects.end()) { + object_state = st_it->second->state; return st_it->second->read_target.data(); } st_it = pg.degraded_objects.find(oid); if (st_it != pg.degraded_objects.end()) { + object_state = st_it->second->state; return st_it->second->read_target.data(); } st_it = pg.misplaced_objects.find(oid); if (st_it != pg.misplaced_objects.end()) { + object_state = st_it->second->state; return st_it->second->read_target.data(); } + object_state = 0; return def; } @@ -171,7 +177,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op) else { // PG may be degraded or have misplaced objects - uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data()); + uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state); if (extend_missing_stripes(op_data->stripes, cur_set, pg.pg_minsize, pg.pg_size) < 0) { finish_op(cur_op, -EIO); @@ -404,7 +410,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op) return; } osd_primary_op_data_t *op_data = cur_op->op_data; - // FIXME: Handle operation cancel auto & pg = pgs[op_data->pg_num]; if (op_data->st == 1) goto resume_1; else if (op_data->st == 2) goto resume_2; @@ -413,8 +418,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op) else if (op_data->st == 5) goto resume_5; else if (op_data->st == 6) goto resume_6; else if (op_data->st == 7) goto resume_7; - else if (op_data->st == 8) goto resume_8; - else if (op_data->st == 9) goto resume_9; assert(op_data->st == 0); // Check if actions are pending for this object { @@ -441,91 +444,12 @@ void osd_t::continue_primary_write(osd_op_t *cur_op) } pg.write_queue.emplace(op_data->oid, cur_op); } - if (pg.state & PG_HAS_DEGRADED) - { - op_data->prev_set = NULL; - { - auto st_it = pg.degraded_objects.find(op_data->oid); - if (st_it != pg.degraded_objects.end()) - op_data->prev_set = st_it->second->read_target.data(); - } - if (op_data->prev_set != NULL) - { - // Read the whole object - op_data->recovery_buf = memalign(512, pg.pg_size * bs_block_size); - for (int i = 0; i < pg.pg_size; i++) - { - op_data->stripes[i].read_buf = op_data->recovery_buf + i*bs_block_size; - op_data->stripes[i].read_start = 0; - op_data->stripes[i].read_end = bs_block_size; - op_data->stripes[i].missing = op_data->prev_set[i] == 0; - op_data->stripes[i].write_end = 0; - } - op_data->degraded = 1; - submit_primary_subops(SUBMIT_READ, pg.pg_size, op_data->prev_set, cur_op); -resume_8: - op_data->st = 8; - return; -resume_9: - if (op_data->errors > 0) - { - pg_cancel_write_queue(pg, op_data->oid, op_data->epipe > 0 ? -EPIPE : -EIO); - return; - } - reconstruct_stripes(op_data->stripes, pg.pg_size); - if (cur_op->req.rw.len > 0) - { - memcpy( - op_data->recovery_buf + cur_op->req.rw.offset - op_data->oid.stripe, - cur_op->buf, cur_op->req.rw.len - ); - } - free(cur_op->buf); - cur_op->buf = op_data->recovery_buf; - op_data->recovery_buf = NULL; - if (cur_op->req.rw.len > 0) - { - // Write modified parts - uint32_t start = 0, end = 0; - for (int role = 0; role < pg.pg_minsize; role++) - { - if (op_data->stripes[role].req_end != 0) - { - start = !end || op_data->stripes[role].req_start < start ? op_data->stripes[role].req_start : start; - end = std::max(op_data->stripes[role].req_end, end); - op_data->stripes[role].write_start = op_data->stripes[role].req_start; - op_data->stripes[role].write_end = op_data->stripes[role].req_end; - op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start; - } - } - for (int role = pg.pg_minsize; role < pg.pg_size; role++) - { - op_data->stripes[role].write_start = start; - op_data->stripes[role].write_end = end; - op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start; - } - } - // Also write recovered parts - uint64_t *cur_set = pg.cur_set.data(); - for (int role = 0; role < pg.pg_size; role++) - { - if (cur_set[role] != op_data->prev_set[role]) - { - op_data->stripes[role].write_start = 0; - op_data->stripes[role].write_end = bs_block_size; - op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size; - } - } - pg.ver_override[op_data->oid] = op_data->fact_ver; - // Send writes - submit_primary_subops(SUBMIT_WRITE, pg.pg_size, cur_set, cur_op); - op_data->st = 4; - return; - } - } resume_1: // Determine blocks to read and write - cur_op->rmw_buf = calc_rmw_reads(cur_op->buf, op_data->stripes, pg.cur_set.data(), pg.pg_size, pg.pg_minsize, pg.pg_cursize); + // Missing chunks are allowed to be overwritten even in incomplete objects + op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state); + cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set, + pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size); // Read required blocks submit_primary_subops(SUBMIT_RMW_READ, pg.pg_size, pg.cur_set.data(), cur_op); resume_2: @@ -540,7 +464,7 @@ resume_3: // Save version override for parallel reads pg.ver_override[op_data->oid] = op_data->fact_ver; // Recover missing stripes, calculate parity - calc_rmw_parity(op_data->stripes, pg.pg_size); + calc_rmw_parity(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size); // Send writes submit_primary_subops(SUBMIT_WRITE, pg.pg_size, pg.cur_set.data(), cur_op); resume_4: @@ -693,7 +617,6 @@ resume_1: syncs_in_progress.push_back(cur_op); } resume_2: - // FIXME: Handle operation cancel if (unstable_writes.size() == 0) { // Nothing to sync diff --git a/osd_rmw.cpp b/osd_rmw.cpp index 1df19da3..2c110fc0 100644 --- a/osd_rmw.cpp +++ b/osd_rmw.cpp @@ -1,4 +1,5 @@ #include +#include #include #include "xor.h" #include "osd_rmw.h" @@ -79,18 +80,21 @@ void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role) } else if (prev >= 0) { + assert(stripes[role].read_start >= stripes[prev].read_start && + stripes[role].read_start >= stripes[other].read_start); memxor( - stripes[prev].read_buf + (stripes[prev].read_start - stripes[role].read_start), - stripes[other].read_buf + (stripes[other].read_start - stripes[other].read_start), + stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start), + stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start ); prev = -1; } else { + assert(stripes[role].read_start >= stripes[other].read_start); memxor( stripes[role].read_buf, - stripes[other].read_buf + (stripes[other].read_start - stripes[role].read_start), + stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start ); } @@ -156,10 +160,11 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad return buf; } -void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize) +void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set, + uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size) { // Generic parity modification (read-modify-write) algorithm - // Reconstruct -> Read -> Calc parity -> Write + // Read -> Reconstruct missing chunks -> Calc parity chunks -> Write // Now we always read continuous ranges. This means that an update of the beginning // of one data stripe and the end of another will lead to a read of full paired stripes. // FIXME: (Maybe) read small individual ranges in that case instead. @@ -174,64 +179,79 @@ void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_s stripes[role].write_end = stripes[role].req_end; } } - for (int role = 0; role < pg_minsize; role++) - { - cover_read(start, end, stripes[role]); - } - int has_parity = 0; + int write_parity = 0; for (int role = pg_minsize; role < pg_size; role++) { - if (osd_set[role] != 0) + if (write_osd_set[role] != 0) { - has_parity++; + write_parity = 1; stripes[role].write_start = start; stripes[role].write_end = end; } - else - stripes[role].missing = true; + } + if (write_parity) + { + for (int role = 0; role < pg_minsize; role++) + { + cover_read(start, end, stripes[role]); + } + } + if (write_osd_set != read_osd_set) + { + // Object is degraded/misplaced and will be moved to + for (int role = 0; role < pg_size; role++) + { + if (write_osd_set[role] != read_osd_set[role]) + { + // We need to get data for any moved / recovered chunk + // And we need a continuous write buffer so we'll only optimize + // for the case when the whole chunk is ovewritten in the request + if (stripes[role].req_start != 0 || + stripes[role].req_end != chunk_size) + { + stripes[role].read_start = 0; + stripes[role].read_end = chunk_size; + } + } + } } if (pg_cursize < pg_size) { - if (has_parity == 0) + // Some stripe(s) are missing, so we need to read parity + for (int role = 0; role < pg_size; role++) { - // Parity is missing, we don't need to read anything - for (int role = 0; role < pg_minsize; role++) + if (read_osd_set[role] == 0 && stripes[role].read_end != 0) { - stripes[role].read_end = 0; - } - } - else - { - // Other stripe(s) are missing - for (int role = 0; role < pg_minsize; role++) - { - if (osd_set[role] == 0 && stripes[role].read_end != 0) + stripes[role].missing = true; + int found = 0; + for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++) { - stripes[role].missing = true; - for (int r2 = 0; r2 < pg_size; r2++) + // Read the non-covered range of from at least other stripes to reconstruct it + if (read_osd_set[r2] != 0) { - // Read the non-covered range of from all other stripes to reconstruct it - if (r2 != role && osd_set[r2] != 0) - { - extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]); - } + extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]); + found++; } } + if (found < pg_minsize) + { + // Incomplete object (FIXME) + } } } } // Allocate read buffers - void *rmw_buf = alloc_read_buffer(stripes, pg_size, has_parity * (end - start)); - // Position parity & write buffers + void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start)); + // Position write buffers uint64_t buf_pos = 0, in_pos = 0; for (int role = 0; role < pg_size; role++) { if (stripes[role].req_end != 0) { - stripes[role].write_buf = write_buf + in_pos; + stripes[role].write_buf = request_buf + in_pos; in_pos += stripes[role].req_end - stripes[role].req_start; } - else if (role >= pg_minsize && osd_set[role] != 0) + else if (role >= pg_minsize && read_osd_set[role] != 0) { stripes[role].write_buf = rmw_buf + buf_pos; buf_pos += end - start; @@ -321,8 +341,9 @@ static void xor_multiple_buffers(buf_len_t *xor1, int n1, buf_len_t *xor2, int n } } -void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size) +void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size) { + int pg_minsize = pg_size-1; for (int role = 0; role < pg_size; role++) { if (stripes[role].read_end != 0 && stripes[role].missing) @@ -332,41 +353,81 @@ void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size) break; } } -} - -void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size) -{ - if (stripes[pg_size-1].missing) + uint32_t start = 0, end = 0; + if (!stripes[pg_minsize].missing || write_osd_set != read_osd_set) { - // Parity OSD is unavailable - return; - } - reconstruct_stripes(stripes, pg_size); - // Calculate new parity (EC k+1) - int parity = pg_size-1, prev = -2; - auto wr_end = stripes[parity].write_end; - auto wr_start = stripes[parity].write_start; - for (int other = 0; other < pg_size-1; other++) - { - if (prev == -2) + for (int role = 0; role < pg_minsize; role++) { - prev = other; - } - else - { - int n1 = 0, n2 = 0; - buf_len_t xor1[3], xor2[3]; - if (prev == -1) + if (stripes[role].req_end != 0) { - xor1[n1++] = { .buf = stripes[parity].write_buf, .len = wr_end-wr_start }; + start = !end || stripes[role].req_start < start ? stripes[role].req_start : start; + end = std::max(stripes[role].req_end, end); + } + } + } + if (write_osd_set != read_osd_set) + { + for (int role = 0; role < pg_minsize; role++) + { + if (write_osd_set[role] != read_osd_set[role] && + (stripes[role].req_start != 0 || stripes[role].req_end != chunk_size)) + { + // Copy modified chunk into the read buffer to write it back + memcpy( + stripes[role].read_buf + stripes[role].req_start, + stripes[role].write_buf, + stripes[role].req_end - stripes[role].req_start + ); + stripes[role].write_buf = stripes[role].read_buf; + stripes[role].write_start = 0; + stripes[role].write_end = chunk_size; + } + } + } + if (!stripes[pg_minsize].missing) + { + // Calculate new parity (EC k+1) + int parity = pg_minsize, prev = -2; + for (int other = 0; other < pg_minsize; other++) + { + if (prev == -2) + { + prev = other; } else { - get_old_new_buffers(stripes[prev], wr_start, wr_end, xor1, n1); - prev = -1; + int n1 = 0, n2 = 0; + buf_len_t xor1[3], xor2[3]; + if (prev == -1) + { + xor1[n1++] = { .buf = stripes[parity].write_buf, .len = end-start }; + } + else + { + get_old_new_buffers(stripes[prev], start, end, xor1, n1); + prev = -1; + } + get_old_new_buffers(stripes[other], start, end, xor2, n2); + xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, end-start); + } + } + } + if (write_osd_set != read_osd_set) + { + for (int role = pg_minsize; role < pg_size; role++) + { + if (write_osd_set[role] != read_osd_set[role] && (start != 0 || end != chunk_size)) + { + // Copy new parity into the read buffer to write it back + memcpy( + stripes[role].read_buf + start, + stripes[role].write_buf, + end - start + ); + stripes[role].write_buf = stripes[role].read_buf; + stripes[role].write_start = 0; + stripes[role].write_end = chunk_size; } - get_old_new_buffers(stripes[other], wr_start, wr_end, xor2, n2); - xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, wr_end-wr_start); } } } diff --git a/osd_rmw.h b/osd_rmw.h index 7ae20043..b5c1b4a4 100644 --- a/osd_rmw.h +++ b/osd_rmw.h @@ -25,14 +25,13 @@ struct osd_rmw_stripe_t void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t len, osd_rmw_stripe_t *stripes); -void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size); - void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role); int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int minsize, int size); void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size); -void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize); +void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set, + uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size); -void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size); +void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size); diff --git a/osd_rmw_test.cpp b/osd_rmw_test.cpp index f4647ac0..75e9b820 100644 --- a/osd_rmw_test.cpp +++ b/osd_rmw_test.cpp @@ -2,6 +2,64 @@ #include "osd_rmw.cpp" #include "test_pattern.h" +/*** + +Cases: + +1. split(offset=128K-4K, len=8K) + = [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 0 ] ] + +2. read(offset=128K-4K, len=8K, osd_set=[1,0,3]) + = { read: [ [ 0, 128K ], [ 0, 4K ], [ 0, 4K ] ] } + +3. cover_read(0, 128K, { req: [ 128K-4K, 4K ] }) + = { read: [ 0, 128K-4K ] } + +4. write(offset=128K-4K, len=8K, osd_set=[1,0,3]) + = { + read: [ [ 0, 128K ], [ 4K, 128K ], [ 4K, 128K ] ], + write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ], + input buffer: [ write0, write1 ], + rmw buffer: [ write2, read0, read1, read2 ], + } + + check write2 buffer + +5. write(offset=0, len=128K+64K, osd_set=[1,0,3]) + = { + req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ], + read: [ [ 64K, 128K ], [ 64K, 128K ], [ 64K, 128K ] ], + write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ], + input buffer: [ write0, write1 ], + rmw buffer: [ write2, read0, read1, read2 ], + } + +6. write(offset=0, len=128K+64K, osd_set=[1,2,3]) + = { + req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ], + read: [ [ 0, 0 ], [ 64K, 128K ], [ 0, 0 ] ], + write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ], + input buffer: [ write0, write1 ], + rmw buffer: [ write2, read1 ], + } + +7. calc_rmw(offset=128K-4K, len=8K, osd_set=[1,0,3], write_set=[1,2,3]) + = { + read: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ], + write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ], + input buffer: [ write0, write1 ], + rmw buffer: [ write2, read0, read1, read2 ], + } + then, after calc_rmw_parity(): { + write: [ [ 128K-4K, 128K ], [ 0, 128K ], [ 0, 128K ] ], + write1==read1, + } + + check write1 buffer + + check write2 buffer + +***/ + +void test7(); + int main(int narg, char *args[]) { osd_num_t osd_set[3] = { 1, 0, 3 }; @@ -28,10 +86,13 @@ int main(int narg, char *args[]) memset(stripes, 0, sizeof(stripes)); split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes); void* write_buf = malloc(8192); - void* rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2); + void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024); assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024); assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024); assert(stripes[2].read_start == 4096 && stripes[2].read_end == 128*1024); + assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); assert(stripes[0].read_buf == rmw_buf+128*1024); assert(stripes[1].read_buf == rmw_buf+128*1024*2); assert(stripes[2].read_buf == rmw_buf+128*1024*3-4096); @@ -43,7 +104,7 @@ int main(int narg, char *args[]) set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data set_pattern(stripes[1].read_buf, 128*1024-4096, UINT64_MAX); // didn't read it, it's missing set_pattern(stripes[2].read_buf, 128*1024-4096, 0); // old parity = 0 - calc_rmw_parity(stripes, 3); + calc_rmw_parity(stripes, 3, osd_set, osd_set, 128*1024); check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity @@ -57,10 +118,13 @@ int main(int narg, char *args[]) assert(stripes[2].req_end == 0); // Test 5.2 write_buf = malloc(64*1024*3); - rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2); + rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024); assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024); assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024); assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); assert(stripes[0].read_buf == rmw_buf+128*1024); assert(stripes[1].read_buf == rmw_buf+64*3*1024); assert(stripes[2].read_buf == rmw_buf+64*4*1024); @@ -74,10 +138,13 @@ int main(int narg, char *args[]) split_stripes(2, 128*1024, 0, 64*1024*3, stripes); osd_set[1] = 2; write_buf = malloc(64*1024*3); - rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 3); + rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, osd_set, 128*1024); assert(stripes[0].read_end == 0); assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024); assert(stripes[2].read_end == 0); + assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); assert(stripes[0].read_buf == 0); assert(stripes[1].read_buf == rmw_buf+128*1024); assert(stripes[2].read_buf == 0); @@ -87,7 +154,49 @@ int main(int narg, char *args[]) free(rmw_buf); free(write_buf); osd_set[1] = 0; + // Test 7 + test7(); // End printf("all ok\n"); return 0; } + +void test7() +{ + osd_num_t osd_set[3] = { 1, 0, 3 }; + osd_num_t write_osd_set[3] = { 1, 2, 3 }; + osd_rmw_stripe_t stripes[3] = { 0 }; + // Test 7.1 + split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes); + void *write_buf = malloc(8192); + void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, write_osd_set, 128*1024); + assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024); + assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024); + assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024); + assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[0].read_buf == rmw_buf+128*1024); + assert(stripes[1].read_buf == rmw_buf+128*1024*2); + assert(stripes[2].read_buf == rmw_buf+128*1024*3); + assert(stripes[0].write_buf == write_buf); + assert(stripes[1].write_buf == write_buf+4096); + assert(stripes[2].write_buf == rmw_buf); + // Test 7.2 + set_pattern(write_buf, 8192, PATTERN0); + set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data + set_pattern(stripes[1].read_buf, 128*1024, UINT64_MAX); // didn't read it, it's missing + set_pattern(stripes[2].read_buf, 128*1024, 0); // old parity = 0 + calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024); + assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024); + assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); + assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); + assert(stripes[1].write_buf == stripes[1].read_buf); + check_pattern(stripes[1].write_buf, 4096, PATTERN0); + check_pattern(stripes[1].write_buf+4096, 128*1024-4096, PATTERN1); + check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity + check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity + check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity + free(rmw_buf); + free(write_buf); +}