(Almost) Implement misplaced recovery, integrating it into calc_rmw()

trace-sqes
Vitaliy Filippov 2020-04-04 22:19:45 +03:00
parent 6212195440
commit cf7de0f181
5 changed files with 274 additions and 229 deletions

View File

@ -204,6 +204,22 @@ bool osd_t::pick_next_recovery(osd_recovery_op_t &op)
}
}
}
for (auto pg_it = pgs.begin(); pg_it != pgs.end(); pg_it++)
{
if ((pg_it->second.state & (PG_ACTIVE | PG_HAS_MISPLACED)) == (PG_ACTIVE | PG_HAS_MISPLACED))
{
for (auto obj_it = pg_it->second.misplaced_objects.begin(); obj_it != pg_it->second.misplaced_objects.end(); obj_it++)
{
if (recovery_ops.find(obj_it->first) == recovery_ops.end())
{
op.degraded = false;
op.pg_num = pg_it->first;
op.oid = obj_it->first;
return true;
}
}
}
}
return false;
}
@ -266,15 +282,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
pg->print_state();
}
}
if (st->state == OBJ_DEGRADED)
{
pg->clean_count++;
}
else
{
assert(st->state == (OBJ_DEGRADED|OBJ_MISPLACED));
pg->misplaced_objects[op->oid] = change_osd_set(st, pg);
}
pg->clean_count++;
st->object_count--;
if (!st->object_count)
{
@ -305,58 +313,3 @@ bool osd_t::continue_recovery()
}
return true;
}
// This is likely not needed at all, because we'll always recover objects to the clean state
pg_osd_set_state_t* osd_t::change_osd_set(pg_osd_set_state_t *st, pg_t *pg)
{
pg_osd_set_state_t *new_st;
pg_osd_set_t new_set(st->osd_set);
for (uint64_t role = 0; role < pg->pg_size; role++)
{
if (pg->cur_set[role] != 0)
{
// Maintain order (outdated -> role -> osd_num)
int added = 0;
for (int j = 0; j < new_set.size(); j++)
{
if (new_set[j].role == role && new_set[j].osd_num == pg->cur_set[role])
{
if (new_set[j].outdated)
{
if (!added)
new_set[j].outdated = false;
else
{
new_set.erase(new_set.begin()+j);
j--;
}
}
break;
}
else if (!added && (new_set[j].outdated || new_set[j].role > role ||
new_set[j].role == role && new_set[j].osd_num > pg->cur_set[role]))
{
new_set.insert(new_set.begin()+j, (pg_obj_loc_t){
.role = role,
.osd_num = pg->cur_set[role],
.outdated = false,
});
added = 1;
}
}
}
}
auto st_it = pg->state_dict.find(new_set);
if (st_it != pg->state_dict.end())
{
st_it = pg->state_dict.emplace(new_set, (pg_osd_set_state_t){
.read_target = pg->cur_set,
.osd_set = new_set,
.state = OBJ_MISPLACED,
.object_count = 0,
}).first;
}
new_st = &st_it->second;
new_st->object_count++;
return new_st;
}

View File

@ -31,8 +31,9 @@ struct osd_primary_op_data_t
int degraded = 0, pg_size, pg_minsize;
osd_rmw_stripe_t *stripes;
osd_op_t *subops = NULL;
void *recovery_buf = NULL;
uint64_t *prev_set = NULL;
uint64_t object_state = 0;
// for sync. oops, requires freeing
std::vector<unstable_osd_num_t> *unstable_write_osds = NULL;
pg_num_t *dirty_pgs = NULL;
@ -117,27 +118,32 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
return true;
}
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def)
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def, uint64_t &object_state)
{
if (!(pg.state & (PG_HAS_INCOMPLETE | PG_HAS_DEGRADED | PG_HAS_MISPLACED)))
{
object_state = 0;
return def;
}
auto st_it = pg.incomplete_objects.find(oid);
if (st_it != pg.incomplete_objects.end())
{
object_state = st_it->second->state;
return st_it->second->read_target.data();
}
st_it = pg.degraded_objects.find(oid);
if (st_it != pg.degraded_objects.end())
{
object_state = st_it->second->state;
return st_it->second->read_target.data();
}
st_it = pg.misplaced_objects.find(oid);
if (st_it != pg.misplaced_objects.end())
{
object_state = st_it->second->state;
return st_it->second->read_target.data();
}
object_state = 0;
return def;
}
@ -171,7 +177,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
else
{
// PG may be degraded or have misplaced objects
uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data());
uint64_t* cur_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state);
if (extend_missing_stripes(op_data->stripes, cur_set, pg.pg_minsize, pg.pg_size) < 0)
{
finish_op(cur_op, -EIO);
@ -404,7 +410,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
return;
}
osd_primary_op_data_t *op_data = cur_op->op_data;
// FIXME: Handle operation cancel
auto & pg = pgs[op_data->pg_num];
if (op_data->st == 1) goto resume_1;
else if (op_data->st == 2) goto resume_2;
@ -413,8 +418,6 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
else if (op_data->st == 5) goto resume_5;
else if (op_data->st == 6) goto resume_6;
else if (op_data->st == 7) goto resume_7;
else if (op_data->st == 8) goto resume_8;
else if (op_data->st == 9) goto resume_9;
assert(op_data->st == 0);
// Check if actions are pending for this object
{
@ -441,91 +444,12 @@ void osd_t::continue_primary_write(osd_op_t *cur_op)
}
pg.write_queue.emplace(op_data->oid, cur_op);
}
if (pg.state & PG_HAS_DEGRADED)
{
op_data->prev_set = NULL;
{
auto st_it = pg.degraded_objects.find(op_data->oid);
if (st_it != pg.degraded_objects.end())
op_data->prev_set = st_it->second->read_target.data();
}
if (op_data->prev_set != NULL)
{
// Read the whole object
op_data->recovery_buf = memalign(512, pg.pg_size * bs_block_size);
for (int i = 0; i < pg.pg_size; i++)
{
op_data->stripes[i].read_buf = op_data->recovery_buf + i*bs_block_size;
op_data->stripes[i].read_start = 0;
op_data->stripes[i].read_end = bs_block_size;
op_data->stripes[i].missing = op_data->prev_set[i] == 0;
op_data->stripes[i].write_end = 0;
}
op_data->degraded = 1;
submit_primary_subops(SUBMIT_READ, pg.pg_size, op_data->prev_set, cur_op);
resume_8:
op_data->st = 8;
return;
resume_9:
if (op_data->errors > 0)
{
pg_cancel_write_queue(pg, op_data->oid, op_data->epipe > 0 ? -EPIPE : -EIO);
return;
}
reconstruct_stripes(op_data->stripes, pg.pg_size);
if (cur_op->req.rw.len > 0)
{
memcpy(
op_data->recovery_buf + cur_op->req.rw.offset - op_data->oid.stripe,
cur_op->buf, cur_op->req.rw.len
);
}
free(cur_op->buf);
cur_op->buf = op_data->recovery_buf;
op_data->recovery_buf = NULL;
if (cur_op->req.rw.len > 0)
{
// Write modified parts
uint32_t start = 0, end = 0;
for (int role = 0; role < pg.pg_minsize; role++)
{
if (op_data->stripes[role].req_end != 0)
{
start = !end || op_data->stripes[role].req_start < start ? op_data->stripes[role].req_start : start;
end = std::max(op_data->stripes[role].req_end, end);
op_data->stripes[role].write_start = op_data->stripes[role].req_start;
op_data->stripes[role].write_end = op_data->stripes[role].req_end;
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start;
}
}
for (int role = pg.pg_minsize; role < pg.pg_size; role++)
{
op_data->stripes[role].write_start = start;
op_data->stripes[role].write_end = end;
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size + op_data->stripes[role].write_start;
}
}
// Also write recovered parts
uint64_t *cur_set = pg.cur_set.data();
for (int role = 0; role < pg.pg_size; role++)
{
if (cur_set[role] != op_data->prev_set[role])
{
op_data->stripes[role].write_start = 0;
op_data->stripes[role].write_end = bs_block_size;
op_data->stripes[role].write_buf = cur_op->buf + role*bs_block_size;
}
}
pg.ver_override[op_data->oid] = op_data->fact_ver;
// Send writes
submit_primary_subops(SUBMIT_WRITE, pg.pg_size, cur_set, cur_op);
op_data->st = 4;
return;
}
}
resume_1:
// Determine blocks to read and write
cur_op->rmw_buf = calc_rmw_reads(cur_op->buf, op_data->stripes, pg.cur_set.data(), pg.pg_size, pg.pg_minsize, pg.pg_cursize);
// Missing chunks are allowed to be overwritten even in incomplete objects
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), op_data->object_state);
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size);
// Read required blocks
submit_primary_subops(SUBMIT_RMW_READ, pg.pg_size, pg.cur_set.data(), cur_op);
resume_2:
@ -540,7 +464,7 @@ resume_3:
// Save version override for parallel reads
pg.ver_override[op_data->oid] = op_data->fact_ver;
// Recover missing stripes, calculate parity
calc_rmw_parity(op_data->stripes, pg.pg_size);
calc_rmw_parity(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size);
// Send writes
submit_primary_subops(SUBMIT_WRITE, pg.pg_size, pg.cur_set.data(), cur_op);
resume_4:
@ -693,7 +617,6 @@ resume_1:
syncs_in_progress.push_back(cur_op);
}
resume_2:
// FIXME: Handle operation cancel
if (unstable_writes.size() == 0)
{
// Nothing to sync

View File

@ -1,4 +1,5 @@
#include <malloc.h>
#include <string.h>
#include <assert.h>
#include "xor.h"
#include "osd_rmw.h"
@ -79,18 +80,21 @@ void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role)
}
else if (prev >= 0)
{
assert(stripes[role].read_start >= stripes[prev].read_start &&
stripes[role].read_start >= stripes[other].read_start);
memxor(
stripes[prev].read_buf + (stripes[prev].read_start - stripes[role].read_start),
stripes[other].read_buf + (stripes[other].read_start - stripes[other].read_start),
stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start),
stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
);
prev = -1;
}
else
{
assert(stripes[role].read_start >= stripes[other].read_start);
memxor(
stripes[role].read_buf,
stripes[other].read_buf + (stripes[other].read_start - stripes[role].read_start),
stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
);
}
@ -156,10 +160,11 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
return buf;
}
void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize)
void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set,
uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size)
{
// Generic parity modification (read-modify-write) algorithm
// Reconstruct -> Read -> Calc parity -> Write
// Read -> Reconstruct missing chunks -> Calc parity chunks -> Write
// Now we always read continuous ranges. This means that an update of the beginning
// of one data stripe and the end of another will lead to a read of full paired stripes.
// FIXME: (Maybe) read small individual ranges in that case instead.
@ -174,64 +179,79 @@ void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_s
stripes[role].write_end = stripes[role].req_end;
}
}
for (int role = 0; role < pg_minsize; role++)
{
cover_read(start, end, stripes[role]);
}
int has_parity = 0;
int write_parity = 0;
for (int role = pg_minsize; role < pg_size; role++)
{
if (osd_set[role] != 0)
if (write_osd_set[role] != 0)
{
has_parity++;
write_parity = 1;
stripes[role].write_start = start;
stripes[role].write_end = end;
}
else
stripes[role].missing = true;
}
if (write_parity)
{
for (int role = 0; role < pg_minsize; role++)
{
cover_read(start, end, stripes[role]);
}
}
if (write_osd_set != read_osd_set)
{
// Object is degraded/misplaced and will be moved to <write_osd_set>
for (int role = 0; role < pg_size; role++)
{
if (write_osd_set[role] != read_osd_set[role])
{
// We need to get data for any moved / recovered chunk
// And we need a continuous write buffer so we'll only optimize
// for the case when the whole chunk is ovewritten in the request
if (stripes[role].req_start != 0 ||
stripes[role].req_end != chunk_size)
{
stripes[role].read_start = 0;
stripes[role].read_end = chunk_size;
}
}
}
}
if (pg_cursize < pg_size)
{
if (has_parity == 0)
// Some stripe(s) are missing, so we need to read parity
for (int role = 0; role < pg_size; role++)
{
// Parity is missing, we don't need to read anything
for (int role = 0; role < pg_minsize; role++)
if (read_osd_set[role] == 0 && stripes[role].read_end != 0)
{
stripes[role].read_end = 0;
}
}
else
{
// Other stripe(s) are missing
for (int role = 0; role < pg_minsize; role++)
{
if (osd_set[role] == 0 && stripes[role].read_end != 0)
stripes[role].missing = true;
int found = 0;
for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++)
{
stripes[role].missing = true;
for (int r2 = 0; r2 < pg_size; r2++)
// Read the non-covered range of <role> from at least <minsize> other stripes to reconstruct it
if (read_osd_set[r2] != 0)
{
// Read the non-covered range of <role> from all other stripes to reconstruct it
if (r2 != role && osd_set[r2] != 0)
{
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]);
}
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]);
found++;
}
}
if (found < pg_minsize)
{
// Incomplete object (FIXME)
}
}
}
}
// Allocate read buffers
void *rmw_buf = alloc_read_buffer(stripes, pg_size, has_parity * (end - start));
// Position parity & write buffers
void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start));
// Position write buffers
uint64_t buf_pos = 0, in_pos = 0;
for (int role = 0; role < pg_size; role++)
{
if (stripes[role].req_end != 0)
{
stripes[role].write_buf = write_buf + in_pos;
stripes[role].write_buf = request_buf + in_pos;
in_pos += stripes[role].req_end - stripes[role].req_start;
}
else if (role >= pg_minsize && osd_set[role] != 0)
else if (role >= pg_minsize && read_osd_set[role] != 0)
{
stripes[role].write_buf = rmw_buf + buf_pos;
buf_pos += end - start;
@ -321,8 +341,9 @@ static void xor_multiple_buffers(buf_len_t *xor1, int n1, buf_len_t *xor2, int n
}
}
void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size)
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size)
{
int pg_minsize = pg_size-1;
for (int role = 0; role < pg_size; role++)
{
if (stripes[role].read_end != 0 && stripes[role].missing)
@ -332,41 +353,81 @@ void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size)
break;
}
}
}
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size)
{
if (stripes[pg_size-1].missing)
uint32_t start = 0, end = 0;
if (!stripes[pg_minsize].missing || write_osd_set != read_osd_set)
{
// Parity OSD is unavailable
return;
}
reconstruct_stripes(stripes, pg_size);
// Calculate new parity (EC k+1)
int parity = pg_size-1, prev = -2;
auto wr_end = stripes[parity].write_end;
auto wr_start = stripes[parity].write_start;
for (int other = 0; other < pg_size-1; other++)
{
if (prev == -2)
for (int role = 0; role < pg_minsize; role++)
{
prev = other;
}
else
{
int n1 = 0, n2 = 0;
buf_len_t xor1[3], xor2[3];
if (prev == -1)
if (stripes[role].req_end != 0)
{
xor1[n1++] = { .buf = stripes[parity].write_buf, .len = wr_end-wr_start };
start = !end || stripes[role].req_start < start ? stripes[role].req_start : start;
end = std::max(stripes[role].req_end, end);
}
}
}
if (write_osd_set != read_osd_set)
{
for (int role = 0; role < pg_minsize; role++)
{
if (write_osd_set[role] != read_osd_set[role] &&
(stripes[role].req_start != 0 || stripes[role].req_end != chunk_size))
{
// Copy modified chunk into the read buffer to write it back
memcpy(
stripes[role].read_buf + stripes[role].req_start,
stripes[role].write_buf,
stripes[role].req_end - stripes[role].req_start
);
stripes[role].write_buf = stripes[role].read_buf;
stripes[role].write_start = 0;
stripes[role].write_end = chunk_size;
}
}
}
if (!stripes[pg_minsize].missing)
{
// Calculate new parity (EC k+1)
int parity = pg_minsize, prev = -2;
for (int other = 0; other < pg_minsize; other++)
{
if (prev == -2)
{
prev = other;
}
else
{
get_old_new_buffers(stripes[prev], wr_start, wr_end, xor1, n1);
prev = -1;
int n1 = 0, n2 = 0;
buf_len_t xor1[3], xor2[3];
if (prev == -1)
{
xor1[n1++] = { .buf = stripes[parity].write_buf, .len = end-start };
}
else
{
get_old_new_buffers(stripes[prev], start, end, xor1, n1);
prev = -1;
}
get_old_new_buffers(stripes[other], start, end, xor2, n2);
xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, end-start);
}
}
}
if (write_osd_set != read_osd_set)
{
for (int role = pg_minsize; role < pg_size; role++)
{
if (write_osd_set[role] != read_osd_set[role] && (start != 0 || end != chunk_size))
{
// Copy new parity into the read buffer to write it back
memcpy(
stripes[role].read_buf + start,
stripes[role].write_buf,
end - start
);
stripes[role].write_buf = stripes[role].read_buf;
stripes[role].write_start = 0;
stripes[role].write_end = chunk_size;
}
get_old_new_buffers(stripes[other], wr_start, wr_end, xor2, n2);
xor_multiple_buffers(xor1, n1, xor2, n2, stripes[parity].write_buf, wr_end-wr_start);
}
}
}

View File

@ -25,14 +25,13 @@ struct osd_rmw_stripe_t
void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t len, osd_rmw_stripe_t *stripes);
void reconstruct_stripes(osd_rmw_stripe_t *stripes, int pg_size);
void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role);
int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int minsize, int size);
void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t add_size);
void* calc_rmw_reads(void *write_buf, osd_rmw_stripe_t *stripes, uint64_t *osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize);
void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set,
uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size);
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size);
void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size);

View File

@ -2,6 +2,64 @@
#include "osd_rmw.cpp"
#include "test_pattern.h"
/***
Cases:
1. split(offset=128K-4K, len=8K)
= [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 0 ] ]
2. read(offset=128K-4K, len=8K, osd_set=[1,0,3])
= { read: [ [ 0, 128K ], [ 0, 4K ], [ 0, 4K ] ] }
3. cover_read(0, 128K, { req: [ 128K-4K, 4K ] })
= { read: [ 0, 128K-4K ] }
4. write(offset=128K-4K, len=8K, osd_set=[1,0,3])
= {
read: [ [ 0, 128K ], [ 4K, 128K ], [ 4K, 128K ] ],
write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ],
input buffer: [ write0, write1 ],
rmw buffer: [ write2, read0, read1, read2 ],
}
+ check write2 buffer
5. write(offset=0, len=128K+64K, osd_set=[1,0,3])
= {
req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ],
read: [ [ 64K, 128K ], [ 64K, 128K ], [ 64K, 128K ] ],
write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ],
input buffer: [ write0, write1 ],
rmw buffer: [ write2, read0, read1, read2 ],
}
6. write(offset=0, len=128K+64K, osd_set=[1,2,3])
= {
req: [ [ 0, 128K ], [ 0, 64K ], [ 0, 0 ] ],
read: [ [ 0, 0 ], [ 64K, 128K ], [ 0, 0 ] ],
write: [ [ 0, 128K ], [ 0, 64K ], [ 0, 128K ] ],
input buffer: [ write0, write1 ],
rmw buffer: [ write2, read1 ],
}
7. calc_rmw(offset=128K-4K, len=8K, osd_set=[1,0,3], write_set=[1,2,3])
= {
read: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ],
write: [ [ 128K-4K, 128K ], [ 0, 4K ], [ 0, 128K ] ],
input buffer: [ write0, write1 ],
rmw buffer: [ write2, read0, read1, read2 ],
}
then, after calc_rmw_parity(): {
write: [ [ 128K-4K, 128K ], [ 0, 128K ], [ 0, 128K ] ],
write1==read1,
}
+ check write1 buffer
+ check write2 buffer
***/
void test7();
int main(int narg, char *args[])
{
osd_num_t osd_set[3] = { 1, 0, 3 };
@ -28,10 +86,13 @@ int main(int narg, char *args[])
memset(stripes, 0, sizeof(stripes));
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
void* write_buf = malloc(8192);
void* rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2);
void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
assert(stripes[1].read_start == 4096 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_start == 4096 && stripes[2].read_end == 128*1024);
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == rmw_buf+128*1024);
assert(stripes[1].read_buf == rmw_buf+128*1024*2);
assert(stripes[2].read_buf == rmw_buf+128*1024*3-4096);
@ -43,7 +104,7 @@ int main(int narg, char *args[])
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data
set_pattern(stripes[1].read_buf, 128*1024-4096, UINT64_MAX); // didn't read it, it's missing
set_pattern(stripes[2].read_buf, 128*1024-4096, 0); // old parity = 0
calc_rmw_parity(stripes, 3);
calc_rmw_parity(stripes, 3, osd_set, osd_set, 128*1024);
check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity
check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
@ -57,10 +118,13 @@ int main(int narg, char *args[])
assert(stripes[2].req_end == 0);
// Test 5.2
write_buf = malloc(64*1024*3);
rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 2);
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024);
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == rmw_buf+128*1024);
assert(stripes[1].read_buf == rmw_buf+64*3*1024);
assert(stripes[2].read_buf == rmw_buf+64*4*1024);
@ -74,10 +138,13 @@ int main(int narg, char *args[])
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
osd_set[1] = 2;
write_buf = malloc(64*1024*3);
rmw_buf = calc_rmw_reads(write_buf, stripes, osd_set, 3, 2, 3);
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, osd_set, 128*1024);
assert(stripes[0].read_end == 0);
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_end == 0);
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 64*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == 0);
assert(stripes[1].read_buf == rmw_buf+128*1024);
assert(stripes[2].read_buf == 0);
@ -87,7 +154,49 @@ int main(int narg, char *args[])
free(rmw_buf);
free(write_buf);
osd_set[1] = 0;
// Test 7
test7();
// End
printf("all ok\n");
return 0;
}
void test7()
{
osd_num_t osd_set[3] = { 1, 0, 3 };
osd_num_t write_osd_set[3] = { 1, 2, 3 };
osd_rmw_stripe_t stripes[3] = { 0 };
// Test 7.1
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
void *write_buf = malloc(8192);
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, write_osd_set, 128*1024);
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[0].read_buf == rmw_buf+128*1024);
assert(stripes[1].read_buf == rmw_buf+128*1024*2);
assert(stripes[2].read_buf == rmw_buf+128*1024*3);
assert(stripes[0].write_buf == write_buf);
assert(stripes[1].write_buf == write_buf+4096);
assert(stripes[2].write_buf == rmw_buf);
// Test 7.2
set_pattern(write_buf, 8192, PATTERN0);
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data
set_pattern(stripes[1].read_buf, 128*1024, UINT64_MAX); // didn't read it, it's missing
set_pattern(stripes[2].read_buf, 128*1024, 0); // old parity = 0
calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024);
assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024);
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
assert(stripes[1].write_buf == stripes[1].read_buf);
check_pattern(stripes[1].write_buf, 4096, PATTERN0);
check_pattern(stripes[1].write_buf+4096, 128*1024-4096, PATTERN1);
check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity
check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
free(rmw_buf);
free(write_buf);
}