forked from vitalif/vitastor
Implement chained (optimized) read in the primary OSD code
parent
6950b8e3a0
commit
38a3df4a0e
|
@ -66,7 +66,7 @@ target_link_libraries(fio_vitastor_blk
|
||||||
# vitastor-osd
|
# vitastor-osd
|
||||||
add_executable(vitastor-osd
|
add_executable(vitastor-osd
|
||||||
osd_main.cpp osd.cpp osd_secondary.cpp msgr_receive.cpp msgr_send.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
|
osd_main.cpp osd.cpp osd_secondary.cpp msgr_receive.cpp msgr_send.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
|
||||||
osd_primary.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
|
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
|
||||||
etcd_state_client.cpp messenger.cpp msgr_stop.cpp msgr_op.cpp osd_cluster.cpp http_client.cpp osd_ops.cpp pg_states.cpp
|
etcd_state_client.cpp messenger.cpp msgr_stop.cpp msgr_op.cpp osd_cluster.cpp http_client.cpp osd_ops.cpp pg_states.cpp
|
||||||
osd_rmw.cpp base64.cpp timerfd_manager.cpp epoll_manager.cpp ../json11/json11.cpp
|
osd_rmw.cpp base64.cpp timerfd_manager.cpp epoll_manager.cpp ../json11/json11.cpp
|
||||||
)
|
)
|
||||||
|
|
|
@ -811,6 +811,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||||
.inode = op->cur_inode,
|
.inode = op->cur_inode,
|
||||||
.offset = part->offset,
|
.offset = part->offset,
|
||||||
.len = part->len,
|
.len = part->len,
|
||||||
|
.meta_revision = 0,
|
||||||
} },
|
} },
|
||||||
.bitmap = op->opcode == OSD_OP_WRITE ? NULL : op->part_bitmaps + pg_bitmap_size*i,
|
.bitmap = op->opcode == OSD_OP_WRITE ? NULL : op->part_bitmaps + pg_bitmap_size*i,
|
||||||
.bitmap_len = (unsigned)(op->opcode == OSD_OP_WRITE ? 0 : pg_bitmap_size),
|
.bitmap_len = (unsigned)(op->opcode == OSD_OP_WRITE ? 0 : pg_bitmap_size),
|
||||||
|
|
|
@ -20,6 +20,10 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
||||||
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
clean_entry_bitmap_size = bs_block_size / bs_bitmap_granularity / 8;
|
clean_entry_bitmap_size = bs_block_size / bs_bitmap_granularity / 8;
|
||||||
|
|
||||||
|
zero_buffer_size = 1<<20;
|
||||||
|
zero_buffer = malloc_or_die(zero_buffer_size);
|
||||||
|
memset(zero_buffer, 0, zero_buffer_size);
|
||||||
|
|
||||||
this->config = config;
|
this->config = config;
|
||||||
this->ringloop = ringloop;
|
this->ringloop = ringloop;
|
||||||
|
|
||||||
|
@ -58,6 +62,7 @@ osd_t::~osd_t()
|
||||||
delete epmgr;
|
delete epmgr;
|
||||||
delete bs;
|
delete bs;
|
||||||
close(listen_fd);
|
close(listen_fd);
|
||||||
|
free(zero_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void osd_t::parse_config(blockstore_config_t & config)
|
void osd_t::parse_config(blockstore_config_t & config)
|
||||||
|
|
37
src/osd.h
37
src/osd.h
|
@ -66,6 +66,28 @@ struct inode_stats_t
|
||||||
uint64_t op_bytes[3] = { 0 };
|
uint64_t op_bytes[3] = { 0 };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct bitmap_request_t
|
||||||
|
{
|
||||||
|
osd_num_t osd_num;
|
||||||
|
object_id oid;
|
||||||
|
uint64_t version;
|
||||||
|
void *bmp_buf;
|
||||||
|
};
|
||||||
|
|
||||||
|
inline bool operator < (const bitmap_request_t & a, const bitmap_request_t & b)
|
||||||
|
{
|
||||||
|
return a.osd_num < b.osd_num || a.osd_num == b.osd_num && a.oid < b.oid;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct osd_chain_read_t
|
||||||
|
{
|
||||||
|
int chain_pos;
|
||||||
|
inode_t inode;
|
||||||
|
uint32_t offset, len;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct osd_rmw_stripe_t;
|
||||||
|
|
||||||
class osd_t
|
class osd_t
|
||||||
{
|
{
|
||||||
// config
|
// config
|
||||||
|
@ -126,6 +148,8 @@ class osd_t
|
||||||
bool stopping = false;
|
bool stopping = false;
|
||||||
int inflight_ops = 0;
|
int inflight_ops = 0;
|
||||||
blockstore_t *bs;
|
blockstore_t *bs;
|
||||||
|
void *zero_buffer = NULL;
|
||||||
|
uint64_t zero_buffer_size = 0;
|
||||||
uint32_t bs_block_size, bs_bitmap_granularity, clean_entry_bitmap_size;
|
uint32_t bs_block_size, bs_bitmap_granularity, clean_entry_bitmap_size;
|
||||||
ring_loop_t *ringloop;
|
ring_loop_t *ringloop;
|
||||||
timerfd_manager_t *tfd = NULL;
|
timerfd_manager_t *tfd = NULL;
|
||||||
|
@ -216,7 +240,10 @@ class osd_t
|
||||||
void handle_primary_bs_subop(osd_op_t *subop);
|
void handle_primary_bs_subop(osd_op_t *subop);
|
||||||
void add_bs_subop_stats(osd_op_t *subop);
|
void add_bs_subop_stats(osd_op_t *subop);
|
||||||
void pg_cancel_write_queue(pg_t & pg, osd_op_t *first_op, object_id oid, int retval);
|
void pg_cancel_write_queue(pg_t & pg, osd_op_t *first_op, object_id oid, int retval);
|
||||||
void submit_primary_subops(int submit_type, uint64_t op_version, int pg_size, const uint64_t* osd_set, osd_op_t *cur_op);
|
|
||||||
|
void submit_primary_subops(int submit_type, uint64_t op_version, const uint64_t* osd_set, osd_op_t *cur_op);
|
||||||
|
int submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t op_version,
|
||||||
|
osd_rmw_stripe_t *stripes, const uint64_t* osd_set, osd_op_t *cur_op, int subop_idx, int zero_read);
|
||||||
void submit_primary_del_subops(osd_op_t *cur_op, uint64_t *cur_set, uint64_t set_size, pg_osd_set_t & loc_set);
|
void submit_primary_del_subops(osd_op_t *cur_op, uint64_t *cur_set, uint64_t set_size, pg_osd_set_t & loc_set);
|
||||||
void submit_primary_del_batch(osd_op_t *cur_op, obj_ver_osd_t *chunks_to_delete, int chunks_to_delete_count);
|
void submit_primary_del_batch(osd_op_t *cur_op, obj_ver_osd_t *chunks_to_delete, int chunks_to_delete_count);
|
||||||
int submit_primary_sync_subops(osd_op_t *cur_op);
|
int submit_primary_sync_subops(osd_op_t *cur_op);
|
||||||
|
@ -224,6 +251,14 @@ class osd_t
|
||||||
|
|
||||||
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def, pg_osd_set_state_t **object_state);
|
uint64_t* get_object_osd_set(pg_t &pg, object_id &oid, uint64_t *def, pg_osd_set_state_t **object_state);
|
||||||
|
|
||||||
|
void continue_chained_read(osd_op_t *cur_op);
|
||||||
|
int submit_chained_read_requests(pg_t & pg, osd_op_t *cur_op);
|
||||||
|
void send_chained_read_results(pg_t & pg, osd_op_t *cur_op);
|
||||||
|
std::vector<osd_chain_read_t> collect_chained_read_requests(osd_op_t *cur_op);
|
||||||
|
int collect_bitmap_requests(osd_op_t *cur_op, pg_t & pg, std::vector<bitmap_request_t> & bitmap_requests);
|
||||||
|
int submit_bitmap_subops(osd_op_t *cur_op, pg_t & pg);
|
||||||
|
int read_bitmaps(osd_op_t *cur_op, pg_t & pg, int base_state);
|
||||||
|
|
||||||
inline pg_num_t map_to_pg(object_id oid, uint64_t pg_stripe_size)
|
inline pg_num_t map_to_pg(object_id oid, uint64_t pg_stripe_size)
|
||||||
{
|
{
|
||||||
uint64_t pg_count = pg_counts[INODE_POOL(oid.inode)];
|
uint64_t pg_count = pg_counts[INODE_POOL(oid.inode)];
|
||||||
|
|
|
@ -184,6 +184,10 @@ struct __attribute__((__packed__)) osd_op_rw_t
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
// length
|
// length
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
|
// flags (for future)
|
||||||
|
uint32_t flags;
|
||||||
|
// inode metadata revision
|
||||||
|
uint64_t meta_revision;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) osd_reply_rw_t
|
struct __attribute__((__packed__)) osd_reply_rw_t
|
||||||
|
|
|
@ -28,6 +28,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto & pool_cfg = pool_cfg_it->second;
|
auto & pool_cfg = pool_cfg_it->second;
|
||||||
|
// FIXME: op_data->pg_data_size can probably be removed (there's pg.pg_data_size)
|
||||||
uint64_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
|
uint64_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
|
||||||
uint64_t pg_block_size = bs_block_size * pg_data_size;
|
uint64_t pg_block_size = bs_block_size * pg_data_size;
|
||||||
object_id oid = {
|
object_id oid = {
|
||||||
|
@ -52,20 +53,81 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
|
int stripe_count = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg_it->second.pg_size);
|
||||||
|
int chain_size = 0;
|
||||||
|
if (cur_op->req.hdr.opcode == OSD_OP_READ && cur_op->req.rw.meta_revision > 0)
|
||||||
|
{
|
||||||
|
// Chained read
|
||||||
|
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
|
||||||
|
if (inode_it->second.mod_revision != cur_op->req.rw.meta_revision)
|
||||||
|
{
|
||||||
|
// Client view of the metadata differs from OSD's view
|
||||||
|
// Operation can't be completed correctly, client should retry later
|
||||||
|
finish_op(cur_op, -EPIPE);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Find parents from the same pool. Optimized reads only work within pools
|
||||||
|
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id &&
|
||||||
|
INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id)
|
||||||
|
{
|
||||||
|
chain_size++;
|
||||||
|
inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
|
||||||
|
}
|
||||||
|
if (chain_size)
|
||||||
|
{
|
||||||
|
// Add the original inode
|
||||||
|
chain_size++;
|
||||||
|
}
|
||||||
|
}
|
||||||
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
|
osd_primary_op_data_t *op_data = (osd_primary_op_data_t*)calloc_or_die(
|
||||||
1, sizeof(osd_primary_op_data_t) + (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) * stripe_count
|
// Allocate:
|
||||||
|
// - op_data
|
||||||
|
1, sizeof(osd_primary_op_data_t) +
|
||||||
|
// - stripes
|
||||||
|
// - resulting bitmap buffers
|
||||||
|
stripe_count * (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) +
|
||||||
|
chain_size * (
|
||||||
|
// - copy of the chain
|
||||||
|
sizeof(inode_t) +
|
||||||
|
// - bitmap buffers for chained read
|
||||||
|
stripe_count * clean_entry_bitmap_size +
|
||||||
|
// - 'missing' flags for chained reads
|
||||||
|
(pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size)
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
void *data_buf = ((void*)op_data) + sizeof(osd_primary_op_data_t);
|
||||||
op_data->pg_num = pg_num;
|
op_data->pg_num = pg_num;
|
||||||
op_data->oid = oid;
|
op_data->oid = oid;
|
||||||
op_data->stripes = ((osd_rmw_stripe_t*)(op_data+1));
|
op_data->stripes = (osd_rmw_stripe_t*)data_buf;
|
||||||
|
data_buf += sizeof(osd_rmw_stripe_t) * stripe_count;
|
||||||
op_data->scheme = pool_cfg.scheme;
|
op_data->scheme = pool_cfg.scheme;
|
||||||
op_data->pg_data_size = pg_data_size;
|
op_data->pg_data_size = pg_data_size;
|
||||||
|
op_data->pg_size = pg_it->second.pg_size;
|
||||||
cur_op->op_data = op_data;
|
cur_op->op_data = op_data;
|
||||||
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
|
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
|
||||||
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
||||||
for (int i = 0; i < stripe_count; i++)
|
for (int i = 0; i < stripe_count; i++)
|
||||||
{
|
{
|
||||||
op_data->stripes[i].bmp_buf = (void*)(op_data->stripes+stripe_count) + clean_entry_bitmap_size*i;
|
op_data->stripes[i].bmp_buf = data_buf;
|
||||||
|
data_buf += clean_entry_bitmap_size;
|
||||||
|
}
|
||||||
|
op_data->chain_size = chain_size;
|
||||||
|
if (chain_size > 0)
|
||||||
|
{
|
||||||
|
op_data->read_chain = (inode_t*)data_buf;
|
||||||
|
data_buf += sizeof(inode_t) * chain_size;
|
||||||
|
op_data->snapshot_bitmaps = data_buf;
|
||||||
|
data_buf += chain_size * stripe_count * clean_entry_bitmap_size;
|
||||||
|
op_data->missing_flags = (uint8_t*)data_buf;
|
||||||
|
data_buf += chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size);
|
||||||
|
// Copy chain
|
||||||
|
int chain_num = 0;
|
||||||
|
op_data->read_chain[chain_num++] = cur_op->req.rw.inode;
|
||||||
|
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
|
||||||
|
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id)
|
||||||
|
{
|
||||||
|
op_data->read_chain[chain_num++] = inode_it->second.parent_id;
|
||||||
|
inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pg_it->second.inflight++;
|
pg_it->second.inflight++;
|
||||||
return true;
|
return true;
|
||||||
|
@ -106,10 +168,17 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cur_op->reply.rw.bitmap_len = 0;
|
|
||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
if (op_data->st == 1) goto resume_1;
|
if (op_data->chain_size)
|
||||||
else if (op_data->st == 2) goto resume_2;
|
{
|
||||||
|
continue_chained_read(cur_op);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (op_data->st == 1)
|
||||||
|
goto resume_1;
|
||||||
|
else if (op_data->st == 2)
|
||||||
|
goto resume_2;
|
||||||
|
cur_op->reply.rw.bitmap_len = 0;
|
||||||
{
|
{
|
||||||
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
|
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
|
||||||
for (int role = 0; role < op_data->pg_data_size; role++)
|
for (int role = 0; role < op_data->pg_data_size; role++)
|
||||||
|
@ -124,8 +193,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
|
||||||
{
|
{
|
||||||
// Fast happy-path
|
// Fast happy-path
|
||||||
cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0);
|
cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0);
|
||||||
submit_primary_subops(SUBMIT_READ, op_data->target_ver,
|
submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.cur_set.data(), cur_op);
|
||||||
(op_data->scheme == POOL_SCHEME_REPLICATED ? pg.pg_size : op_data->pg_data_size), pg.cur_set.data(), cur_op);
|
|
||||||
op_data->st = 1;
|
op_data->st = 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -142,7 +210,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
|
||||||
op_data->scheme = pg.scheme;
|
op_data->scheme = pg.scheme;
|
||||||
op_data->degraded = 1;
|
op_data->degraded = 1;
|
||||||
cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0);
|
cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0);
|
||||||
submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.pg_size, cur_set, cur_op);
|
submit_primary_subops(SUBMIT_READ, op_data->target_ver, cur_set, cur_op);
|
||||||
op_data->st = 1;
|
op_data->st = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -265,7 +333,7 @@ resume_1:
|
||||||
// Determine which OSDs contain this object and delete it
|
// Determine which OSDs contain this object and delete it
|
||||||
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), &op_data->object_state);
|
op_data->prev_set = get_object_osd_set(pg, op_data->oid, pg.cur_set.data(), &op_data->object_state);
|
||||||
// Submit 1 read to determine the actual version number
|
// Submit 1 read to determine the actual version number
|
||||||
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op);
|
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, op_data->prev_set, cur_op);
|
||||||
resume_2:
|
resume_2:
|
||||||
op_data->st = 2;
|
op_data->st = 2;
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -31,15 +31,31 @@ struct osd_primary_op_data_t
|
||||||
uint64_t *prev_set = NULL;
|
uint64_t *prev_set = NULL;
|
||||||
pg_osd_set_state_t *object_state = NULL;
|
pg_osd_set_state_t *object_state = NULL;
|
||||||
|
|
||||||
// for sync. oops, requires freeing
|
union
|
||||||
std::vector<unstable_osd_num_t> *unstable_write_osds = NULL;
|
{
|
||||||
pool_pg_num_t *dirty_pgs = NULL;
|
struct
|
||||||
int dirty_pg_count = 0;
|
{
|
||||||
osd_num_t *dirty_osds = NULL;
|
// for sync. oops, requires freeing
|
||||||
int dirty_osd_count = 0;
|
std::vector<unstable_osd_num_t> *unstable_write_osds;
|
||||||
obj_ver_id *unstable_writes = NULL;
|
pool_pg_num_t *dirty_pgs;
|
||||||
obj_ver_osd_t *copies_to_delete = NULL;
|
int dirty_pg_count;
|
||||||
int copies_to_delete_count = 0;
|
osd_num_t *dirty_osds;
|
||||||
|
int dirty_osd_count;
|
||||||
|
obj_ver_id *unstable_writes;
|
||||||
|
obj_ver_osd_t *copies_to_delete;
|
||||||
|
int copies_to_delete_count;
|
||||||
|
};
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
// for read_bitmaps
|
||||||
|
void *snapshot_bitmaps;
|
||||||
|
inode_t *read_chain;
|
||||||
|
uint8_t *missing_flags;
|
||||||
|
int chain_size;
|
||||||
|
osd_chain_read_t *chain_reads;
|
||||||
|
int chain_read_count;
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
bool contains_osd(osd_num_t *osd_set, uint64_t size, osd_num_t osd_num);
|
bool contains_osd(osd_num_t *osd_set, uint64_t size, osd_num_t osd_num);
|
||||||
|
|
|
@ -0,0 +1,549 @@
|
||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#include "osd_primary.h"
|
||||||
|
#include "allocator.h"
|
||||||
|
|
||||||
|
void osd_t::continue_chained_read(osd_op_t *cur_op)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
|
||||||
|
if (op_data->st == 1)
|
||||||
|
goto resume_1;
|
||||||
|
else if (op_data->st == 2)
|
||||||
|
goto resume_2;
|
||||||
|
else if (op_data->st == 3)
|
||||||
|
goto resume_3;
|
||||||
|
else if (op_data->st == 4)
|
||||||
|
goto resume_4;
|
||||||
|
cur_op->reply.rw.bitmap_len = 0;
|
||||||
|
for (int role = 0; role < op_data->pg_data_size; role++)
|
||||||
|
{
|
||||||
|
op_data->stripes[role].read_start = op_data->stripes[role].req_start;
|
||||||
|
op_data->stripes[role].read_end = op_data->stripes[role].req_end;
|
||||||
|
}
|
||||||
|
resume_1:
|
||||||
|
resume_2:
|
||||||
|
// Read bitmaps
|
||||||
|
if (read_bitmaps(cur_op, pg, 1) != 0)
|
||||||
|
return;
|
||||||
|
// Prepare & submit reads
|
||||||
|
if (submit_chained_read_requests(pg, cur_op) != 0)
|
||||||
|
return;
|
||||||
|
if (op_data->n_subops > 0)
|
||||||
|
{
|
||||||
|
// Wait for reads
|
||||||
|
op_data->st = 3;
|
||||||
|
resume_3:
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
resume_4:
|
||||||
|
if (op_data->errors > 0)
|
||||||
|
{
|
||||||
|
free(op_data->chain_reads);
|
||||||
|
op_data->chain_reads = NULL;
|
||||||
|
finish_op(cur_op, op_data->epipe > 0 ? -EPIPE : -EIO);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
send_chained_read_results(pg, cur_op);
|
||||||
|
finish_op(cur_op, cur_op->req.rw.len);
|
||||||
|
}
|
||||||
|
|
||||||
|
int osd_t::read_bitmaps(osd_op_t *cur_op, pg_t & pg, int base_state)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
if (op_data->st == base_state)
|
||||||
|
goto resume_0;
|
||||||
|
else if (op_data->st == base_state+1)
|
||||||
|
goto resume_1;
|
||||||
|
if (pg.state == PG_ACTIVE && pg.scheme == POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
// Happy path for clean replicated PGs (all bitmaps are available locally)
|
||||||
|
for (int chain_num = 0; chain_num < op_data->chain_size; chain_num++)
|
||||||
|
{
|
||||||
|
object_id cur_oid = { .inode = op_data->read_chain[chain_num], .stripe = op_data->oid.stripe };
|
||||||
|
auto vo_it = pg.ver_override.find(cur_oid);
|
||||||
|
auto read_version = (vo_it != pg.ver_override.end() ? vo_it->second : UINT64_MAX);
|
||||||
|
// Read bitmap synchronously from the local database
|
||||||
|
bs->read_bitmap(cur_oid, read_version, op_data->snapshot_bitmaps + chain_num*clean_entry_bitmap_size, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (submit_bitmap_subops(cur_op, pg) < 0)
|
||||||
|
{
|
||||||
|
// Failure
|
||||||
|
finish_op(cur_op, -EIO);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
resume_0:
|
||||||
|
if (op_data->n_subops > 0)
|
||||||
|
{
|
||||||
|
// Wait for subops
|
||||||
|
op_data->st = base_state;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
resume_1:
|
||||||
|
if (pg.scheme != POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
for (int chain_num = 0; chain_num < op_data->chain_size; chain_num++)
|
||||||
|
{
|
||||||
|
// Check if we need to reconstruct any bitmaps
|
||||||
|
for (int i = 0; i < pg.pg_size; i++)
|
||||||
|
{
|
||||||
|
if (op_data->missing_flags[chain_num*pg.pg_size + i])
|
||||||
|
{
|
||||||
|
osd_rmw_stripe_t local_stripes[pg.pg_size] = { 0 };
|
||||||
|
for (i = 0; i < pg.pg_size; i++)
|
||||||
|
{
|
||||||
|
local_stripes[i].missing = op_data->missing_flags[chain_num*pg.pg_size + i] && true;
|
||||||
|
local_stripes[i].bmp_buf = op_data->snapshot_bitmaps + (chain_num*pg.pg_size + i)*clean_entry_bitmap_size;
|
||||||
|
local_stripes[i].read_start = local_stripes[i].read_end = 1;
|
||||||
|
}
|
||||||
|
if (pg.scheme == POOL_SCHEME_XOR)
|
||||||
|
{
|
||||||
|
reconstruct_stripes_xor(local_stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
|
}
|
||||||
|
else if (pg.scheme == POOL_SCHEME_JERASURE)
|
||||||
|
{
|
||||||
|
reconstruct_stripes_jerasure(local_stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int osd_t::collect_bitmap_requests(osd_op_t *cur_op, pg_t & pg, std::vector<bitmap_request_t> & bitmap_requests)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
for (int chain_num = 0; chain_num < op_data->chain_size; chain_num++)
|
||||||
|
{
|
||||||
|
object_id cur_oid = { .inode = op_data->read_chain[chain_num], .stripe = op_data->oid.stripe };
|
||||||
|
auto vo_it = pg.ver_override.find(cur_oid);
|
||||||
|
uint64_t target_version = vo_it != pg.ver_override.end() ? vo_it->second : UINT64_MAX;
|
||||||
|
pg_osd_set_state_t *object_state;
|
||||||
|
uint64_t* cur_set = get_object_osd_set(pg, cur_oid, pg.cur_set.data(), &object_state);
|
||||||
|
if (pg.scheme == POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
osd_num_t read_target = 0;
|
||||||
|
for (int i = 0; i < pg.pg_size; i++)
|
||||||
|
{
|
||||||
|
if (cur_set[i] == this->osd_num || cur_set[i] != 0 && read_target == 0)
|
||||||
|
{
|
||||||
|
// Select local or any other available OSD for reading
|
||||||
|
read_target = cur_set[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(read_target != 0);
|
||||||
|
bitmap_requests.push_back((bitmap_request_t){
|
||||||
|
.osd_num = read_target,
|
||||||
|
.oid = cur_oid,
|
||||||
|
.version = target_version,
|
||||||
|
.bmp_buf = op_data->snapshot_bitmaps + chain_num*clean_entry_bitmap_size,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
osd_rmw_stripe_t local_stripes[pg.pg_size];
|
||||||
|
memcpy(local_stripes, op_data->stripes, sizeof(osd_rmw_stripe_t) * pg.pg_size);
|
||||||
|
if (extend_missing_stripes(local_stripes, cur_set, pg.pg_data_size, pg.pg_size) < 0)
|
||||||
|
{
|
||||||
|
free(op_data->snapshot_bitmaps);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int need_at_least = 0;
|
||||||
|
for (int i = 0; i < pg.pg_size; i++)
|
||||||
|
{
|
||||||
|
if (local_stripes[i].read_end != 0 && cur_set[i] == 0)
|
||||||
|
{
|
||||||
|
// We need this part of the bitmap, but it's unavailable
|
||||||
|
need_at_least = pg.pg_data_size;
|
||||||
|
op_data->missing_flags[chain_num*pg.pg_size + i] = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
op_data->missing_flags[chain_num*pg.pg_size + i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int found = 0;
|
||||||
|
for (int i = 0; i < pg.pg_size; i++)
|
||||||
|
{
|
||||||
|
if (cur_set[i] != 0 && (local_stripes[i].read_end != 0 || found < need_at_least))
|
||||||
|
{
|
||||||
|
// Read part of the bitmap
|
||||||
|
bitmap_requests.push_back((bitmap_request_t){
|
||||||
|
.osd_num = cur_set[i],
|
||||||
|
.oid = {
|
||||||
|
.inode = cur_oid.inode,
|
||||||
|
.stripe = cur_oid.stripe | i,
|
||||||
|
},
|
||||||
|
.version = target_version,
|
||||||
|
.bmp_buf = op_data->snapshot_bitmaps + (chain_num*pg.pg_size + i)*clean_entry_bitmap_size,
|
||||||
|
});
|
||||||
|
found++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Already checked by extend_missing_stripes, so it's fine to use assert
|
||||||
|
assert(found >= need_at_least);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::sort(bitmap_requests.begin(), bitmap_requests.end());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int osd_t::submit_bitmap_subops(osd_op_t *cur_op, pg_t & pg)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
std::vector<bitmap_request_t> *bitmap_requests = new std::vector<bitmap_request_t>();
|
||||||
|
if (collect_bitmap_requests(cur_op, pg, *bitmap_requests) < 0)
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
op_data->n_subops = 0;
|
||||||
|
for (int i = 0; i < bitmap_requests->size(); i++)
|
||||||
|
{
|
||||||
|
if ((i == bitmap_requests->size()-1 || (*bitmap_requests)[i+1].osd_num != (*bitmap_requests)[i].osd_num) &&
|
||||||
|
(*bitmap_requests)[i].osd_num != this->osd_num)
|
||||||
|
{
|
||||||
|
op_data->n_subops++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (op_data->n_subops)
|
||||||
|
{
|
||||||
|
op_data->fact_ver = 0;
|
||||||
|
op_data->done = op_data->errors = 0;
|
||||||
|
op_data->subops = new osd_op_t[op_data->n_subops];
|
||||||
|
}
|
||||||
|
for (int i = 0, subop_idx = 0, prev = 0; i < bitmap_requests->size(); i++)
|
||||||
|
{
|
||||||
|
if (i == bitmap_requests->size()-1 || (*bitmap_requests)[i+1].osd_num != (*bitmap_requests)[i].osd_num)
|
||||||
|
{
|
||||||
|
osd_num_t subop_osd_num = (*bitmap_requests)[i].osd_num;
|
||||||
|
if (subop_osd_num == this->osd_num)
|
||||||
|
{
|
||||||
|
// Read bitmap synchronously from the local database
|
||||||
|
for (int j = prev; j <= i; j++)
|
||||||
|
{
|
||||||
|
bs->read_bitmap((*bitmap_requests)[j].oid, (*bitmap_requests)[j].version, (*bitmap_requests)[j].bmp_buf, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Send to a remote OSD
|
||||||
|
osd_op_t *subop = op_data->subops+subop_idx;
|
||||||
|
subop->op_type = OSD_OP_OUT;
|
||||||
|
subop->peer_fd = c_cli.osd_peer_fds.at(subop_osd_num);
|
||||||
|
// FIXME: Use the pre-allocated buffer
|
||||||
|
subop->buf = malloc_or_die(sizeof(obj_ver_id)*(i+1-prev));
|
||||||
|
subop->req = (osd_any_op_t){
|
||||||
|
.sec_read_bmp = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = c_cli.next_subop_id++,
|
||||||
|
.opcode = OSD_OP_SEC_READ_BMP,
|
||||||
|
},
|
||||||
|
.len = sizeof(obj_ver_id)*(i+1-prev),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
obj_ver_id *ov = (obj_ver_id*)subop->buf;
|
||||||
|
for (int j = prev; j <= i; j++, ov++)
|
||||||
|
{
|
||||||
|
ov->oid = (*bitmap_requests)[j].oid;
|
||||||
|
ov->version = (*bitmap_requests)[j].version;
|
||||||
|
}
|
||||||
|
subop->callback = [cur_op, bitmap_requests, prev, i, this](osd_op_t *subop)
|
||||||
|
{
|
||||||
|
int requested_count = subop->req.sec_read_bmp.len / sizeof(obj_ver_id);
|
||||||
|
if (subop->reply.hdr.retval == requested_count * (8 + clean_entry_bitmap_size))
|
||||||
|
{
|
||||||
|
void *cur_buf = subop->buf + 8;
|
||||||
|
for (int j = prev; j <= i; j++)
|
||||||
|
{
|
||||||
|
memcpy((*bitmap_requests)[j].bmp_buf, cur_buf, clean_entry_bitmap_size);
|
||||||
|
cur_buf += 8 + clean_entry_bitmap_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((cur_op->op_data->errors + cur_op->op_data->done + 1) >= cur_op->op_data->n_subops)
|
||||||
|
{
|
||||||
|
delete bitmap_requests;
|
||||||
|
}
|
||||||
|
handle_primary_subop(subop, cur_op);
|
||||||
|
};
|
||||||
|
c_cli.outbox_push(subop);
|
||||||
|
subop_idx++;
|
||||||
|
}
|
||||||
|
prev = i+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!op_data->n_subops)
|
||||||
|
{
|
||||||
|
delete bitmap_requests;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<osd_chain_read_t> osd_t::collect_chained_read_requests(osd_op_t *cur_op)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
std::vector<osd_chain_read_t> chain_reads;
|
||||||
|
int stripe_count = (op_data->scheme == POOL_SCHEME_REPLICATED ? 1 : op_data->pg_size);
|
||||||
|
memset(op_data->stripes[0].bmp_buf, 0, stripe_count * clean_entry_bitmap_size);
|
||||||
|
uint8_t *global_bitmap = (uint8_t*)op_data->stripes[0].bmp_buf;
|
||||||
|
// We always use at most 1 read request per layer
|
||||||
|
for (int chain_pos = 0; chain_pos < op_data->chain_size; chain_pos++)
|
||||||
|
{
|
||||||
|
uint8_t *part_bitmap = ((uint8_t*)op_data->snapshot_bitmaps) + chain_pos*stripe_count*clean_entry_bitmap_size;
|
||||||
|
int start = (cur_op->req.rw.offset - op_data->oid.stripe)/bs_bitmap_granularity;
|
||||||
|
int end = start + cur_op->req.rw.len/bs_bitmap_granularity;
|
||||||
|
// Skip unneeded part in the beginning
|
||||||
|
while (start < end && (
|
||||||
|
((global_bitmap[start>>3] >> (start&7)) & 1) ||
|
||||||
|
!((part_bitmap[start>>3] >> (start&7)) & 1)))
|
||||||
|
{
|
||||||
|
start++;
|
||||||
|
}
|
||||||
|
// Skip unneeded part in the end
|
||||||
|
while (start < end && (
|
||||||
|
((global_bitmap[(end-1)>>3] >> ((end-1)&7)) & 1) ||
|
||||||
|
!((part_bitmap[(end-1)>>3] >> ((end-1)&7)) & 1)))
|
||||||
|
{
|
||||||
|
end--;
|
||||||
|
}
|
||||||
|
if (start < end)
|
||||||
|
{
|
||||||
|
// Copy (OR) bits in between
|
||||||
|
int cur = start;
|
||||||
|
for (; cur < end && (cur & 0x7); cur++)
|
||||||
|
{
|
||||||
|
global_bitmap[cur>>3] = global_bitmap[cur>>3] | (part_bitmap[cur>>3] & (1 << (cur&7)));
|
||||||
|
}
|
||||||
|
for (; cur <= end-8; cur += 8)
|
||||||
|
{
|
||||||
|
global_bitmap[cur>>3] = global_bitmap[cur>>3] | part_bitmap[cur>>3];
|
||||||
|
}
|
||||||
|
for (; cur < end; cur++)
|
||||||
|
{
|
||||||
|
global_bitmap[cur>>3] = global_bitmap[cur>>3] | (part_bitmap[cur>>3] & (1 << (cur&7)));
|
||||||
|
}
|
||||||
|
// Add request
|
||||||
|
chain_reads.push_back((osd_chain_read_t){
|
||||||
|
.chain_pos = chain_pos,
|
||||||
|
.inode = op_data->read_chain[chain_pos],
|
||||||
|
.offset = start*bs_bitmap_granularity,
|
||||||
|
.len = (end-start)*bs_bitmap_granularity,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return chain_reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
int osd_t::submit_chained_read_requests(pg_t & pg, osd_op_t *cur_op)
|
||||||
|
{
|
||||||
|
// Decide which parts of which objects we need to read based on bitmaps
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
auto chain_reads = collect_chained_read_requests(cur_op);
|
||||||
|
int stripe_count = (pg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size);
|
||||||
|
op_data->chain_read_count = chain_reads.size();
|
||||||
|
op_data->chain_reads = (osd_chain_read_t*)calloc_or_die(
|
||||||
|
1, (sizeof(osd_chain_read_t) + sizeof(osd_rmw_stripe_t)*stripe_count) * chain_reads.size()
|
||||||
|
);
|
||||||
|
osd_rmw_stripe_t *chain_stripes = (osd_rmw_stripe_t*)(((void*)op_data->chain_reads) + sizeof(osd_chain_read_t) * op_data->chain_read_count);
|
||||||
|
// Now process each subrequest as a separate read, including reconstruction if needed
|
||||||
|
// Prepare reads
|
||||||
|
int n_subops = 0;
|
||||||
|
uint64_t read_buffer_size = 0;
|
||||||
|
for (int cri = 0; cri < chain_reads.size(); cri++)
|
||||||
|
{
|
||||||
|
op_data->chain_reads[cri] = chain_reads[cri];
|
||||||
|
object_id cur_oid = { .inode = chain_reads[cri].inode, .stripe = op_data->oid.stripe };
|
||||||
|
// FIXME: maybe introduce split_read_stripes to shorten these lines and to remove read_start=req_start
|
||||||
|
osd_rmw_stripe_t *stripes = chain_stripes + cri*stripe_count;
|
||||||
|
split_stripes(pg.pg_data_size, bs_block_size, chain_reads[cri].offset, chain_reads[cri].len, stripes);
|
||||||
|
if (op_data->scheme == POOL_SCHEME_REPLICATED && !stripes[0].req_end)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
for (int role = 0; role < op_data->pg_data_size; role++)
|
||||||
|
{
|
||||||
|
stripes[role].read_start = stripes[role].req_start;
|
||||||
|
stripes[role].read_end = stripes[role].req_end;
|
||||||
|
}
|
||||||
|
uint64_t *cur_set = pg.cur_set.data();
|
||||||
|
if (pg.state != PG_ACTIVE && op_data->scheme != POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
pg_osd_set_state_t *object_state;
|
||||||
|
cur_set = get_object_osd_set(pg, cur_oid, pg.cur_set.data(), &object_state);
|
||||||
|
if (extend_missing_stripes(stripes, cur_set, pg.pg_data_size, pg.pg_size) < 0)
|
||||||
|
{
|
||||||
|
free(op_data->chain_reads);
|
||||||
|
op_data->chain_reads = NULL;
|
||||||
|
finish_op(cur_op, -EIO);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
op_data->degraded = 1;
|
||||||
|
}
|
||||||
|
if (op_data->scheme == POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
n_subops++;
|
||||||
|
read_buffer_size += stripes[0].read_end - stripes[0].read_start;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int role = 0; role < pg.pg_size; role++)
|
||||||
|
{
|
||||||
|
if (stripes[role].read_end > 0 && cur_set[role] != 0)
|
||||||
|
n_subops++;
|
||||||
|
if (stripes[role].read_end > 0)
|
||||||
|
read_buffer_size += stripes[role].read_end - stripes[role].read_start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, read_buffer_size);
|
||||||
|
void *cur_buf = cur_op->buf;
|
||||||
|
for (int cri = 0; cri < chain_reads.size(); cri++)
|
||||||
|
{
|
||||||
|
osd_rmw_stripe_t *stripes = chain_stripes + cri*stripe_count;
|
||||||
|
for (int role = 0; role < stripe_count; role++)
|
||||||
|
{
|
||||||
|
if (stripes[role].read_end > 0)
|
||||||
|
{
|
||||||
|
stripes[role].read_buf = cur_buf;
|
||||||
|
stripes[role].bmp_buf = op_data->snapshot_bitmaps + (chain_reads[cri].chain_pos*stripe_count + role)*clean_entry_bitmap_size;
|
||||||
|
cur_buf += stripes[role].read_end - stripes[role].read_start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Submit all reads
|
||||||
|
op_data->fact_ver = UINT64_MAX;
|
||||||
|
op_data->done = op_data->errors = 0;
|
||||||
|
op_data->n_subops = n_subops;
|
||||||
|
if (!n_subops)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
op_data->subops = new osd_op_t[n_subops];
|
||||||
|
int cur_subops = 0;
|
||||||
|
for (int cri = 0; cri < chain_reads.size(); cri++)
|
||||||
|
{
|
||||||
|
osd_rmw_stripe_t *stripes = chain_stripes + cri*stripe_count;
|
||||||
|
if (op_data->scheme == POOL_SCHEME_REPLICATED && !stripes[0].req_end)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
object_id cur_oid = { .inode = chain_reads[cri].inode, .stripe = op_data->oid.stripe };
|
||||||
|
auto vo_it = pg.ver_override.find(cur_oid);
|
||||||
|
uint64_t target_ver = vo_it != pg.ver_override.end() ? vo_it->second : UINT64_MAX;
|
||||||
|
uint64_t *cur_set = pg.cur_set.data();
|
||||||
|
if (pg.state != PG_ACTIVE && op_data->scheme != POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
pg_osd_set_state_t *object_state;
|
||||||
|
cur_set = get_object_osd_set(pg, cur_oid, pg.cur_set.data(), &object_state);
|
||||||
|
}
|
||||||
|
int zero_read = -1;
|
||||||
|
if (op_data->scheme == POOL_SCHEME_REPLICATED)
|
||||||
|
{
|
||||||
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
|
if (cur_set[role] == this->osd_num || zero_read == -1)
|
||||||
|
zero_read = role;
|
||||||
|
}
|
||||||
|
cur_subops += submit_primary_subop_batch(SUBMIT_READ, chain_reads[cri].inode, target_ver, stripes, cur_set, cur_op, cur_subops, zero_read);
|
||||||
|
}
|
||||||
|
assert(cur_subops == n_subops);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void osd_t::send_chained_read_results(pg_t & pg, osd_op_t *cur_op)
|
||||||
|
{
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
int stripe_count = (pg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size);
|
||||||
|
osd_rmw_stripe_t *chain_stripes = (osd_rmw_stripe_t*)(((void*)op_data->chain_reads) + sizeof(osd_chain_read_t) * op_data->chain_read_count);
|
||||||
|
// Reconstruct parts if needed
|
||||||
|
if (op_data->degraded)
|
||||||
|
{
|
||||||
|
int stripe_count = (pg.scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size);
|
||||||
|
for (int cri = 0; cri < op_data->chain_read_count; cri++)
|
||||||
|
{
|
||||||
|
// Reconstruct missing stripes
|
||||||
|
osd_rmw_stripe_t *stripes = chain_stripes + cri*stripe_count;
|
||||||
|
if (op_data->scheme == POOL_SCHEME_XOR)
|
||||||
|
{
|
||||||
|
reconstruct_stripes_xor(stripes, pg.pg_size, clean_entry_bitmap_size);
|
||||||
|
}
|
||||||
|
else if (op_data->scheme == POOL_SCHEME_JERASURE)
|
||||||
|
{
|
||||||
|
reconstruct_stripes_jerasure(stripes, pg.pg_size, pg.pg_data_size, clean_entry_bitmap_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Send bitmap
|
||||||
|
cur_op->reply.rw.bitmap_len = op_data->pg_data_size * clean_entry_bitmap_size;
|
||||||
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
|
// And finally compose the result
|
||||||
|
uint64_t sent = 0;
|
||||||
|
int prev_pos = 0, pos = 0;
|
||||||
|
bool prev_set = false;
|
||||||
|
int prev = (cur_op->req.rw.offset - op_data->oid.stripe) / bs_bitmap_granularity;
|
||||||
|
int end = prev + cur_op->req.rw.len/bs_bitmap_granularity;
|
||||||
|
int cur = prev;
|
||||||
|
while (cur <= end)
|
||||||
|
{
|
||||||
|
bool has_bit = false;
|
||||||
|
if (cur < end)
|
||||||
|
{
|
||||||
|
for (pos = 0; pos < op_data->chain_size; pos++)
|
||||||
|
{
|
||||||
|
has_bit = (((uint8_t*)op_data->snapshot_bitmaps)[pos*stripe_count*clean_entry_bitmap_size + cur/8] >> (cur%8)) & 1;
|
||||||
|
if (has_bit)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (has_bit != prev_set || pos != prev_pos || cur == end)
|
||||||
|
{
|
||||||
|
if (cur > prev)
|
||||||
|
{
|
||||||
|
// Send buffer in parts to avoid copying
|
||||||
|
if (!prev_set)
|
||||||
|
{
|
||||||
|
while ((cur-prev) > zero_buffer_size/bs_bitmap_granularity)
|
||||||
|
{
|
||||||
|
cur_op->iov.push_back(zero_buffer, zero_buffer_size);
|
||||||
|
sent += zero_buffer_size;
|
||||||
|
prev += zero_buffer_size/bs_bitmap_granularity;
|
||||||
|
}
|
||||||
|
cur_op->iov.push_back(zero_buffer, (cur-prev)*bs_bitmap_granularity);
|
||||||
|
sent += (cur-prev)*bs_bitmap_granularity;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
osd_rmw_stripe_t *stripes = chain_stripes + prev_pos*stripe_count;
|
||||||
|
while (cur > prev)
|
||||||
|
{
|
||||||
|
int role = prev*bs_bitmap_granularity/bs_block_size;
|
||||||
|
int role_start = prev*bs_bitmap_granularity - role*bs_block_size;
|
||||||
|
int role_end = cur*bs_bitmap_granularity - role*bs_block_size;
|
||||||
|
if (role_end > bs_block_size)
|
||||||
|
role_end = bs_block_size;
|
||||||
|
assert(stripes[role].read_buf);
|
||||||
|
cur_op->iov.push_back(
|
||||||
|
stripes[role].read_buf + (role_start - stripes[role].read_start),
|
||||||
|
role_end - role_start
|
||||||
|
);
|
||||||
|
sent += role_end - role_start;
|
||||||
|
prev += (role_end - role_start)/bs_bitmap_granularity;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prev = cur;
|
||||||
|
prev_pos = pos;
|
||||||
|
prev_set = has_bit;
|
||||||
|
}
|
||||||
|
cur++;
|
||||||
|
}
|
||||||
|
assert(sent == cur_op->req.rw.len);
|
||||||
|
free(op_data->chain_reads);
|
||||||
|
op_data->chain_reads = NULL;
|
||||||
|
}
|
|
@ -76,9 +76,6 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(!cur_op->op_data->subops);
|
assert(!cur_op->op_data->subops);
|
||||||
assert(!cur_op->op_data->unstable_write_osds);
|
|
||||||
assert(!cur_op->op_data->unstable_writes);
|
|
||||||
assert(!cur_op->op_data->dirty_pgs);
|
|
||||||
free(cur_op->op_data);
|
free(cur_op->op_data);
|
||||||
cur_op->op_data = NULL;
|
cur_op->op_data = NULL;
|
||||||
}
|
}
|
||||||
|
@ -106,7 +103,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_size, const uint64_t* osd_set, osd_op_t *cur_op)
|
void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, const uint64_t* osd_set, osd_op_t *cur_op)
|
||||||
{
|
{
|
||||||
bool wr = submit_type == SUBMIT_WRITE;
|
bool wr = submit_type == SUBMIT_WRITE;
|
||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
@ -114,32 +111,34 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
bool rep = op_data->scheme == POOL_SCHEME_REPLICATED;
|
bool rep = op_data->scheme == POOL_SCHEME_REPLICATED;
|
||||||
// Allocate subops
|
// Allocate subops
|
||||||
int n_subops = 0, zero_read = -1;
|
int n_subops = 0, zero_read = -1;
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
{
|
{
|
||||||
if (osd_set[role] == this->osd_num || osd_set[role] != 0 && zero_read == -1)
|
if (osd_set[role] == this->osd_num || osd_set[role] != 0 && zero_read == -1)
|
||||||
{
|
|
||||||
zero_read = role;
|
zero_read = role;
|
||||||
}
|
|
||||||
if (osd_set[role] != 0 && (wr || !rep && stripes[role].read_end != 0))
|
if (osd_set[role] != 0 && (wr || !rep && stripes[role].read_end != 0))
|
||||||
{
|
|
||||||
n_subops++;
|
n_subops++;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (!n_subops && (submit_type == SUBMIT_RMW_READ || rep))
|
if (!n_subops && (submit_type == SUBMIT_RMW_READ || rep))
|
||||||
{
|
|
||||||
n_subops = 1;
|
n_subops = 1;
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
zero_read = -1;
|
zero_read = -1;
|
||||||
}
|
|
||||||
osd_op_t *subops = new osd_op_t[n_subops];
|
osd_op_t *subops = new osd_op_t[n_subops];
|
||||||
op_data->fact_ver = 0;
|
op_data->fact_ver = 0;
|
||||||
op_data->done = op_data->errors = 0;
|
op_data->done = op_data->errors = 0;
|
||||||
op_data->n_subops = n_subops;
|
op_data->n_subops = n_subops;
|
||||||
op_data->subops = subops;
|
op_data->subops = subops;
|
||||||
int i = 0;
|
int sent = submit_primary_subop_batch(submit_type, op_data->oid.inode, op_version, op_data->stripes, osd_set, cur_op, 0, zero_read);
|
||||||
for (int role = 0; role < pg_size; role++)
|
assert(sent == n_subops);
|
||||||
|
}
|
||||||
|
|
||||||
|
int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t op_version,
|
||||||
|
osd_rmw_stripe_t *stripes, const uint64_t* osd_set, osd_op_t *cur_op, int subop_idx, int zero_read)
|
||||||
|
{
|
||||||
|
bool wr = submit_type == SUBMIT_WRITE;
|
||||||
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
bool rep = op_data->scheme == POOL_SCHEME_REPLICATED;
|
||||||
|
int i = subop_idx;
|
||||||
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
{
|
{
|
||||||
// We always submit zero-length writes to all replicas, even if the stripe is not modified
|
// We always submit zero-length writes to all replicas, even if the stripe is not modified
|
||||||
if (!(wr || !rep && stripes[role].read_end != 0 || zero_read == role))
|
if (!(wr || !rep && stripes[role].read_end != 0 || zero_read == role))
|
||||||
|
@ -150,20 +149,21 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
if (role_osd_num != 0)
|
if (role_osd_num != 0)
|
||||||
{
|
{
|
||||||
int stripe_num = rep ? 0 : role;
|
int stripe_num = rep ? 0 : role;
|
||||||
|
osd_op_t *subop = op_data->subops + i;
|
||||||
if (role_osd_num == this->osd_num)
|
if (role_osd_num == this->osd_num)
|
||||||
{
|
{
|
||||||
clock_gettime(CLOCK_REALTIME, &subops[i].tv_begin);
|
clock_gettime(CLOCK_REALTIME, &subop->tv_begin);
|
||||||
subops[i].op_type = (uint64_t)cur_op;
|
subop->op_type = (uint64_t)cur_op;
|
||||||
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
subop->bitmap = stripes[stripe_num].bmp_buf;
|
||||||
subops[i].bitmap_len = clean_entry_bitmap_size;
|
subop->bitmap_len = clean_entry_bitmap_size;
|
||||||
subops[i].bs_op = new blockstore_op_t({
|
subop->bs_op = new blockstore_op_t({
|
||||||
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
|
.opcode = (uint64_t)(wr ? (rep ? BS_OP_WRITE_STABLE : BS_OP_WRITE) : BS_OP_READ),
|
||||||
.callback = [subop = &subops[i], this](blockstore_op_t *bs_subop)
|
.callback = [subop, this](blockstore_op_t *bs_subop)
|
||||||
{
|
{
|
||||||
handle_primary_bs_subop(subop);
|
handle_primary_bs_subop(subop);
|
||||||
},
|
},
|
||||||
.oid = {
|
.oid = {
|
||||||
.inode = op_data->oid.inode,
|
.inode = inode,
|
||||||
.stripe = op_data->oid.stripe | stripe_num,
|
.stripe = op_data->oid.stripe | stripe_num,
|
||||||
},
|
},
|
||||||
.version = op_version,
|
.version = op_version,
|
||||||
|
@ -175,26 +175,26 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
#ifdef OSD_DEBUG
|
#ifdef OSD_DEBUG
|
||||||
printf(
|
printf(
|
||||||
"Submit %s to local: %lx:%lx v%lu %u-%u\n", wr ? "write" : "read",
|
"Submit %s to local: %lx:%lx v%lu %u-%u\n", wr ? "write" : "read",
|
||||||
op_data->oid.inode, op_data->oid.stripe | stripe_num, op_version,
|
inode, op_data->oid.stripe | stripe_num, op_version,
|
||||||
subops[i].bs_op->offset, subops[i].bs_op->len
|
subop->bs_op->offset, subop->bs_op->len
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
bs->enqueue_op(subops[i].bs_op);
|
bs->enqueue_op(subop->bs_op);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
subops[i].op_type = OSD_OP_OUT;
|
subop->op_type = OSD_OP_OUT;
|
||||||
subops[i].peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
|
subop->peer_fd = c_cli.osd_peer_fds.at(role_osd_num);
|
||||||
subops[i].bitmap = stripes[stripe_num].bmp_buf;
|
subop->bitmap = stripes[stripe_num].bmp_buf;
|
||||||
subops[i].bitmap_len = clean_entry_bitmap_size;
|
subop->bitmap_len = clean_entry_bitmap_size;
|
||||||
subops[i].req.sec_rw = {
|
subop->req.sec_rw = {
|
||||||
.header = {
|
.header = {
|
||||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
.id = c_cli.next_subop_id++,
|
.id = c_cli.next_subop_id++,
|
||||||
.opcode = (uint64_t)(wr ? (rep ? OSD_OP_SEC_WRITE_STABLE : OSD_OP_SEC_WRITE) : OSD_OP_SEC_READ),
|
.opcode = (uint64_t)(wr ? (rep ? OSD_OP_SEC_WRITE_STABLE : OSD_OP_SEC_WRITE) : OSD_OP_SEC_READ),
|
||||||
},
|
},
|
||||||
.oid = {
|
.oid = {
|
||||||
.inode = op_data->oid.inode,
|
.inode = inode,
|
||||||
.stripe = op_data->oid.stripe | stripe_num,
|
.stripe = op_data->oid.stripe | stripe_num,
|
||||||
},
|
},
|
||||||
.version = op_version,
|
.version = op_version,
|
||||||
|
@ -205,33 +205,34 @@ void osd_t::submit_primary_subops(int submit_type, uint64_t op_version, int pg_s
|
||||||
#ifdef OSD_DEBUG
|
#ifdef OSD_DEBUG
|
||||||
printf(
|
printf(
|
||||||
"Submit %s to osd %lu: %lx:%lx v%lu %u-%u\n", wr ? "write" : "read", role_osd_num,
|
"Submit %s to osd %lu: %lx:%lx v%lu %u-%u\n", wr ? "write" : "read", role_osd_num,
|
||||||
op_data->oid.inode, op_data->oid.stripe | stripe_num, op_version,
|
inode, op_data->oid.stripe | stripe_num, op_version,
|
||||||
subops[i].req.sec_rw.offset, subops[i].req.sec_rw.len
|
subop->req.sec_rw.offset, subop->req.sec_rw.len
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
if (wr)
|
if (wr)
|
||||||
{
|
{
|
||||||
if (stripes[stripe_num].write_end > stripes[stripe_num].write_start)
|
if (stripes[stripe_num].write_end > stripes[stripe_num].write_start)
|
||||||
{
|
{
|
||||||
subops[i].iov.push_back(stripes[stripe_num].write_buf, stripes[stripe_num].write_end - stripes[stripe_num].write_start);
|
subop->iov.push_back(stripes[stripe_num].write_buf, stripes[stripe_num].write_end - stripes[stripe_num].write_start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (stripes[stripe_num].read_end > stripes[stripe_num].read_start)
|
if (stripes[stripe_num].read_end > stripes[stripe_num].read_start)
|
||||||
{
|
{
|
||||||
subops[i].iov.push_back(stripes[stripe_num].read_buf, stripes[stripe_num].read_end - stripes[stripe_num].read_start);
|
subop->iov.push_back(stripes[stripe_num].read_buf, stripes[stripe_num].read_end - stripes[stripe_num].read_start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
subops[i].callback = [cur_op, this](osd_op_t *subop)
|
subop->callback = [cur_op, this](osd_op_t *subop)
|
||||||
{
|
{
|
||||||
handle_primary_subop(subop, cur_op);
|
handle_primary_subop(subop, cur_op);
|
||||||
};
|
};
|
||||||
c_cli.outbox_push(&subops[i]);
|
c_cli.outbox_push(subop);
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return i-subop_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t bs_op_to_osd_op[] = {
|
static uint64_t bs_op_to_osd_op[] = {
|
||||||
|
@ -302,8 +303,13 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
|
||||||
{
|
{
|
||||||
uint64_t opcode = subop->req.hdr.opcode;
|
uint64_t opcode = subop->req.hdr.opcode;
|
||||||
int retval = subop->reply.hdr.retval;
|
int retval = subop->reply.hdr.retval;
|
||||||
int expected = opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE
|
int expected;
|
||||||
|| opcode == OSD_OP_SEC_WRITE_STABLE ? subop->req.sec_rw.len : 0;
|
if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||||
|
expected = subop->req.sec_rw.len;
|
||||||
|
else if (opcode == OSD_OP_SEC_READ_BMP)
|
||||||
|
expected = subop->req.sec_read_bmp.len / sizeof(obj_ver_id) * (8 + clean_entry_bitmap_size);
|
||||||
|
else
|
||||||
|
expected = 0;
|
||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
if (retval != expected)
|
if (retval != expected)
|
||||||
{
|
{
|
||||||
|
@ -330,14 +336,17 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
|
||||||
? c_cli.clients[subop->peer_fd]->osd_num : osd_num;
|
? c_cli.clients[subop->peer_fd]->osd_num : osd_num;
|
||||||
printf("subop %lu from osd %lu: version = %lu\n", opcode, peer_osd, version);
|
printf("subop %lu from osd %lu: version = %lu\n", opcode, peer_osd, version);
|
||||||
#endif
|
#endif
|
||||||
if (op_data->fact_ver != 0 && op_data->fact_ver != version)
|
if (op_data->fact_ver != UINT64_MAX)
|
||||||
{
|
{
|
||||||
throw std::runtime_error(
|
if (op_data->fact_ver != 0 && op_data->fact_ver != version)
|
||||||
"different fact_versions returned from "+std::string(osd_op_names[opcode])+
|
{
|
||||||
" subops: "+std::to_string(version)+" vs "+std::to_string(op_data->fact_ver)
|
throw std::runtime_error(
|
||||||
);
|
"different fact_versions returned from "+std::string(osd_op_names[opcode])+
|
||||||
|
" subops: "+std::to_string(version)+" vs "+std::to_string(op_data->fact_ver)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
op_data->fact_ver = version;
|
||||||
}
|
}
|
||||||
op_data->fact_ver = version;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((op_data->errors + op_data->done) >= op_data->n_subops)
|
if ((op_data->errors + op_data->done) >= op_data->n_subops)
|
||||||
|
|
|
@ -87,7 +87,7 @@ resume_1:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Read required blocks
|
// Read required blocks
|
||||||
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op);
|
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, op_data->prev_set, cur_op);
|
||||||
resume_2:
|
resume_2:
|
||||||
op_data->st = 2;
|
op_data->st = 2;
|
||||||
return;
|
return;
|
||||||
|
@ -158,7 +158,7 @@ resume_10:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
submit_primary_subops(SUBMIT_WRITE, op_data->target_ver, pg.pg_size, pg.cur_set.data(), cur_op);
|
submit_primary_subops(SUBMIT_WRITE, op_data->target_ver, pg.cur_set.data(), cur_op);
|
||||||
resume_4:
|
resume_4:
|
||||||
op_data->st = 4;
|
op_data->st = 4;
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -246,20 +246,23 @@ void reconstruct_stripes_jerasure(osd_rmw_stripe_t *stripes, int pg_size, int pg
|
||||||
{
|
{
|
||||||
if (stripes[role].read_end != 0 && stripes[role].missing)
|
if (stripes[role].read_end != 0 && stripes[role].missing)
|
||||||
{
|
{
|
||||||
for (int other = 0; other < pg_size; other++)
|
if (stripes[role].read_end > stripes[role].read_start)
|
||||||
{
|
{
|
||||||
if (stripes[other].read_end != 0 && !stripes[other].missing)
|
for (int other = 0; other < pg_size; other++)
|
||||||
{
|
{
|
||||||
assert(stripes[other].read_start <= stripes[role].read_start);
|
if (stripes[other].read_end != 0 && !stripes[other].missing)
|
||||||
assert(stripes[other].read_end >= stripes[role].read_end);
|
{
|
||||||
data_ptrs[other] = (char*)(stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start));
|
assert(stripes[other].read_start <= stripes[role].read_start);
|
||||||
|
assert(stripes[other].read_end >= stripes[role].read_end);
|
||||||
|
data_ptrs[other] = (char*)(stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
data_ptrs[role] = (char*)stripes[role].read_buf;
|
||||||
|
jerasure_matrix_dotprod(
|
||||||
|
pg_minsize, OSD_JERASURE_W, decoding_matrix+(role*pg_minsize), dm_ids, role,
|
||||||
|
data_ptrs, data_ptrs+pg_minsize, stripes[role].read_end - stripes[role].read_start
|
||||||
|
);
|
||||||
}
|
}
|
||||||
data_ptrs[role] = (char*)stripes[role].read_buf;
|
|
||||||
jerasure_matrix_dotprod(
|
|
||||||
pg_minsize, OSD_JERASURE_W, decoding_matrix+(role*pg_minsize), dm_ids, role,
|
|
||||||
data_ptrs, data_ptrs+pg_minsize, stripes[role].read_end - stripes[role].read_start
|
|
||||||
);
|
|
||||||
for (int other = 0; other < pg_size; other++)
|
for (int other = 0; other < pg_size; other++)
|
||||||
{
|
{
|
||||||
if (stripes[other].read_end != 0 && !stripes[other].missing)
|
if (stripes[other].read_end != 0 && !stripes[other].missing)
|
||||||
|
|
Loading…
Reference in New Issue