Extract object state calculation to a separate file and slightly test it

blocking-uring-test
Vitaliy Filippov 2020-01-24 02:23:27 +03:00
parent d2a3f0c6dd
commit 98efdb78bd
7 changed files with 402 additions and 341 deletions

View File

@ -24,18 +24,21 @@ libblockstore.so: $(BLOCKSTORE_OBJS)
libfio_blockstore.so: ./libblockstore.so fio_engine.cpp json11.o
g++ $(CXXFLAGS) -shared -o libfio_blockstore.so fio_engine.cpp json11.o ./libblockstore.so -ltcmalloc_minimal -luring
OSD_OBJS := osd.o osd_exec_secondary.o osd_read.o osd_send.o osd_peering.o osd_peering_pg.o json11.o
osd_exec_secondary.o: osd_exec_secondary.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_read.o: osd_read.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_send.o: osd_send.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_peering.o: osd_peering.cpp osd.h osd_ops.h
osd_peering.o: osd_peering.cpp osd.h osd_ops.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd.o: osd.cpp osd.h osd_ops.h
osd_peering_pg.o: osd_peering_pg.cpp object_id.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h osd.o osd_exec_secondary.o osd_read.o osd_send.o osd_peering.o json11.o
g++ $(CXXFLAGS) -o osd osd_main.cpp osd.o osd_exec_secondary.o osd_read.o osd_send.o osd_peering.o json11.o ./libblockstore.so -ltcmalloc_minimal -luring
osd.o: osd.cpp osd.h osd_ops.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS)
g++ $(CXXFLAGS) -o osd osd_main.cpp $(OSD_OBJS) ./libblockstore.so -ltcmalloc_minimal -luring
stub_osd: stub_osd.cpp osd_ops.h
g++ $(CXXFLAGS) -o stub_osd stub_osd.cpp -ltcmalloc_minimal
@ -44,7 +47,7 @@ libfio_sec_osd.so: fio_sec_osd.cpp osd_ops.h
test_blockstore: ./libblockstore.so test_blockstore.cpp
g++ $(CXXFLAGS) -o test_blockstore test_blockstore.cpp ./libblockstore.so -ltcmalloc_minimal -luring
test: test.cpp
g++ $(CXXFLAGS) -o test test.cpp -luring
test: test.cpp osd_peering_pg.o
g++ $(CXXFLAGS) -o test test.cpp osd_peering_pg.o -luring
test_allocator: test_allocator.cpp allocator.o
g++ $(CXXFLAGS) -o test_allocator test_allocator.cpp allocator.o

View File

@ -14,7 +14,6 @@
#include <vector>
#include <list>
#include <deque>
#include <set>
#include <new>
#include "sparsepp/sparsepp/spp.h"

161
osd.h
View File

@ -10,18 +10,15 @@
#include <arpa/inet.h>
#include <malloc.h>
#include <set>
#include <deque>
#include "blockstore.h"
#include "ringloop.h"
#include "osd_ops.h"
#include "osd_peering_pg.h"
#include "sparsepp/sparsepp/spp.h"
#define STRIPE_NUM(stripe) ((stripe) >> 4)
#define STRIPE_REPLICA(stripe) ((stripe) & 0xf)
#define OSD_OP_IN 0
#define OSD_OP_OUT 1
@ -34,8 +31,13 @@
#define CL_WRITE_DATA 3
#define MAX_EPOLL_EVENTS 16
#define PEER_CONNECTING 1
#define PEER_CONNECTED 2
//#define OSD_STUB
// FIXME: add types for pg_num and osd_num?
struct osd_op_t
{
int op_type;
@ -57,8 +59,13 @@ struct osd_op_t
~osd_op_t();
};
#define PEER_CONNECTING 1
#define PEER_CONNECTED 2
struct osd_peer_def_t
{
uint64_t osd_num = 0;
std::string addr;
int port = 0;
time_t last_connect_attempt = 0;
};
struct osd_client_t
{
@ -96,144 +103,6 @@ struct osd_client_t
int write_state = 0;
};
struct osd_obj_loc_t
{
uint64_t role;
uint64_t osd_num;
bool stable;
};
inline bool operator < (const osd_obj_loc_t &a, const osd_obj_loc_t &b)
{
return a.role < b.role || a.role == b.role && a.osd_num < b.osd_num;
}
struct osd_obj_state_t
{
std::vector<osd_obj_loc_t> loc;
uint64_t state = 0;
uint64_t object_count = 0;
};
struct osd_ver_override_t
{
uint64_t max_ver;
uint64_t target_ver;
};
inline bool operator < (const osd_obj_state_t &a, const osd_obj_state_t &b)
{
return a.loc < b.loc;
}
namespace std
{
template<> struct hash<osd_obj_state_t>
{
inline size_t operator()(const osd_obj_state_t &s) const
{
size_t seed = 0;
for (int i = 0; i < s.loc.size(); i++)
{
// Copy-pasted from spp::hash_combine()
seed ^= (s.loc[i].role + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= (s.loc[i].osd_num + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= ((s.loc[i].stable ? 1 : 0) + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
}
return seed;
}
};
}
// Placement group states
// Exactly one of these:
#define PG_OFFLINE (1<<0)
#define PG_PEERING (1<<1)
#define PG_INCOMPLETE (1<<2)
#define PG_ACTIVE (1<<3)
// Plus any of these:
#define PG_HAS_UNFOUND (1<<4)
#define PG_HAS_DEGRADED (1<<5)
#define PG_HAS_MISPLACED (1<<6)
// OSD object states
#define OBJ_CLEAN 0x01
#define OBJ_MISPLACED 0x02
#define OBJ_DEGRADED 0x03
#define OBJ_INCOMPLETE 0x04
#define OBJ_NONSTABILIZED 0x10000
#define OBJ_UNDERWRITTEN 0x20000
#define OBJ_OVERCOPIED 0x40000
#define OBJ_BUGGY 0x80000
class osd_t;
struct osd_pg_peering_state_t
{
osd_t* self;
// FIXME: add types for pg_num and osd_num?
uint64_t pg_num;
std::unordered_map<uint64_t, osd_op_t*> list_ops;
int list_done = 0;
};
struct osd_pg_t
{
int state;
uint64_t pg_size = 3, pg_minsize = 2;
uint64_t pg_num;
// target_set = (role => osd_num). role starts from zero
std::vector<uint64_t> target_set;
// moved object map. by default, each object is considered to reside on the target_set.
// this map stores all objects that differ.
// this map may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
// which is up to ~192 MB per 1 TB in the worst case scenario
std::set<osd_obj_state_t> state_dict;
spp::sparse_hash_map<object_id, const osd_obj_state_t*> obj_states;
spp::sparse_hash_map<object_id, osd_ver_override_t> ver_override;
osd_pg_peering_state_t *peering_state = NULL;
};
struct obj_ver_role
{
object_id oid;
uint64_t version;
uint64_t osd_num;
bool is_stable;
};
inline bool operator < (const obj_ver_role & a, const obj_ver_role & b)
{
return a.oid < b.oid ||
// object versions go in descending order
a.oid == b.oid && a.version > b.version ||
a.oid == b.oid && a.version == b.version ||
a.oid == b.oid && a.version == b.version && a.osd_num < b.osd_num;
}
// Max 64 replicas
#define STRIPE_MASK 0x3F
#define STRIPE_SHIFT 6
struct osd_obj_state_check_t
{
int start = 0;
object_id oid = { 0 };
uint64_t max_ver = 0;
uint64_t target_ver = 0;
uint64_t n_copies = 0, has_roles = 0, n_roles = 0, n_stable = 0, n_matched = 0;
bool is_buggy = false;
osd_obj_state_t state_obj;
};
struct osd_peer_def_t
{
uint64_t osd_num = 0;
std::string addr;
int port = 0;
time_t last_connect_attempt = 0;
};
class osd_t
{
// config
@ -250,7 +119,7 @@ class osd_t
// peer OSDs
std::map<uint64_t, int> osd_peer_fds;
std::vector<osd_pg_t> pgs;
std::vector<pg_t> pgs;
int peering_state = 0;
unsigned pg_count = 0;
@ -292,8 +161,6 @@ class osd_t
void init_primary();
void handle_peers();
void start_pg_peering(int i);
void calc_object_states(osd_pg_t &pg);
void remember_object(osd_pg_t &pg, osd_obj_state_check_t &st, std::vector<obj_ver_role> &all, int end);
// op execution
void exec_op(osd_op_t *cur_op);

View File

@ -14,12 +14,10 @@ void osd_t::init_primary()
peers.push_back(parse_peer(config["peer2"]));
if (peers[1].osd_num == peers[0].osd_num)
throw std::runtime_error("peer1 and peer2 osd numbers are the same");
pgs.push_back((osd_pg_t){
pgs.push_back((pg_t){
.state = PG_OFFLINE,
.pg_num = 1,
.target_set = { 1, 2, 3 },
.obj_states = spp::sparse_hash_map<object_id, const osd_obj_state_t*>(),
.ver_override = spp::sparse_hash_map<object_id, osd_ver_override_t>(),
});
pg_count = 1;
peering_state = 1;
@ -172,7 +170,7 @@ void osd_t::handle_peers()
}
else if (pgs[i].peering_state->list_done >= 3)
{
calc_object_states(pgs[i]);
pgs[i].calc_object_states();
peering_state = 0;
}
}
@ -183,30 +181,36 @@ void osd_t::handle_peers()
void osd_t::start_pg_peering(int pg_idx)
{
auto & pg = pgs[pg_idx];
auto ps = pg.peering_state = new osd_pg_peering_state_t();
ps->self = this;
ps->pg_num = pg_idx; // FIXME probably shouldn't be pg_idx
auto ps = pg.peering_state = new pg_peering_state_t();
{
uint64_t osd_num = this->osd_num;
osd_op_t *op = new osd_op_t();
op->op_type = 0;
op->peer_fd = 0;
op->bs_op.opcode = BS_OP_LIST;
op->bs_op.callback = [ps, op](blockstore_op_t *bs_op)
op->bs_op.callback = [ps, op, osd_num](blockstore_op_t *bs_op)
{
if (op->bs_op.retval < 0)
{
throw std::runtime_error("OP_LIST failed");
}
printf(
"Got object list from OSD %lu (local): %d objects (%lu of them stable)\n",
ps->self->osd_num, bs_op->retval, bs_op->version
osd_num, bs_op->retval, bs_op->version
);
op->buf = op->bs_op.buf;
op->reply.hdr.retval = op->bs_op.retval;
op->reply.sec_list.stable_count = op->bs_op.version;
ps->list_results[osd_num] = {
.buf = (obj_ver_id*)op->bs_op.buf,
.total_count = (uint64_t)op->bs_op.retval,
.stable_count = op->bs_op.version,
};
ps->list_done++;
delete op;
};
pg.peering_state->list_ops[osd_num] = op;
bs->enqueue_op(&op->bs_op);
}
for (int i = 0; i < peers.size(); i++)
{
uint64_t osd_num = peers[i].osd_num;
auto & cl = clients[osd_peer_fds[peers[i].osd_num]];
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
@ -222,161 +226,25 @@ void osd_t::start_pg_peering(int pg_idx)
.pgtotal = 1,
},
};
op->callback = [ps](osd_op_t *op)
op->callback = [ps, osd_num](osd_op_t *op)
{
if (op->reply.hdr.retval < 0)
{
throw std::runtime_error("OP_LIST failed");
}
printf(
"Got object list from OSD %lu: %ld objects (%lu of them stable)\n",
ps->self->clients[op->peer_fd].osd_num, op->reply.hdr.retval,
op->reply.sec_list.stable_count
osd_num, op->reply.hdr.retval, op->reply.sec_list.stable_count
);
ps->list_results[osd_num] = {
.buf = (obj_ver_id*)op->buf,
.total_count = (uint64_t)op->reply.hdr.retval,
.stable_count = op->reply.sec_list.stable_count,
};
op->buf = NULL;
ps->list_done++;
delete op;
};
pg.peering_state->list_ops[cl.osd_num] = op;
outbox_push(cl, op);
}
}
void osd_t::remember_object(osd_pg_t &pg, osd_obj_state_check_t &st, std::vector<obj_ver_role> &all, int end)
{
// Remember the decision
uint64_t state = 0;
if (st.n_roles == pg.pg_size)
{
if (st.n_matched == pg.pg_size)
state = OBJ_CLEAN;
else
state = OBJ_MISPLACED;
}
else if (st.n_roles < pg.pg_minsize)
state = OBJ_INCOMPLETE;
else
state = OBJ_DEGRADED;
if (st.n_copies > pg.pg_size)
state |= OBJ_OVERCOPIED;
if (st.n_stable < st.n_copies)
state |= OBJ_NONSTABILIZED;
if (st.target_ver < st.max_ver)
state |= OBJ_UNDERWRITTEN;
if (st.is_buggy)
state |= OBJ_BUGGY;
if (state != OBJ_CLEAN)
{
st.state_obj.state = state;
st.state_obj.loc.clear();
for (int i = st.start; i < end; i++)
{
st.state_obj.loc.push_back((osd_obj_loc_t){
.role = (all[i].oid.stripe & STRIPE_MASK),
.osd_num = all[i].osd_num,
.stable = all[i].is_stable,
});
}
std::sort(st.state_obj.loc.begin(), st.state_obj.loc.end());
auto ins = pg.state_dict.insert(st.state_obj);
pg.obj_states[st.oid] = &(*(ins.first));
if (state & OBJ_UNDERWRITTEN)
{
pg.ver_override[st.oid] = {
.max_ver = st.max_ver,
.target_ver = st.target_ver,
};
}
}
}
void osd_t::calc_object_states(osd_pg_t &pg)
{
// Copy all object lists into one array
std::vector<obj_ver_role> all;
auto ps = pg.peering_state;
for (auto e: ps->list_ops)
{
osd_op_t* op = e.second;
auto nstab = op->reply.sec_list.stable_count;
auto n = op->reply.hdr.retval;
auto osd_num = clients[op->peer_fd].osd_num;
all.resize(all.size() + n);
obj_ver_id *ov = (obj_ver_id*)op->buf;
for (uint64_t i = 0; i < n; i++, ov++)
{
all[i] = {
.oid = ov->oid,
.version = ov->version,
.osd_num = osd_num,
.is_stable = i < nstab,
};
}
free(op->buf);
op->buf = NULL;
}
// Sort
std::sort(all.begin(), all.end());
// Walk over it and check object states
int replica = 0;
osd_obj_state_check_t st;
for (int i = 0; i < all.size(); i++)
{
if (st.oid.inode != all[i].oid.inode ||
st.oid.stripe != (all[i].oid.stripe >> STRIPE_SHIFT))
{
if (st.oid.inode != 0)
{
// Remember object state
remember_object(pg, st, all, i);
}
st.start = i;
st.oid = { .inode = all[i].oid.inode, .stripe = all[i].oid.stripe >> STRIPE_SHIFT };
st.max_ver = st.target_ver = all[i].version;
st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0;
st.is_buggy = false;
}
if (st.target_ver != all[i].version)
{
if (st.n_stable > 0 || st.n_roles >= pg.pg_minsize)
{
// Version is either recoverable or stable, choose it as target and skip previous versions
remember_object(pg, st, all, i);
while (i < all.size() && st.oid.inode == all[i].oid.inode &&
st.oid.stripe == (all[i].oid.stripe >> STRIPE_SHIFT))
{
i++;
}
continue;
}
else
{
// Remember that there are newer unrecoverable versions
st.target_ver = all[i].version;
st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0;
}
}
replica = (all[i].oid.stripe & STRIPE_MASK);
st.n_copies++;
if (replica >= pg.pg_size)
{
// FIXME In the future, check it against the PG epoch number to handle replication factor/scheme changes
st.is_buggy = true;
}
else
{
if (all[i].is_stable)
{
st.n_stable++;
}
else if (pg.target_set[replica] == all[i].osd_num)
{
st.n_matched++;
}
if (!(st.has_roles & (1 << replica)))
{
st.has_roles = st.has_roles | (1 << replica);
st.n_roles++;
}
}
}
if (st.oid.inode != 0)
{
// Remember object state
remember_object(pg, st, all, all.size());
}
}

162
osd_peering_pg.cpp Normal file
View File

@ -0,0 +1,162 @@
#include "osd_peering_pg.h"
void pg_t::remember_object(pg_obj_state_check_t &st, std::vector<obj_ver_role> &all, int end)
{
auto & pg = *this;
// Remember the decision
uint64_t state = 0;
if (st.n_roles == pg.pg_size)
{
if (st.n_matched == pg.pg_size)
state = OBJ_CLEAN;
else
state = OBJ_MISPLACED;
}
else if (st.n_roles < pg.pg_minsize)
state = OBJ_INCOMPLETE;
else
state = OBJ_DEGRADED;
if (st.n_copies > pg.pg_size)
state |= OBJ_OVERCOPIED;
if (st.n_stable < st.n_copies)
state |= OBJ_NONSTABILIZED;
if (st.target_ver < st.max_ver)
state |= OBJ_UNDERWRITTEN;
if (st.is_buggy)
state |= OBJ_BUGGY;
if (state != OBJ_CLEAN)
{
st.osd_set.clear();
for (int i = st.start; i < end; i++)
{
st.osd_set.push_back((pg_obj_loc_t){
.role = (all[i].oid.stripe & STRIPE_MASK),
.osd_num = all[i].osd_num,
.stable = all[i].is_stable,
});
}
std::sort(st.osd_set.begin(), st.osd_set.end());
auto it = pg.state_dict.find(st.osd_set);
if (it == pg.state_dict.end())
{
pg.state_dict[st.osd_set] = {
.osd_set = st.osd_set,
.state = state,
.object_count = 1,
};
it = pg.state_dict.find(st.osd_set);
}
else
it->second.object_count++;
pg.obj_states[st.oid] = &it->second;
if (state & OBJ_UNDERWRITTEN)
{
pg.ver_override[st.oid] = {
.max_ver = st.max_ver,
.target_ver = st.target_ver,
};
}
}
else
pg.clean_count++;
}
void pg_t::calc_object_states()
{
auto & pg = *this;
// Copy all object lists into one array
std::vector<obj_ver_role> all;
auto ps = pg.peering_state;
for (auto it: ps->list_results)
{
auto nstab = it.second.stable_count;
auto n = it.second.total_count;
auto osd_num = it.first;
uint64_t start = all.size();
all.resize(start + n);
obj_ver_id *ov = it.second.buf;
for (uint64_t i = 0; i < n; i++, ov++)
{
all[start+i] = {
.oid = ov->oid,
.version = ov->version,
.osd_num = osd_num,
.is_stable = i < nstab,
};
}
free(it.second.buf);
it.second.buf = NULL;
}
ps->list_results.clear();
// Sort
std::sort(all.begin(), all.end());
// Walk over it and check object states
pg.clean_count = 0;
int replica = 0;
pg_obj_state_check_t st;
for (int i = 0; i < all.size(); i++)
{
if (st.oid.inode != all[i].oid.inode ||
st.oid.stripe != (all[i].oid.stripe >> STRIPE_SHIFT))
{
if (st.oid.inode != 0)
{
// Remember object state
remember_object(st, all, i);
}
st.start = i;
st.oid = { .inode = all[i].oid.inode, .stripe = all[i].oid.stripe >> STRIPE_SHIFT };
st.max_ver = st.target_ver = all[i].version;
st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0;
st.is_buggy = false;
}
if (st.target_ver != all[i].version)
{
if (st.n_stable > 0 || st.n_roles >= pg.pg_minsize)
{
// Version is either recoverable or stable, choose it as target and skip previous versions
remember_object(st, all, i);
while (i < all.size() && st.oid.inode == all[i].oid.inode &&
st.oid.stripe == (all[i].oid.stripe >> STRIPE_SHIFT))
{
i++;
}
continue;
}
else
{
// Remember that there are newer unrecoverable versions
st.target_ver = all[i].version;
st.has_roles = st.n_copies = st.n_roles = st.n_stable = st.n_matched = 0;
}
}
replica = (all[i].oid.stripe & STRIPE_MASK);
st.n_copies++;
if (replica >= pg.pg_size)
{
// FIXME In the future, check it against the PG epoch number to handle replication factor/scheme changes
st.is_buggy = true;
}
else
{
if (all[i].is_stable)
{
st.n_stable++;
}
if (pg.target_set[replica] == all[i].osd_num)
{
st.n_matched++;
}
if (!(st.has_roles & (1 << replica)))
{
st.has_roles = st.has_roles | (1 << replica);
st.n_roles++;
}
}
}
if (st.oid.inode != 0)
{
// Remember object state
remember_object(st, all, all.size());
}
}

142
osd_peering_pg.h Normal file
View File

@ -0,0 +1,142 @@
#include <map>
#include <vector>
#include <algorithm>
#include "object_id.h"
#include "sparsepp/sparsepp/spp.h"
// Placement group states
// Exactly one of these:
#define PG_OFFLINE (1<<0)
#define PG_PEERING (1<<1)
#define PG_INCOMPLETE (1<<2)
#define PG_ACTIVE (1<<3)
// Plus any of these:
#define PG_HAS_UNFOUND (1<<4)
#define PG_HAS_DEGRADED (1<<5)
#define PG_HAS_MISPLACED (1<<6)
// OSD object states
#define OBJ_CLEAN 0x01
#define OBJ_MISPLACED 0x02
#define OBJ_DEGRADED 0x03
#define OBJ_INCOMPLETE 0x04
#define OBJ_NONSTABILIZED 0x10000
#define OBJ_UNDERWRITTEN 0x20000
#define OBJ_OVERCOPIED 0x40000
#define OBJ_BUGGY 0x80000
// Max 64 replicas
#define STRIPE_MASK 0x3F
#define STRIPE_SHIFT 6
struct pg_obj_loc_t
{
uint64_t role;
uint64_t osd_num;
bool stable;
};
typedef std::vector<pg_obj_loc_t> pg_osd_set_t;
struct pg_osd_set_state_t
{
pg_osd_set_t osd_set;
uint64_t state = 0;
uint64_t object_count = 0;
};
struct pg_ver_override_t
{
uint64_t max_ver;
uint64_t target_ver;
};
struct pg_list_result_t
{
obj_ver_id *buf;
uint64_t total_count;
uint64_t stable_count;
};
struct pg_peering_state_t
{
// osd_num -> list result
spp::sparse_hash_map<uint64_t, pg_list_result_t> list_results;
int list_done = 0;
};
struct pg_obj_state_check_t
{
int start = 0;
object_id oid = { 0 };
uint64_t max_ver = 0;
uint64_t target_ver = 0;
uint64_t n_copies = 0, has_roles = 0, n_roles = 0, n_stable = 0, n_matched = 0;
bool is_buggy = false;
pg_osd_set_t osd_set;
};
struct obj_ver_role
{
object_id oid;
uint64_t version;
uint64_t osd_num;
bool is_stable;
};
struct pg_t
{
int state;
uint64_t pg_size = 3, pg_minsize = 2;
uint64_t pg_num;
uint64_t clean_count = 0;
// target_set = (role => osd_num). role starts from zero
std::vector<uint64_t> target_set;
// moved object map. by default, each object is considered to reside on the target_set.
// this map stores all objects that differ.
// this map may consume up to ~ (raw storage / object size) * 24 bytes in the worst case scenario
// which is up to ~192 MB per 1 TB in the worst case scenario
std::map<pg_osd_set_t, pg_osd_set_state_t> state_dict;
spp::sparse_hash_map<object_id, pg_osd_set_state_t*> obj_states;
spp::sparse_hash_map<object_id, pg_ver_override_t> ver_override;
pg_peering_state_t *peering_state = NULL;
void calc_object_states();
void remember_object(pg_obj_state_check_t &st, std::vector<obj_ver_role> &all, int end);
};
inline bool operator < (const pg_obj_loc_t &a, const pg_obj_loc_t &b)
{
return a.role < b.role || a.role == b.role && a.osd_num < b.osd_num ||
a.role == b.role && a.osd_num == b.osd_num && a.stable < b.stable;
}
inline bool operator < (const obj_ver_role & a, const obj_ver_role & b)
{
return a.oid < b.oid ||
// object versions come in descending order
a.oid == b.oid && a.version > b.version ||
a.oid == b.oid && a.version == b.version ||
a.oid == b.oid && a.version == b.version && a.osd_num < b.osd_num;
}
namespace std
{
template<> struct hash<pg_osd_set_t>
{
inline size_t operator()(const pg_osd_set_t &s) const
{
size_t seed = 0;
for (auto e: s)
{
// Copy-pasted from spp::hash_combine()
seed ^= (e.role + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= (e.osd_num + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
seed ^= ((e.stable ? 1 : 0) + 0xc6a4a7935bd1e995 + (seed << 6) + (seed >> 2));
}
return seed;
}
};
}

View File

@ -25,6 +25,7 @@
#include "blockstore.h"
#include "blockstore_impl.h"
#include "osd_peering_pg.h"
static int setup_context(unsigned entries, struct io_uring *ring)
{
@ -285,23 +286,7 @@ int main03(int argc, char *argv[])
return 0;
}
struct obj_ver_role
{
object_id oid;
uint64_t version;
uint32_t osd_num;
uint32_t is_stable;
};
inline bool operator < (const obj_ver_role & a, const obj_ver_role & b)
{
return a.oid < b.oid ||
a.oid == b.oid && a.version < b.version ||
a.oid == b.oid && a.version == b.version ||
a.oid == b.oid && a.version == b.version && a.osd_num < b.osd_num;
}
int main(int argc, char *argv[])
int main04(int argc, char *argv[])
{
/*spp::sparse_hash_set<obj_ver_id> osd1, osd2;
// fill takes 18.9 s
@ -336,19 +321,54 @@ int main(int argc, char *argv[])
{
to_sort[i] = {
.oid = (object_id){
.inode = rand() % 500,
.stripe = rand(),
.inode = (uint64_t)(rand() % 500),
.stripe = (uint64_t)rand(),
},
.version = rand(),
.osd_num = rand() % 16,
.version = (uint64_t)rand(),
.osd_num = (uint64_t)(rand() % 16),
};
}
printf("Sorting\n");
// sorting the whole array takes 7 s
//std::sort(to_sort.begin(), to_sort.end());
// sorting in 3 parts... almost the same, 6 s
std::sort(to_sort.begin(), to_sort.begin() + to_sort.size()/3);
std::sort(to_sort.begin() + to_sort.size()/3, to_sort.begin() + to_sort.size()*2/3);
std::sort(to_sort.begin() + to_sort.size()*2/3, to_sort.end());
std::sort(to_sort.begin(), to_sort.end());
return 0;
}
int main(int argc, char *argv[])
{
// FIXME extract this into a test
pg_t pg = {
.state = PG_PEERING,
.pg_num = 1,
.target_set = { 1, 2, 3 },
.peering_state = new pg_peering_state_t(),
};
pg.peering_state->list_done = 3;
for (uint64_t osd_num = 1; osd_num <= 3; osd_num++)
{
pg_list_result_t r = {
.buf = (obj_ver_id*)malloc(sizeof(obj_ver_id) * 1024*1024*8),
.total_count = 1024*1024*8,
.stable_count = (uint64_t)(1024*1024*8 - (osd_num == 1 ? 10 : 0)),
};
for (uint64_t i = 0; i < r.total_count; i++)
{
r.buf[i] = {
.oid = {
.inode = 1,
.stripe = (i << STRIPE_SHIFT) | (osd_num-1),
},
.version = (uint64_t)(osd_num == 1 && i >= r.total_count - 10 ? 2 : 1),
};
}
pg.peering_state->list_results[osd_num] = r;
}
pg.calc_object_states();
printf("deviation variants=%ld clean=%lu\n", pg.state_dict.size(), pg.clean_count);
for (auto it: pg.state_dict)
{
printf("dev: state=%lx\n", it.second.state);
}
return 0;
}