Report PG history synchronously during write
This has 2 effects: 1) OSD sets are no longer added into PG history until an actual write attempt, which removes unneeded extra osd_sets from PG history; 2) new OSD sets are reported synchronously and can't be lost on PG restarts that happen at the same time as reconfiguration
parent
37a6aff2fa
commit
a4dfa519af
|
@ -885,7 +885,6 @@ void osd_t::report_pg_states()
|
|||
if (pg.history_changed)
|
||||
{
|
||||
// Prevent race conditions (for the case when the monitor is updating this key at the same time)
|
||||
// FIXME: target_history updates may be lost on PG re-peering
|
||||
pg.history_changed = false;
|
||||
std::string history_key = base64_encode(st_cli.etcd_prefix+"/pg/history/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num));
|
||||
json11::Json::object history_value = {
|
||||
|
|
|
@ -86,21 +86,9 @@ void pg_obj_state_check_t::walk()
|
|||
}
|
||||
if (pg->pg_cursize < pg->pg_size)
|
||||
{
|
||||
// Report PG history and activate
|
||||
// Activate as degraded
|
||||
// Current OSD set will be added into target_history on first write
|
||||
pg->state |= PG_DEGRADED | PG_PEERED;
|
||||
std::vector<osd_num_t> history_set;
|
||||
for (auto peer_osd: pg->cur_set)
|
||||
{
|
||||
if (peer_osd != 0)
|
||||
history_set.push_back(peer_osd);
|
||||
}
|
||||
std::sort(history_set.begin(), history_set.end());
|
||||
auto it = std::lower_bound(pg->target_history.begin(), pg->target_history.end(), history_set);
|
||||
if (it == pg->target_history.end() || *it != history_set)
|
||||
{
|
||||
pg->target_history.insert(it, history_set);
|
||||
pg->history_changed = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -438,7 +426,7 @@ void pg_t::calc_object_states(int log_level)
|
|||
std::sort(st.list.begin(), st.list.end());
|
||||
// Walk over it and check object states
|
||||
st.walk();
|
||||
if (this->state & (PG_DEGRADED|PG_LEFT_ON_DEAD))
|
||||
if (this->state != PG_ACTIVE)
|
||||
{
|
||||
assert(epoch != (((uint64_t)1 << PG_EPOCH_BITS)-1));
|
||||
epoch++;
|
||||
|
|
|
@ -155,9 +155,21 @@ resume_3:
|
|||
if (pg.epoch > pg.reported_epoch)
|
||||
{
|
||||
// Report newer epoch before writing
|
||||
// FIXME: We may report only one PG state here...
|
||||
// FIXME: We don't have to report all changed PG states here
|
||||
this->pg_state_dirty.insert({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
|
||||
pg.history_changed = true;
|
||||
if (pg.state != PG_ACTIVE)
|
||||
{
|
||||
// Check that current OSD set is in history and/or add it there
|
||||
std::vector<osd_num_t> history_set;
|
||||
for (auto peer_osd: pg.cur_set)
|
||||
if (peer_osd != 0)
|
||||
history_set.push_back(peer_osd);
|
||||
std::sort(history_set.begin(), history_set.end());
|
||||
auto it = std::lower_bound(pg.target_history.begin(), pg.target_history.end(), history_set);
|
||||
if (it == pg.target_history.end() || *it != history_set)
|
||||
pg.target_history.insert(it, history_set);
|
||||
pg.history_changed = true;
|
||||
}
|
||||
report_pg_states();
|
||||
resume_10:
|
||||
if (pg.epoch > pg.reported_epoch)
|
||||
|
@ -166,6 +178,12 @@ resume_10:
|
|||
return;
|
||||
}
|
||||
}
|
||||
// Recheck PG state after reporting history - maybe it's already stopping/restarting
|
||||
if (pg.state & (PG_STOPPING|PG_REPEERING))
|
||||
{
|
||||
pg_cancel_write_queue(pg, cur_op, op_data->oid, -EPIPE);
|
||||
return;
|
||||
}
|
||||
submit_primary_subops(SUBMIT_WRITE, op_data->target_ver, pg.cur_set.data(), cur_op);
|
||||
resume_4:
|
||||
op_data->st = 4;
|
||||
|
|
Loading…
Reference in New Issue