Re-peer PGs after stopping the peer

Fixes the bug where killing two peers at once led to the PG state PG_DEGRADED|PG_HAS_INCOMPLETE instead of PG_INCOMPLETE
trace-sqes
Vitaliy Filippov 2020-05-23 18:41:28 +03:00
parent e614a98543
commit 7df384031a
1 changed file with 8 additions and 4 deletions

12
osd.cpp
View File

@ -401,12 +401,12 @@ void osd_t::cancel_op(osd_op_t *op)
void osd_t::stop_client(int peer_fd)
{
// FIXME Fix the bug where sometimes a dead peer is undetected which leads to PG DEGRADED|HAS_INCOMPLETE!
auto it = clients.find(peer_fd);
if (it == clients.end())
{
return;
}
uint64_t repeer_osd = 0;
osd_client_t cl = it->second;
if (cl.peer_state == PEER_CONNECTED)
{
@ -415,7 +415,8 @@ void osd_t::stop_client(int peer_fd)
// Reload configuration from etcd when the connection is dropped
printf("[OSD %lu] Stopping client %d (OSD peer %lu)\n", osd_num, peer_fd, cl.osd_num);
st_cli.peer_states.erase(cl.osd_num);
repeer_pgs(cl.osd_num);
repeer_osd = cl.osd_num;
peering_state |= OSD_CONNECTING_PEERS;
}
else
{
@ -429,10 +430,9 @@ void osd_t::stop_client(int peer_fd)
}
if (cl.osd_num)
{
osd_peer_fds.erase(cl.osd_num);
// Cancel outbound operations
cancel_osd_ops(cl);
osd_peer_fds.erase(cl.osd_num);
peering_state |= OSD_CONNECTING_PEERS;
}
if (cl.read_op)
{
@ -456,6 +456,10 @@ void osd_t::stop_client(int peer_fd)
}
free(cl.in_buf);
close(peer_fd);
if (repeer_osd)
{
repeer_pgs(repeer_osd);
}
}
void osd_t::exec_op(osd_op_t *cur_op)