diff --git a/src/osd_peering.cpp b/src/osd_peering.cpp index 1e918fe6..1b6fe26c 100644 --- a/src/osd_peering.cpp +++ b/src/osd_peering.cpp @@ -473,7 +473,7 @@ bool osd_t::stop_pg(pg_t & pg) delete pg.peering_state; pg.peering_state = NULL; } - if (pg.state & PG_STOPPING) + if (pg.state & (PG_STOPPING | PG_OFFLINE)) { return false; } diff --git a/src/osd_primary.cpp b/src/osd_primary.cpp index 90965548..905ec15e 100644 --- a/src/osd_primary.cpp +++ b/src/osd_primary.cpp @@ -391,19 +391,19 @@ continue_others: // Remove version override pg.ver_override.erase(op_data->oid); object_id oid = op_data->oid; + // Remove the operation from queue before calling finish_op so it doesn't see the completed operation in queue + auto next_it = pg.write_queue.find(oid); + if (next_it != pg.write_queue.end() && next_it->second == cur_op) + { + pg.write_queue.erase(next_it++); + } + // finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :) finish_op(cur_op, cur_op->reply.hdr.retval); // Continue other write operations to the same object - auto next_it = pg.write_queue.find(oid); - auto this_it = next_it; - if (this_it != pg.write_queue.end() && this_it->second == cur_op) + if (next_it != pg.write_queue.end() && next_it->first == oid) { - next_it++; - pg.write_queue.erase(this_it); - if (next_it != pg.write_queue.end() && next_it->first == oid) - { - osd_op_t *next_op = next_it->second; - continue_primary_write(next_op); - } + osd_op_t *next_op = next_it->second; + continue_primary_write(next_op); } } diff --git a/src/osd_primary_subops.cpp b/src/osd_primary_subops.cpp index c0ecb91b..8f0988b8 100644 --- a/src/osd_primary_subops.cpp +++ b/src/osd_primary_subops.cpp @@ -43,7 +43,9 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval) auto & pg = pgs.at({ .pool_id = INODE_POOL(cur_op->op_data->oid.inode), .pg_num = cur_op->op_data->pg_num }); pg.inflight--; assert(pg.inflight >= 0); - if ((pg.state & PG_STOPPING) && pg.inflight == 0 && !pg.flush_batch) + if ((pg.state & PG_STOPPING) && pg.inflight == 0 && !pg.flush_batch && + // We must either forget all PG's unstable writes or wait for it to become clean + dirty_pgs.find({ .pool_id = pg.pool_id, .pg_num = pg.pg_num }) == dirty_pgs.end()) { finish_stop_pg(pg); }