diff --git a/blockstore_init.cpp b/blockstore_init.cpp index 93b9ae0b..c3dbcc67 100644 --- a/blockstore_init.cpp +++ b/blockstore_init.cpp @@ -624,6 +624,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u .journal_sector = proc_pos, }); bs->journal.used_sectors[proc_pos]++; + // Deletions are treated as immediately stable, because + // "2-phase commit" (write->stabilize) isn't sufficient for them anyway + bs->mark_stable(ov); } } started = true; diff --git a/blockstore_sync.cpp b/blockstore_sync.cpp index 3f3899de..2e97660c 100644 --- a/blockstore_sync.cpp +++ b/blockstore_sync.cpp @@ -275,7 +275,16 @@ void blockstore_impl_t::ack_one_sync(blockstore_op_t *op) #endif auto & unstab = unstable_writes[it->oid]; unstab = unstab < it->version ? it->version : unstab; - dirty_db[*it].state = dirty_db[*it].state == ST_DEL_WRITTEN ? ST_DEL_SYNCED : ST_J_SYNCED; + if (dirty_db[*it].state == ST_DEL_WRITTEN) + { + dirty_db[*it].state = ST_DEL_SYNCED; + // Deletions are treated as immediately stable + mark_stable(*it); + } + else /* == ST_J_WRITTEN */ + { + dirty_db[*it].state = ST_J_SYNCED; + } } in_progress_syncs.erase(PRIV(op)->in_progress_ptr); op->retval = 0; diff --git a/blockstore_write.cpp b/blockstore_write.cpp index fcb41bef..b4f674cd 100644 --- a/blockstore_write.cpp +++ b/blockstore_write.cpp @@ -355,6 +355,11 @@ resume_4: else if (dirty_it->second.state == ST_DEL_SUBMITTED) { dirty_it->second.state = imm ? ST_DEL_SYNCED : ST_DEL_WRITTEN; + if (imm) + { + // Deletions are treated as immediately stable + mark_stable(dirty_it->first); + } } if (immediate_commit == IMMEDIATE_ALL) { diff --git a/osd.h b/osd.h index e52e5b6e..61a48999 100644 --- a/osd.h +++ b/osd.h @@ -193,7 +193,7 @@ class osd_t void continue_primary_del(osd_op_t *cur_op); bool check_write_queue(osd_op_t *cur_op, pg_t & pg); void remove_object_from_state(object_id & oid, pg_osd_set_state_t *object_state, pg_t &pg); - bool finalize_primary_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state); + bool remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state); void handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op); void handle_primary_bs_subop(osd_op_t *subop); void add_bs_subop_stats(osd_op_t *subop); diff --git a/osd_primary.cpp b/osd_primary.cpp index a1663f4b..d00fa3e7 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -284,7 +284,7 @@ resume_9: // FIXME: Check for immediate_commit == IMMEDIATE_SMALL resume_6: resume_7: - if (!finalize_primary_write(cur_op, pg, pg.cur_loc_set, 6)) + if (!remember_unstable_write(cur_op, pg, pg.cur_loc_set, 6)) { return; } @@ -305,7 +305,7 @@ resume_7: } } -bool osd_t::finalize_primary_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state) +bool osd_t::remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state) { osd_primary_op_data_t *op_data = cur_op->op_data; if (op_data->st == base_state) @@ -598,8 +598,6 @@ void osd_t::continue_primary_del(osd_op_t *cur_op) else if (op_data->st == 3) goto resume_3; else if (op_data->st == 4) goto resume_4; else if (op_data->st == 5) goto resume_5; - else if (op_data->st == 6) goto resume_6; - else if (op_data->st == 7) goto resume_7; assert(op_data->st == 0); // Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD)) @@ -641,12 +639,6 @@ resume_5: } // Remove version override pg.ver_override.erase(op_data->oid); -resume_6: -resume_7: - if (!finalize_primary_write(cur_op, pg, op_data->object_state ? op_data->object_state->osd_set : pg.cur_loc_set, 6)) - { - return; - } // Adjust PG stats after "instant stabilize", because we need object_state above if (!op_data->object_state) {