diff --git a/src/blockstore_impl.h b/src/blockstore_impl.h index d4d411233..8503f81ce 100644 --- a/src/blockstore_impl.h +++ b/src/blockstore_impl.h @@ -284,7 +284,7 @@ class blockstore_impl_t // Stabilize int dequeue_stable(blockstore_op_t *op); int continue_stable(blockstore_op_t *op); - void mark_stable(const obj_ver_id & ov); + void mark_stable(const obj_ver_id & ov, bool forget_dirty = false); void handle_stable_event(ring_data_t *data, blockstore_op_t *op); void stabilize_object(object_id oid, uint64_t max_ver); diff --git a/src/blockstore_init.cpp b/src/blockstore_init.cpp index 2a1be17e7..3978112e2 100644 --- a/src/blockstore_init.cpp +++ b/src/blockstore_init.cpp @@ -564,7 +564,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u unstab = unstab < ov.version ? ov.version : unstab; if (je->type == JE_SMALL_WRITE_INSTANT) { - bs->mark_stable(ov); + bs->mark_stable(ov, true); } } } @@ -620,7 +620,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u if (bs->data_alloc->get(je->big_write.location >> bs->block_order)) { // This is probably a big_write that's already flushed and freed, but it may - // also indicate a bug. So we remember such entries and recheck them afterwards + // also indicate a bug. So we remember such entries and recheck them afterwards. + // If it's not a bug they won't be present after reading the whole journal. dirty_it->second.location = UINT64_MAX; double_allocs.push_back(ov); } @@ -646,7 +647,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u unstab = unstab < ov.version ? ov.version : unstab; if (je->type == JE_BIG_WRITE_INSTANT) { - bs->mark_stable(ov); + bs->mark_stable(ov, true); } } } @@ -660,7 +661,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u .oid = je->stable.oid, .version = je->stable.version, }; - bs->mark_stable(ov); + bs->mark_stable(ov, true); } else if (je->type == JE_ROLLBACK) { @@ -716,7 +717,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u bs->journal.used_sectors[proc_pos]++; // Deletions are treated as immediately stable, because // "2-phase commit" (write->stabilize) isn't sufficient for them anyway - bs->mark_stable(ov); + bs->mark_stable(ov, true); } // Ignore delete if neither preceding dirty entries nor the clean one are present } @@ -748,7 +749,10 @@ void blockstore_init_journal::erase_dirty_object(blockstore_dirty_db_t::iterator break; } } - bs->erase_dirty(dirty_it, dirty_end, UINT64_MAX); + auto clean_it = bs->clean_db.find(oid); + uint64_t clean_loc = clean_it != bs->clean_db.end() + ? clean_it->second.location : UINT64_MAX; + bs->erase_dirty(dirty_it, dirty_end, clean_loc); // Remove it from the flusher's queue, too // Otherwise it may end up referring to a small unstable write after reading the rest of the journal bs->flusher->remove_flush(oid); diff --git a/src/blockstore_stable.cpp b/src/blockstore_stable.cpp index b9ae9564d..cd676b90d 100644 --- a/src/blockstore_stable.cpp +++ b/src/blockstore_stable.cpp @@ -176,7 +176,7 @@ resume_5: return 2; } -void blockstore_impl_t::mark_stable(const obj_ver_id & v) +void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty) { auto dirty_it = dirty_db.find(v); if (dirty_it != dirty_db.end()) @@ -187,7 +187,27 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v) { dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_STABLE; } - else if (IS_STABLE(dirty_it->second.state)) + if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) || + IS_DELETE(dirty_it->second.state))) + { + // Big write overrides all previous dirty entries + auto erase_end = dirty_it; + while (dirty_it != dirty_db.begin()) + { + dirty_it--; + if (dirty_it->first.oid != v.oid) + { + dirty_it++; + break; + } + } + auto clean_it = clean_db.find(v.oid); + uint64_t clean_loc = clean_it != clean_db.end() + ? clean_it->second.location : UINT64_MAX; + erase_dirty(dirty_it, erase_end, clean_loc); + break; + } + if (IS_STABLE(dirty_it->second.state)) { break; }