Forget all dirty_entries before stable big_write or delete during initialisation

This fixes a 'double_alloc' assertion in the following case:
- big_write object #1 v1 to block #100
- big_write object #1 v2 to block #101
- big_write object #2 v1 to block #100
rel-0.5
Vitaliy Filippov 2021-04-04 00:09:53 +03:00
parent 75a6a556b5
commit 307c1731c1
3 changed files with 33 additions and 9 deletions

View File

@ -284,7 +284,7 @@ class blockstore_impl_t
// Stabilize
int dequeue_stable(blockstore_op_t *op);
int continue_stable(blockstore_op_t *op);
void mark_stable(const obj_ver_id & ov);
void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
void handle_stable_event(ring_data_t *data, blockstore_op_t *op);
void stabilize_object(object_id oid, uint64_t max_ver);

View File

@ -564,7 +564,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
unstab = unstab < ov.version ? ov.version : unstab;
if (je->type == JE_SMALL_WRITE_INSTANT)
{
bs->mark_stable(ov);
bs->mark_stable(ov, true);
}
}
}
@ -620,7 +620,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
if (bs->data_alloc->get(je->big_write.location >> bs->block_order))
{
// This is probably a big_write that's already flushed and freed, but it may
// also indicate a bug. So we remember such entries and recheck them afterwards
// also indicate a bug. So we remember such entries and recheck them afterwards.
// If it's not a bug they won't be present after reading the whole journal.
dirty_it->second.location = UINT64_MAX;
double_allocs.push_back(ov);
}
@ -646,7 +647,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
unstab = unstab < ov.version ? ov.version : unstab;
if (je->type == JE_BIG_WRITE_INSTANT)
{
bs->mark_stable(ov);
bs->mark_stable(ov, true);
}
}
}
@ -660,7 +661,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.oid = je->stable.oid,
.version = je->stable.version,
};
bs->mark_stable(ov);
bs->mark_stable(ov, true);
}
else if (je->type == JE_ROLLBACK)
{
@ -716,7 +717,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
bs->journal.used_sectors[proc_pos]++;
// Deletions are treated as immediately stable, because
// "2-phase commit" (write->stabilize) isn't sufficient for them anyway
bs->mark_stable(ov);
bs->mark_stable(ov, true);
}
// Ignore delete if neither preceding dirty entries nor the clean one are present
}
@ -748,7 +749,10 @@ void blockstore_init_journal::erase_dirty_object(blockstore_dirty_db_t::iterator
break;
}
}
bs->erase_dirty(dirty_it, dirty_end, UINT64_MAX);
auto clean_it = bs->clean_db.find(oid);
uint64_t clean_loc = clean_it != bs->clean_db.end()
? clean_it->second.location : UINT64_MAX;
bs->erase_dirty(dirty_it, dirty_end, clean_loc);
// Remove it from the flusher's queue, too
// Otherwise it may end up referring to a small unstable write after reading the rest of the journal
bs->flusher->remove_flush(oid);

View File

@ -176,7 +176,7 @@ resume_5:
return 2;
}
void blockstore_impl_t::mark_stable(const obj_ver_id & v)
void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
{
auto dirty_it = dirty_db.find(v);
if (dirty_it != dirty_db.end())
@ -187,7 +187,27 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v)
{
dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_STABLE;
}
else if (IS_STABLE(dirty_it->second.state))
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
IS_DELETE(dirty_it->second.state)))
{
// Big write overrides all previous dirty entries
auto erase_end = dirty_it;
while (dirty_it != dirty_db.begin())
{
dirty_it--;
if (dirty_it->first.oid != v.oid)
{
dirty_it++;
break;
}
}
auto clean_it = clean_db.find(v.oid);
uint64_t clean_loc = clean_it != clean_db.end()
? clean_it->second.location : UINT64_MAX;
erase_dirty(dirty_it, erase_end, clean_loc);
break;
}
if (IS_STABLE(dirty_it->second.state))
{
break;
}