From b0ad1e1e6de1b416be12fce233ea133e22cb9bf8 Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov
Date: Tue, 24 Mar 2020 02:37:44 +0300
Subject: [PATCH] Remember writes as "unsynced" only after completing them

Previously BS_OP_SYNC could take unfinished writes and add them into the journal
before they were actually completed. This was leading to crashes with the message
"BUG: Unexpected dirty_entry 2000000000001:9f2a0000 v3 unstable state during flush: 338"
---
 src/blockstore_flush.cpp |  2 +-
 src/blockstore_impl.h    |  1 +
 src/blockstore_sync.cpp  |  1 +
 src/blockstore_write.cpp | 53 ++++++++++++++++++++--------------------
 4 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/src/blockstore_flush.cpp b/src/blockstore_flush.cpp
index 0e2169c3..3af87685 100644
--- a/src/blockstore_flush.cpp
+++ b/src/blockstore_flush.cpp
@@ -646,7 +646,7 @@ bool journal_flusher_co::scan_dirty(int wait_base)
         {
             char err[1024];
             snprintf(
-                err, 1024, "BUG: Unexpected dirty_entry %lx:%lx v%lu unstable state during flush: %d",
+                err, 1024, "BUG: Unexpected dirty_entry %lx:%lx v%lu unstable state during flush: 0x%x",
                 dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, dirty_it->second.state
             );
             throw std::runtime_error(err);
diff --git a/src/blockstore_impl.h b/src/blockstore_impl.h
index b27d93e9..21a97d49 100644
--- a/src/blockstore_impl.h
+++ b/src/blockstore_impl.h
@@ -210,6 +210,7 @@ class blockstore_impl_t
     blockstore_dirty_db_t dirty_db;
     std::vector<blockstore_op_t*> submit_queue;
     std::vector<obj_ver_id> unsynced_big_writes, unsynced_small_writes;
+    int unsynced_big_write_count = 0;
     allocator *data_alloc = NULL;
     uint8_t *zero_object;
diff --git a/src/blockstore_sync.cpp b/src/blockstore_sync.cpp
index bcce8607..1bb0b51d 100644
--- a/src/blockstore_sync.cpp
+++ b/src/blockstore_sync.cpp
@@ -24,6 +24,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     if (PRIV(op)->op_state == 0)
     {
         stop_sync_submitted = false;
+        unsynced_big_write_count -= unsynced_big_writes.size();
         PRIV(op)->sync_big_writes.swap(unsynced_big_writes);
         PRIV(op)->sync_small_writes.swap(unsynced_small_writes);
         PRIV(op)->sync_small_checked = 0;
diff --git a/src/blockstore_write.cpp b/src/blockstore_write.cpp
index d7007b73..812b24d9 100644
--- a/src/blockstore_write.cpp
+++ b/src/blockstore_write.cpp
@@ -201,7 +201,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
     if ((dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE)
     {
         blockstore_journal_check_t space_check(this);
-        if (!space_check.check_available(op, unsynced_big_writes.size() + 1, sizeof(journal_entry_big_write), JOURNAL_STABILIZE_RESERVATION))
+        if (!space_check.check_available(op, unsynced_big_write_count + 1, sizeof(journal_entry_big_write), JOURNAL_STABILIZE_RESERVATION))
         {
             return 0;
         }
@@ -250,11 +250,8 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
         PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
         if (immediate_commit != IMMEDIATE_ALL)
         {
-            // Remember big write as unsynced
-            unsynced_big_writes.push_back((obj_ver_id){
-                .oid = op->oid,
-                .version = op->version,
-            });
+            // Increase the counter, but don't save into unsynced_writes yet (can't sync until the write is finished)
+            unsynced_big_write_count++;
             PRIV(op)->op_state = 3;
         }
         else
@@ -267,7 +264,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
         // Small (journaled) write
         // First check if the journal has sufficient space
         blockstore_journal_check_t space_check(this);
-        if (unsynced_big_writes.size() && !space_check.check_available(op, unsynced_big_writes.size(), sizeof(journal_entry_big_write), 0)
+        if (unsynced_big_write_count && !space_check.check_available(op, unsynced_big_write_count, sizeof(journal_entry_big_write), 0)
             || !space_check.check_available(op, 1, sizeof(journal_entry_small_write), op->len + JOURNAL_STABILIZE_RESERVATION))
         {
             return 0;
         }
@@ -359,14 +356,6 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
         {
             journal.next_free = journal_block_size;
         }
-        if (immediate_commit == IMMEDIATE_NONE)
-        {
-            // Remember small write as unsynced
-            unsynced_small_writes.push_back((obj_ver_id){
-                .oid = op->oid,
-                .version = op->version,
-            });
-        }
         if (!PRIV(op)->pending_ops)
         {
             PRIV(op)->op_state = 4;
@@ -431,7 +420,7 @@ resume_2:
 resume_4:
     // Switch object state
 #ifdef BLOCKSTORE_DEBUG
-    printf("Ack write %lx:%lx v%lu = state %x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
+    printf("Ack write %lx:%lx v%lu = state 0x%x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
 #endif
     bool imm = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
         ? (immediate_commit == IMMEDIATE_ALL)
@@ -445,11 +434,31 @@ resume_4:
         | (imm ? BS_ST_SYNCED : BS_ST_WRITTEN);
     if (imm && ((dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_DELETE || (dirty_it->second.state & BS_ST_INSTANT)))
     {
-        // Deletions are treated as immediately stable
+        // Deletions and 'instant' operations are treated as immediately stable
         mark_stable(dirty_it->first);
     }
-    if (immediate_commit == IMMEDIATE_ALL)
+    if (!imm)
     {
+        if ((dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE)
+        {
+            // Remember big write as unsynced
+            unsynced_big_writes.push_back((obj_ver_id){
+                .oid = op->oid,
+                .version = op->version,
+            });
+        }
+        else
+        {
+            // Remember small write as unsynced
+            unsynced_small_writes.push_back((obj_ver_id){
+                .oid = op->oid,
+                .version = op->version,
+            });
+        }
+    }
+    if (imm && (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE)
+    {
+        // Unblock small writes
         dirty_it++;
         while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
         {
@@ -583,14 +592,6 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
         PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
         PRIV(op)->pending_ops++;
     }
-    else
-    {
-        // Remember delete as unsynced
-        unsynced_small_writes.push_back((obj_ver_id){
-            .oid = op->oid,
-            .version = op->version,
-        });
-    }
     if (!PRIV(op)->pending_ops)
     {
         PRIV(op)->op_state = 4;
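
Not part of the patch: below is a minimal standalone C++ sketch of the ordering this commit enforces, using simplified stand-in names (write_id, submit_big_write, complete_big_write, start_sync) rather than the real blockstore types. A submitted big write only bumps unsynced_big_write_count, so journal space checks still account for it, but it is added to unsynced_big_writes only after it completes; a sync therefore never journals an unfinished write.

// Hypothetical, simplified model of the "count on submit, remember on completion" scheme.
// Names are illustrative, not the real vitastor API.
#include <cstdio>
#include <cstdint>
#include <vector>

struct write_id { uint64_t oid, version; };

static std::vector<write_id> unsynced_big_writes;   // completed, not-yet-synced writes only
static int unsynced_big_write_count = 0;             // completed + still in flight

// Called when a big write is submitted: account for it in journal space checks,
// but don't let a sync pick it up yet.
void submit_big_write(write_id w)
{
    (void)w;
    unsynced_big_write_count++;
}

// Called when the write I/O finishes: now it's safe for a sync to journal it.
void complete_big_write(write_id w)
{
    unsynced_big_writes.push_back(w);
}

// Called by sync: takes only completed writes and drops them from the counter.
std::vector<write_id> start_sync()
{
    unsynced_big_write_count -= unsynced_big_writes.size();
    std::vector<write_id> batch;
    batch.swap(unsynced_big_writes);
    return batch;
}

int main()
{
    submit_big_write({1, 1});    // in flight: counted, not listed
    submit_big_write({2, 1});
    complete_big_write({1, 1});  // finished: now visible to sync
    auto batch = start_sync();
    printf("sync sees %zu write(s), %d still unsynced or in flight\n",
        batch.size(), unsynced_big_write_count);
    return 0;
}

Before the patch, the equivalent of complete_big_write() happened at submission time, so start_sync() could pick up a write whose data was not yet on disk, which is what triggered the "unstable state during flush" check.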