diff --git a/blockstore_flush.cpp b/blockstore_flush.cpp
index d278c43ce..529eb6470 100644
--- a/blockstore_flush.cpp
+++ b/blockstore_flush.cpp
@@ -635,14 +635,16 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
         goto resume_1;
     else if (wait_state == wait_base+2)
         goto resume_2;
-    if (!bs->disable_fsync)
+    if (!(fsync_meta ? bs->disable_meta_fsync : bs->disable_journal_fsync))
     {
         cur_sync = flusher->syncs.end();
         while (cur_sync != flusher->syncs.begin())
         {
             cur_sync--;
             if (cur_sync->fsync_meta == fsync_meta && cur_sync->state == 0)
+            {
                 goto sync_found;
+            }
         }
         cur_sync = flusher->syncs.emplace(flusher->syncs.end(), (flusher_sync_t){
             .fsync_meta = fsync_meta,
diff --git a/blockstore_impl.h b/blockstore_impl.h
index 105336269..0553dcf92 100644
--- a/blockstore_impl.h
+++ b/blockstore_impl.h
@@ -194,9 +194,8 @@ class blockstore_impl_t
     // Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
     uint64_t bitmap_granularity = 4096;
     bool readonly = false;
-    // FIXME: separate flags for data, metadata and journal
     // It is safe to disable fsync() if drive write cache is writethrough
-    bool disable_fsync = false;
+    bool disable_data_fsync = false, disable_meta_fsync = false, disable_journal_fsync = false;
     bool inmemory_meta = false;
     int flusher_count;
     /******* END OF OPTIONS *******/
diff --git a/blockstore_init.cpp b/blockstore_init.cpp
index e5b1555ee..4a068a396 100644
--- a/blockstore_init.cpp
+++ b/blockstore_init.cpp
@@ -251,7 +251,7 @@ resume_1:
     data->callback = simple_callback;
     my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
     wait_count++;
-    if (!bs->disable_fsync)
+    if (!bs->disable_journal_fsync)
     {
         GET_SQE();
         my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
@@ -331,7 +331,7 @@ resume_1:
         data->callback = simple_callback;
         wait_count++;
         my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + init_write_sector);
-        if (!bs->disable_fsync)
+        if (!bs->disable_journal_fsync)
         {
             GET_SQE();
             data->iov = { 0 };
diff --git a/blockstore_open.cpp b/blockstore_open.cpp
index 036bda6a5..85e3320e5 100644
--- a/blockstore_open.cpp
+++ b/blockstore_open.cpp
@@ -22,9 +22,17 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
     {
         readonly = true;
     }
-    if (config["disable_fsync"] == "true" || config["disable_fsync"] == "1" || config["disable_fsync"] == "yes")
+    if (config["disable_data_fsync"] == "true" || config["disable_data_fsync"] == "1" || config["disable_data_fsync"] == "yes")
     {
-        disable_fsync = true;
+        disable_data_fsync = true;
+    }
+    if (config["disable_meta_fsync"] == "true" || config["disable_meta_fsync"] == "1" || config["disable_meta_fsync"] == "yes")
+    {
+        disable_meta_fsync = true;
+    }
+    if (config["disable_journal_fsync"] == "true" || config["disable_journal_fsync"] == "1" || config["disable_journal_fsync"] == "yes")
+    {
+        disable_journal_fsync = true;
     }
     metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
     cfg_journal_size = strtoull(config["journal_size"].c_str(), NULL, 10);
@@ -265,6 +273,7 @@ void blockstore_impl_t::open_meta()
     else
     {
         meta_fd = data_fd;
+        disable_meta_fsync = disable_data_fsync;
         meta_size = 0;
         if (meta_offset >= data_size)
         {
@@ -287,6 +296,7 @@ void blockstore_impl_t::open_journal()
     else
     {
         journal.fd = meta_fd;
+        disable_journal_fsync = disable_meta_fsync;
         journal.device_size = 0;
         if (journal.offset >= data_size)
         {
diff --git a/blockstore_sync.cpp b/blockstore_sync.cpp
index 48ce61a6b..5bbfc436d 100644
--- a/blockstore_sync.cpp
+++ b/blockstore_sync.cpp
@@ -39,7 +39,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
     if (PRIV(op)->sync_state == SYNC_HAS_SMALL)
     {
         // No big writes, just fsync the journal
-        int n_sqes = disable_fsync ? 0 : 1;
+        int n_sqes = disable_journal_fsync ? 0 : 1;
         if (journal.sector_info[journal.cur_sector].dirty)
         {
             n_sqes++;
@@ -61,7 +61,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
         {
             PRIV(op)->min_used_journal_sector = PRIV(op)->max_used_journal_sector = 0;
         }
-        if (!disable_fsync)
+        if (!disable_journal_fsync)
         {
             ring_data_t *data = ((ring_data_t*)sqes[s]->user_data);
             my_uring_prep_fsync(sqes[s++], journal.fd, IORING_FSYNC_DATASYNC);
@@ -79,7 +79,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
     else if (PRIV(op)->sync_state == SYNC_HAS_BIG)
     {
         // 1st step: fsync data
-        if (!disable_fsync)
+        if (!disable_data_fsync)
         {
             BS_SUBMIT_GET_SQE(sqe, data);
             my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
@@ -104,8 +104,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
             return 0;
         }
         // Get SQEs. Don't bother about merging, submit each journal sector as a separate request
-        struct io_uring_sqe *sqe[space_check.sectors_required + (disable_fsync ? 0 : 1)];
-        for (int i = 0; i < space_check.sectors_required + (disable_fsync ? 0 : 1); i++)
+        struct io_uring_sqe *sqe[space_check.sectors_required + (disable_journal_fsync ? 0 : 1)];
+        for (int i = 0; i < space_check.sectors_required + (disable_journal_fsync ? 0 : 1); i++)
         {
             BS_SUBMIT_GET_SQE_DECL(sqe[i]);
         }
@@ -148,7 +148,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
         }
         PRIV(op)->max_used_journal_sector = 1 + journal.cur_sector;
         // ... And a journal fsync
-        if (!disable_fsync)
+        if (!disable_journal_fsync)
         {
             my_uring_prep_fsync(sqe[s], journal.fd, IORING_FSYNC_DATASYNC);
             struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
@@ -157,7 +157,9 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
             PRIV(op)->pending_ops = 1 + s;
         }
         else
+        {
             PRIV(op)->pending_ops = s;
+        }
         PRIV(op)->sync_state = SYNC_JOURNAL_SYNC_SENT;
         ringloop->submit();
     }