forked from vitalif/vitastor
Implement journal write batching and slightly refactor journal writes
Slightly reduces WA. For example, in 4K T1Q128 replicated randwrite tests WA is reduced from ~3.6 to ~3.1, and in T1Q64 from ~3.8 to ~3.4. Only effective without no_same_sector_overwrites.
branch master
parent 999bed8514
commit f93491bc6c
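For intuition about where the reduction comes from (an illustrative reading of the change, not a measurement from this commit): without no_same_sector_overwrites, consecutive small writes keep landing in the same partially filled 4 KB journal sector, and previously each operation submitted its own copy of that sector, so two operations handled in one loop iteration cost two 4 KB journal writes. With batching, only one write of the sector is submitted per io_uring batch and both operations wait on it, saving 4 KB of journal traffic per merged pair. With no_same_sector_overwrites every entry starts a new sector, so there is never a second operation to merge, which is presumably why the option disables the gain.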
@@ -235,6 +235,12 @@ void blockstore_impl_t::loop()
         {
             throw std::runtime_error(std::string("io_uring_submit: ") + strerror(-ret));
         }
+        for (auto s: journal.submitting_sectors)
+        {
+            // Mark journal sector writes as submitted
+            journal.sector_info[s].submit_id = 0;
+        }
+        journal.submitting_sectors.clear();
         if ((initial_ring_space - ringloop->space_left()) > 0)
         {
             live = true;
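Below is a self-contained sketch of the bookkeeping this hunk and the following ones introduce (simplified, hypothetical types; the real code keeps the same state in journal.sector_info, journal.submitting_sectors and journal.flushing_ops): a sector gets at most one write per io_uring_submit() batch, every operation that touched it still registers a wait on that single write, and the batch is closed by resetting submit_id after a successful submit.

#include <cstdint>
#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct pending_flush_t { uint64_t flush_id; int sector; int op; };
inline bool operator < (const pending_flush_t & a, const pending_flush_t & b)
{
    return a.flush_id < b.flush_id || (a.flush_id == b.flush_id && a.op < b.op);
}

int main()
{
    uint64_t submit_id = 0;
    std::map<int, uint64_t> sector_submit_id;  // 0 = sector not yet queued in this batch
    std::vector<int> submitting_sectors;       // sectors queued since the last submit
    std::set<pending_flush_t> flushing_ops;    // (sector write, waiting op) pairs

    auto prepare = [&](int sector, int op)
    {
        if (!sector_submit_id[sector])
        {
            // First flush of this sector in the current batch: queue exactly one write
            sector_submit_id[sector] = ++submit_id;
            submitting_sectors.push_back(sector);
        }
        // Every operation still waits for that one write
        flushing_ops.insert(pending_flush_t{ sector_submit_id[sector], sector, op });
    };

    prepare(3, 101);
    prepare(3, 102);  // same sector, same batch -> no second sector write is queued
    printf("sector writes queued: %zu, waiting ops: %zu\n",
        submitting_sectors.size(), flushing_ops.size());  // prints 1 and 2

    // Equivalent of loop() after a successful io_uring_submit(): close the batch
    for (int s: submitting_sectors)
        sector_submit_id[s] = 0;
    submitting_sectors.clear();
    return 0;
}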
@@ -54,6 +54,14 @@
 #define IS_BIG_WRITE(st) (((st) & 0x0F) == BS_ST_BIG_WRITE)
 #define IS_DELETE(st) (((st) & 0x0F) == BS_ST_DELETE)
 
+#define BS_SUBMIT_CHECK_SQES(n) \
+    if (ringloop->space_left() < (n))\
+    {\
+        /* Pause until there are more requests available */\
+        PRIV(op)->wait_for = WAIT_SQE;\
+        return 0;\
+    }
+
 #define BS_SUBMIT_GET_SQE(sqe, data) \
     BS_SUBMIT_GET_ONLY_SQE(sqe); \
     struct ring_data_t *data = ((ring_data_t*)sqe->user_data)
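A toy, self-contained illustration of how callers use this style of macro (ringloop, PRIV and WAIT_SQE are replaced by hypothetical stand-ins): instead of collecting SQE pointers into an array up front, the caller checks capacity once and parks the operation when the ring is short.

#include <cstdio>

static int space_left = 1;  // stand-in for ringloop->space_left()
static int wait_for = 0;    // stand-in for PRIV(op)->wait_for
#define WAIT_SQE 1

#define CHECK_SQES(n) \
    if (space_left < (n))\
    {\
        /* Pause until there are more requests available */\
        wait_for = WAIT_SQE;\
        return 0;\
    }

static int submit_three_sector_writes()
{
    CHECK_SQES(3);  // only 1 SQE free: sets wait_for and returns 0 early
    return 1;       // "submitted"
}

int main()
{
    int res = submit_three_sector_writes();
    printf("result=%d wait_for=%d\n", res, wait_for);  // result=0 wait_for=1
    return 0;
}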
@@ -170,7 +178,7 @@ struct blockstore_op_private_t
     std::vector<fulfill_read_t> read_vec;
 
     // Sync, write
-    uint64_t min_flushed_journal_sector, max_flushed_journal_sector;
+    int min_flushed_journal_sector, max_flushed_journal_sector;
 
     // Write
     struct iovec iov_zerofill[3];
@@ -283,6 +291,10 @@ class blockstore_impl_t
     void open_journal();
     uint8_t* get_clean_entry_bitmap(uint64_t block_loc, int offset);
 
+    // Journaling
+    void prepare_journal_sector_write(int sector, blockstore_op_t *op);
+    void handle_journal_write(ring_data_t *data, uint64_t flush_id);
+
     // Asynchronous init
     int initialized;
     int metadata_buf_size;
@@ -310,21 +322,18 @@ class blockstore_impl_t
 
     // Sync
     int continue_sync(blockstore_op_t *op, bool queue_has_in_progress_sync);
-    void handle_sync_event(ring_data_t *data, blockstore_op_t *op);
     void ack_sync(blockstore_op_t *op);
 
     // Stabilize
     int dequeue_stable(blockstore_op_t *op);
     int continue_stable(blockstore_op_t *op);
     void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
-    void handle_stable_event(ring_data_t *data, blockstore_op_t *op);
     void stabilize_object(object_id oid, uint64_t max_ver);
 
     // Rollback
     int dequeue_rollback(blockstore_op_t *op);
     int continue_rollback(blockstore_op_t *op);
     void mark_rolled_back(const obj_ver_id & ov);
-    void handle_rollback_event(ring_data_t *data, blockstore_op_t *op);
     void erase_dirty(blockstore_dirty_db_t::iterator dirty_start, blockstore_dirty_db_t::iterator dirty_end, uint64_t clean_loc);
 
     // List
@@ -153,22 +153,73 @@ journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type,
     return je;
 }
 
-void prepare_journal_sector_write(journal_t & journal, int cur_sector, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb)
+void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_op_t *op)
 {
-    journal.sector_info[cur_sector].dirty = false;
-    journal.sector_info[cur_sector].written = true;
-    journal.sector_info[cur_sector].flush_count++;
-    ring_data_t *data = ((ring_data_t*)sqe->user_data);
-    data->iov = (struct iovec){
-        (journal.inmemory
-            ? journal.buffer + journal.sector_info[cur_sector].offset
-            : journal.sector_buf + journal.block_size*cur_sector),
-        journal.block_size
-    };
-    data->callback = cb;
-    my_uring_prep_writev(
-        sqe, journal.fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
-    );
+    // Don't submit the same sector twice in the same batch
+    if (!journal.sector_info[cur_sector].submit_id)
+    {
+        io_uring_sqe *sqe = get_sqe();
+        // Caller must ensure availability of an SQE
+        assert(sqe != NULL);
+        ring_data_t *data = ((ring_data_t*)sqe->user_data);
+        journal.sector_info[cur_sector].written = true;
+        journal.sector_info[cur_sector].submit_id = ++journal.submit_id;
+        journal.submitting_sectors.push_back(cur_sector);
+        journal.sector_info[cur_sector].flush_count++;
+        data->iov = (struct iovec){
+            (journal.inmemory
+                ? journal.buffer + journal.sector_info[cur_sector].offset
+                : journal.sector_buf + journal.block_size*cur_sector),
+            journal.block_size
+        };
+        data->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
+        my_uring_prep_writev(
+            sqe, journal.fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
+        );
+    }
+    journal.sector_info[cur_sector].dirty = false;
+    // But always remember that this operation has to wait until this exact journal write is finished
+    journal.flushing_ops.insert((pending_journaling_t){
+        .flush_id = journal.sector_info[cur_sector].submit_id,
+        .sector = cur_sector,
+        .op = op,
+    });
+    auto priv = PRIV(op);
+    priv->pending_ops++;
+    if (!priv->min_flushed_journal_sector)
+        priv->min_flushed_journal_sector = 1+cur_sector;
+    priv->max_flushed_journal_sector = 1+cur_sector;
+}
+
+void blockstore_impl_t::handle_journal_write(ring_data_t *data, uint64_t flush_id)
+{
+    live = true;
+    if (data->res != data->iov.iov_len)
+    {
+        // FIXME: our state becomes corrupted after a write error. maybe do something better than just die
+        throw std::runtime_error(
+            "journal write failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
+            "). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
+        );
+    }
+    auto fl_it = journal.flushing_ops.upper_bound((pending_journaling_t){ .flush_id = flush_id });
+    if (fl_it != journal.flushing_ops.end() && fl_it->flush_id == flush_id)
+    {
+        journal.sector_info[fl_it->sector].flush_count--;
+    }
+    while (fl_it != journal.flushing_ops.end() && fl_it->flush_id == flush_id)
+    {
+        auto priv = PRIV(fl_it->op);
+        priv->pending_ops--;
+        assert(priv->pending_ops >= 0);
+        if (priv->pending_ops == 0)
+        {
+            release_journal_sectors(fl_it->op);
+            priv->op_state++;
+            ringloop->wakeup();
+        }
+        journal.flushing_ops.erase(fl_it++);
+    }
 }
 
 journal_t::~journal_t()
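The fan-out in handle_journal_write() depends on flushing_ops being ordered by (flush_id, op): probing upper_bound() with an entry whose op is zero lands on the first real waiter for that flush_id, and the loop then wakes every operation that was batched into the same sector write. A minimal self-contained sketch of just that lookup (simplified types, integer op ids in place of pointers):

#include <cstdint>
#include <cstdio>
#include <set>

struct pending_t { uint64_t flush_id; int sector; int op; };
inline bool operator < (const pending_t & a, const pending_t & b)
{
    return a.flush_id < b.flush_id || (a.flush_id == b.flush_id && a.op < b.op);
}

int main()
{
    std::set<pending_t> flushing_ops = {
        { 1, 3, 101 }, { 2, 4, 102 }, { 2, 4, 103 }, { 3, 5, 104 },
    };
    uint64_t flush_id = 2;  // the journal sector write that just completed
    // The probe's op is 0, so upper_bound() skips smaller flush_ids and stops
    // at the first real entry with flush_id == 2 (real op values compare greater)
    auto it = flushing_ops.upper_bound(pending_t{ flush_id, 0, 0 });
    while (it != flushing_ops.end() && it->flush_id == flush_id)
    {
        printf("wake op %d (sector %d)\n", it->op, it->sector);  // ops 102 and 103
        flushing_ops.erase(it++);
    }
    return 0;
}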
@@ -4,6 +4,7 @@
 #pragma once
 
 #include "crc32c.h"
+#include <set>
 
 #define MIN_JOURNAL_SIZE 4*1024*1024
 #define JOURNAL_MAGIC 0x4A33
@@ -145,8 +146,21 @@ struct journal_sector_info_t
     uint64_t flush_count;
     bool written;
     bool dirty;
+    uint64_t submit_id;
 };
 
+struct pending_journaling_t
+{
+    uint64_t flush_id;
+    int sector;
+    blockstore_op_t *op;
+};
+
+inline bool operator < (const pending_journaling_t & a, const pending_journaling_t & b)
+{
+    return a.flush_id < b.flush_id || a.flush_id == b.flush_id && a.op < b.op;
+}
+
 struct journal_t
 {
     int fd;
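A side note on the comparator above (my observation, not text from the commit): && binds tighter than ||, so entries are ordered by flush_id first and the op pointer only breaks ties, which keeps all waiters of one batched sector write adjacent in the set. Fully parenthesized, the same ordering reads:

// Same ordering as the operator < above, with explicit parentheses
inline bool pending_less(const pending_journaling_t & a, const pending_journaling_t & b)
{
    return (a.flush_id < b.flush_id) || ((a.flush_id == b.flush_id) && (a.op < b.op));
}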
@@ -172,6 +186,9 @@ struct journal_t
     bool no_same_sector_overwrites = false;
     int cur_sector = 0;
     int in_sector_pos = 0;
+    std::vector<int> submitting_sectors;
+    std::set<pending_journaling_t> flushing_ops;
+    uint64_t submit_id = 0;
 
     // Used sector map
     // May use ~ 80 MB per 1 GB of used journal space in the worst case
@@ -200,5 +217,3 @@ struct blockstore_journal_check_t
 };
 
 journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size);
-
-void prepare_journal_sector_write(journal_t & journal, int sector, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb);
@@ -74,24 +74,17 @@ skip_ov:
     {
         return 0;
     }
-    // There is sufficient space. Get SQEs
-    struct io_uring_sqe *sqe[space_check.sectors_to_write];
-    for (i = 0; i < space_check.sectors_to_write; i++)
-    {
-        BS_SUBMIT_GET_SQE_DECL(sqe[i]);
-    }
+    // There is sufficient space. Check SQEs
+    BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
     // Prepare and submit journal entries
-    auto cb = [this, op](ring_data_t *data) { handle_rollback_event(data, op); };
-    int s = 0, cur_sector = -1;
+    int s = 0;
     for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
     {
         if (!journal.entry_fits(sizeof(journal_entry_rollback)) &&
             journal.sector_info[journal.cur_sector].dirty)
         {
-            if (cur_sector == -1)
-                PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-            prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], cb);
-            cur_sector = journal.cur_sector;
+            prepare_journal_sector_write(journal.cur_sector, op);
+            s++;
         }
         journal_entry_rollback *je = (journal_entry_rollback*)
             prefill_single_journal_entry(journal, JE_ROLLBACK, sizeof(journal_entry_rollback));
@@ -100,12 +93,9 @@ skip_ov:
         je->crc32 = je_crc32((journal_entry*)je);
         journal.crc32_last = je->crc32;
     }
-    prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], cb);
+    prepare_journal_sector_write(journal.cur_sector, op);
+    s++;
     assert(s == space_check.sectors_to_write);
-    if (cur_sector == -1)
-        PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->pending_ops = s;
     PRIV(op)->op_state = 1;
     return 1;
 }
@@ -114,30 +104,23 @@ int blockstore_impl_t::continue_rollback(blockstore_op_t *op)
 {
     if (PRIV(op)->op_state == 2)
         goto resume_2;
-    else if (PRIV(op)->op_state == 3)
-        goto resume_3;
-    else if (PRIV(op)->op_state == 5)
-        goto resume_5;
+    else if (PRIV(op)->op_state == 4)
+        goto resume_4;
     else
         return 1;
 resume_2:
-    // Release used journal sectors
-    release_journal_sectors(op);
-resume_3:
     if (!disable_journal_fsync)
     {
-        io_uring_sqe *sqe;
-        BS_SUBMIT_GET_SQE_DECL(sqe);
-        ring_data_t *data = ((ring_data_t*)sqe->user_data);
+        BS_SUBMIT_GET_SQE(sqe, data);
         my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
         data->iov = { 0 };
-        data->callback = [this, op](ring_data_t *data) { handle_rollback_event(data, op); };
+        data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
         PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
         PRIV(op)->pending_ops = 1;
-        PRIV(op)->op_state = 4;
+        PRIV(op)->op_state = 3;
        return 1;
     }
-resume_5:
+resume_4:
     obj_ver_id* v;
     int i;
     for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
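As reconstructed from this hunk (the continue_stable() hunk below uses the same numbering), two states disappear because handle_journal_write() now releases the journal sectors itself:

// op_state progression after this commit, as read from these hunks:
//   1  journal sector writes submitted (pending_ops is counted inside prepare_journal_sector_write)
//   2  all journal writes done; sectors were already released by handle_journal_write(),
//      so resume_2 goes straight to the journal fsync (or falls through if fsync is disabled)
//   3  fsync submitted; handle_write_event() bumps the state when it completes
//   4  fsync done -> apply the rollback (or mark entries stable) and acknowledge the op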
@@ -196,24 +179,6 @@ void blockstore_impl_t::mark_rolled_back(const obj_ver_id & ov)
     }
 }
 
-void blockstore_impl_t::handle_rollback_event(ring_data_t *data, blockstore_op_t *op)
-{
-    live = true;
-    if (data->res != data->iov.iov_len)
-    {
-        throw std::runtime_error(
-            "write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
-            "). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
-        );
-    }
-    PRIV(op)->pending_ops--;
-    if (PRIV(op)->pending_ops == 0)
-    {
-        PRIV(op)->op_state++;
-        ringloop->wakeup();
-    }
-}
-
 void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start, blockstore_dirty_db_t::iterator dirty_end, uint64_t clean_loc)
 {
     if (dirty_end == dirty_start)
@@ -97,25 +97,18 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
     {
         return 0;
     }
-    // There is sufficient space. Get SQEs
-    struct io_uring_sqe *sqe[space_check.sectors_to_write];
-    for (i = 0; i < space_check.sectors_to_write; i++)
-    {
-        BS_SUBMIT_GET_SQE_DECL(sqe[i]);
-    }
+    // There is sufficient space. Check SQEs
+    BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
     // Prepare and submit journal entries
-    auto cb = [this, op](ring_data_t *data) { handle_stable_event(data, op); };
-    int s = 0, cur_sector = -1;
+    int s = 0;
     for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
     {
         // FIXME: Only stabilize versions that aren't stable yet
         if (!journal.entry_fits(sizeof(journal_entry_stable)) &&
             journal.sector_info[journal.cur_sector].dirty)
         {
-            if (cur_sector == -1)
-                PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-            prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], cb);
-            cur_sector = journal.cur_sector;
+            prepare_journal_sector_write(journal.cur_sector, op);
+            s++;
         }
         journal_entry_stable *je = (journal_entry_stable*)
             prefill_single_journal_entry(journal, JE_STABLE, sizeof(journal_entry_stable));
@@ -124,12 +117,9 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
         je->crc32 = je_crc32((journal_entry*)je);
         journal.crc32_last = je->crc32;
     }
-    prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], cb);
+    prepare_journal_sector_write(journal.cur_sector, op);
+    s++;
     assert(s == space_check.sectors_to_write);
-    if (cur_sector == -1)
-        PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->pending_ops = s;
     PRIV(op)->op_state = 1;
     return 1;
 }
@@ -138,30 +128,23 @@ int blockstore_impl_t::continue_stable(blockstore_op_t *op)
 {
     if (PRIV(op)->op_state == 2)
         goto resume_2;
-    else if (PRIV(op)->op_state == 3)
-        goto resume_3;
-    else if (PRIV(op)->op_state == 5)
-        goto resume_5;
+    else if (PRIV(op)->op_state == 4)
+        goto resume_4;
     else
         return 1;
 resume_2:
-    // Release used journal sectors
-    release_journal_sectors(op);
-resume_3:
     if (!disable_journal_fsync)
     {
-        io_uring_sqe *sqe;
-        BS_SUBMIT_GET_SQE_DECL(sqe);
-        ring_data_t *data = ((ring_data_t*)sqe->user_data);
+        BS_SUBMIT_GET_SQE(sqe, data);
         my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
         data->iov = { 0 };
-        data->callback = [this, op](ring_data_t *data) { handle_stable_event(data, op); };
+        data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
         PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
         PRIV(op)->pending_ops = 1;
-        PRIV(op)->op_state = 4;
+        PRIV(op)->op_state = 3;
         return 1;
     }
-resume_5:
+resume_4:
     // Mark dirty_db entries as stable, acknowledge op completion
     obj_ver_id* v;
     int i;
@@ -257,21 +240,3 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
         unstable_writes.erase(unstab_it);
     }
 }
-
-void blockstore_impl_t::handle_stable_event(ring_data_t *data, blockstore_op_t *op)
-{
-    live = true;
-    if (data->res != data->iov.iov_len)
-    {
-        throw std::runtime_error(
-            "write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
-            "). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
-        );
-    }
-    PRIV(op)->pending_ops--;
-    if (PRIV(op)->pending_ops == 0)
-    {
-        PRIV(op)->op_state++;
-        ringloop->wakeup();
-    }
-}
@@ -44,10 +44,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     if (journal.sector_info[journal.cur_sector].dirty)
     {
         // Write out the last journal sector if it happens to be dirty
-        BS_SUBMIT_GET_ONLY_SQE(sqe);
-        prepare_journal_sector_write(journal, journal.cur_sector, sqe, [this, op](ring_data_t *data) { handle_sync_event(data, op); });
-        PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-        PRIV(op)->pending_ops = 1;
+        BS_SUBMIT_CHECK_SQES(1);
+        prepare_journal_sector_write(journal.cur_sector, op);
         PRIV(op)->op_state = SYNC_JOURNAL_WRITE_SENT;
         return 1;
     }
@@ -64,7 +62,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     BS_SUBMIT_GET_SQE(sqe, data);
     my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
     data->iov = { 0 };
-    data->callback = [this, op](ring_data_t *data) { handle_sync_event(data, op); };
+    data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
     PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
     PRIV(op)->pending_ops = 1;
     PRIV(op)->op_state = SYNC_DATA_SYNC_SENT;
@@ -85,24 +83,18 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     {
         return 0;
     }
-    // Get SQEs. Don't bother about merging, submit each journal sector as a separate request
-    struct io_uring_sqe *sqe[space_check.sectors_to_write];
-    for (int i = 0; i < space_check.sectors_to_write; i++)
-    {
-        BS_SUBMIT_GET_SQE_DECL(sqe[i]);
-    }
+    // Check SQEs. Don't bother about merging, submit each journal sector as a separate request
+    BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
     // Prepare and submit journal entries
     auto it = PRIV(op)->sync_big_writes.begin();
-    int s = 0, cur_sector = -1;
+    int s = 0;
     while (it != PRIV(op)->sync_big_writes.end())
     {
         if (!journal.entry_fits(sizeof(journal_entry_big_write) + clean_entry_bitmap_size) &&
             journal.sector_info[journal.cur_sector].dirty)
         {
-            if (cur_sector == -1)
-                PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-            prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], [this, op](ring_data_t *data) { handle_sync_event(data, op); });
-            cur_sector = journal.cur_sector;
+            prepare_journal_sector_write(journal.cur_sector, op);
+            s++;
         }
         auto & dirty_entry = dirty_db.at(*it);
         journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
@@ -129,12 +121,9 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
         journal.crc32_last = je->crc32;
         it++;
     }
-    prepare_journal_sector_write(journal, journal.cur_sector, sqe[s++], [this, op](ring_data_t *data) { handle_sync_event(data, op); });
+    prepare_journal_sector_write(journal.cur_sector, op);
+    s++;
     assert(s == space_check.sectors_to_write);
-    if (cur_sector == -1)
-        PRIV(op)->min_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-    PRIV(op)->pending_ops = s;
     PRIV(op)->op_state = SYNC_JOURNAL_WRITE_SENT;
     return 1;
 }
@@ -145,7 +134,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     BS_SUBMIT_GET_SQE(sqe, data);
     my_uring_prep_fsync(sqe, journal.fd, IORING_FSYNC_DATASYNC);
     data->iov = { 0 };
-    data->callback = [this, op](ring_data_t *data) { handle_sync_event(data, op); };
+    data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
     PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
     PRIV(op)->pending_ops = 1;
     PRIV(op)->op_state = SYNC_JOURNAL_SYNC_SENT;
@@ -164,42 +153,6 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
     return 1;
 }
 
-void blockstore_impl_t::handle_sync_event(ring_data_t *data, blockstore_op_t *op)
-{
-    live = true;
-    if (data->res != data->iov.iov_len)
-    {
-        throw std::runtime_error(
-            "write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
-            "). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
-        );
-    }
-    PRIV(op)->pending_ops--;
-    if (PRIV(op)->pending_ops == 0)
-    {
-        // Release used journal sectors
-        release_journal_sectors(op);
-        // Handle states
-        if (PRIV(op)->op_state == SYNC_DATA_SYNC_SENT)
-        {
-            PRIV(op)->op_state = SYNC_DATA_SYNC_DONE;
-        }
-        else if (PRIV(op)->op_state == SYNC_JOURNAL_WRITE_SENT)
-        {
-            PRIV(op)->op_state = SYNC_JOURNAL_WRITE_DONE;
-        }
-        else if (PRIV(op)->op_state == SYNC_JOURNAL_SYNC_SENT)
-        {
-            PRIV(op)->op_state = SYNC_DONE;
-        }
-        else
-        {
-            throw std::runtime_error("BUG: unexpected sync op state");
-        }
-        ringloop->wakeup();
-    }
-}
-
 void blockstore_impl_t::ack_sync(blockstore_op_t *op)
 {
     // Handle states
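What replaces the removed state switch (my reading of the diff): journal write completions now go through handle_journal_write() and fsync completions through handle_write_event(), and both simply increment op_state, which appears to assume the sync states are numbered consecutively:

// Assumed numbering implied by the generic PRIV(op)->op_state++ (not shown in this diff):
//   SYNC_DATA_SYNC_SENT     + 1 == SYNC_DATA_SYNC_DONE
//   SYNC_JOURNAL_WRITE_SENT + 1 == SYNC_JOURNAL_WRITE_DONE
//   SYNC_JOURNAL_SYNC_SENT  + 1 == SYNC_DONE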
@@ -268,8 +268,8 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
         cancel_all_writes(op, dirty_it, -ENOSPC);
         return 2;
     }
-    write_iodepth++;
     BS_SUBMIT_GET_SQE(sqe, data);
+    write_iodepth++;
     dirty_it->second.location = loc << block_order;
     dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_SUBMITTED;
 #ifdef BLOCKSTORE_DEBUG
@@ -324,29 +324,21 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
     {
         return 0;
     }
-    write_iodepth++;
-    // There is sufficient space. Get SQE(s)
-    struct io_uring_sqe *sqe1 = NULL;
-    if (immediate_commit != IMMEDIATE_NONE ||
-        !journal.entry_fits(sizeof(journal_entry_small_write) + clean_entry_bitmap_size))
-    {
-        // Write current journal sector only if it's dirty and full, or in the immediate_commit mode
-        BS_SUBMIT_GET_SQE_DECL(sqe1);
-    }
-    struct io_uring_sqe *sqe2 = NULL;
-    if (op->len > 0)
-    {
-        BS_SUBMIT_GET_SQE_DECL(sqe2);
-    }
+    // There is sufficient space. Check SQE(s)
+    BS_SUBMIT_CHECK_SQES(
+        // Write current journal sector only if it's dirty and full, or in the immediate_commit mode
+        (immediate_commit != IMMEDIATE_NONE ||
+            !journal.entry_fits(sizeof(journal_entry_small_write) + clean_entry_bitmap_size) ? 1 : 0) +
+        (op->len > 0 ? 1 : 0)
+    );
+    write_iodepth++;
     // Got SQEs. Prepare previous journal sector write if required
     auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
     if (immediate_commit == IMMEDIATE_NONE)
     {
-        if (sqe1)
+        if (!journal.entry_fits(sizeof(journal_entry_small_write) + clean_entry_bitmap_size))
        {
-            prepare_journal_sector_write(journal, journal.cur_sector, sqe1, cb);
-            PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-            PRIV(op)->pending_ops++;
+            prepare_journal_sector_write(journal.cur_sector, op);
         }
         else
         {
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
if (immediate_commit != IMMEDIATE_NONE)
|
if (immediate_commit != IMMEDIATE_NONE)
|
||||||
{
|
{
|
||||||
prepare_journal_sector_write(journal, journal.cur_sector, sqe1, cb);
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
|
|
||||||
PRIV(op)->pending_ops++;
|
|
||||||
}
|
}
|
||||||
if (op->len > 0)
|
if (op->len > 0)
|
||||||
{
|
{
|
||||||
|
@ -392,7 +382,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
// Copy data
|
// Copy data
|
||||||
memcpy(journal.buffer + journal.next_free, op->buf, op->len);
|
memcpy(journal.buffer + journal.next_free, op->buf, op->len);
|
||||||
}
|
}
|
||||||
ring_data_t *data2 = ((ring_data_t*)sqe2->user_data);
|
BS_SUBMIT_GET_SQE(sqe2, data2);
|
||||||
data2->iov = (struct iovec){ op->buf, op->len };
|
data2->iov = (struct iovec){ op->buf, op->len };
|
||||||
data2->callback = cb;
|
data2->callback = cb;
|
||||||
my_uring_prep_writev(
|
my_uring_prep_writev(
|
||||||
|
@ -441,13 +431,12 @@ int blockstore_impl_t::continue_write(blockstore_op_t *op)
|
||||||
resume_2:
|
resume_2:
|
||||||
// Only for the immediate_commit mode: prepare and submit big_write journal entry
|
// Only for the immediate_commit mode: prepare and submit big_write journal entry
|
||||||
{
|
{
|
||||||
|
BS_SUBMIT_CHECK_SQES(1);
|
||||||
auto dirty_it = dirty_db.find((obj_ver_id){
|
auto dirty_it = dirty_db.find((obj_ver_id){
|
||||||
.oid = op->oid,
|
.oid = op->oid,
|
||||||
.version = op->version,
|
.version = op->version,
|
||||||
});
|
});
|
||||||
assert(dirty_it != dirty_db.end());
|
assert(dirty_it != dirty_db.end());
|
||||||
io_uring_sqe *sqe = NULL;
|
|
||||||
BS_SUBMIT_GET_SQE_DECL(sqe);
|
|
||||||
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
journal_entry_big_write *je = (journal_entry_big_write*)prefill_single_journal_entry(
|
||||||
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
|
||||||
sizeof(journal_entry_big_write) + clean_entry_bitmap_size
|
sizeof(journal_entry_big_write) + clean_entry_bitmap_size
|
||||||
|
@ -469,10 +458,7 @@ resume_2:
|
||||||
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
memcpy((void*)(je+1), (clean_entry_bitmap_size > sizeof(void*) ? dirty_it->second.bitmap : &dirty_it->second.bitmap), clean_entry_bitmap_size);
|
||||||
je->crc32 = je_crc32((journal_entry*)je);
|
je->crc32 = je_crc32((journal_entry*)je);
|
||||||
journal.crc32_last = je->crc32;
|
journal.crc32_last = je->crc32;
|
||||||
prepare_journal_sector_write(journal, journal.cur_sector, sqe,
|
prepare_journal_sector_write(journal.cur_sector, op);
|
||||||
[this, op](ring_data_t *data) { handle_write_event(data, op); });
|
|
||||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
|
|
||||||
PRIV(op)->pending_ops = 1;
|
|
||||||
PRIV(op)->op_state = 3;
|
PRIV(op)->op_state = 3;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@@ -587,6 +573,7 @@ void blockstore_impl_t::handle_write_event(ring_data_t *data, blockstore_op_t *o
         );
     }
     PRIV(op)->pending_ops--;
+    assert(PRIV(op)->pending_ops >= 0);
     if (PRIV(op)->pending_ops == 0)
     {
         release_journal_sectors(op);
@@ -604,7 +591,6 @@ void blockstore_impl_t::release_journal_sectors(blockstore_op_t *op)
     uint64_t s = PRIV(op)->min_flushed_journal_sector;
     while (1)
     {
-        journal.sector_info[s-1].flush_count--;
         if (s != (1+journal.cur_sector) && journal.sector_info[s-1].flush_count == 0)
         {
             // We know for sure that we won't write into this sector anymore
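Why the decrement disappears from release_journal_sectors() (my reading of the diff): several operations can now share one physical sector write, so the single flush_count decrement happens in handle_journal_write() when that write completes, and release_journal_sectors() only frees sectors whose count has dropped to zero:

// flush_count accounting after this commit, as read from these hunks:
//   prepare_journal_sector_write()  ->  flush_count++  (once per queued sector write, not per op)
//   handle_journal_write()          ->  flush_count--  (once, when that write completes)
//   release_journal_sectors()       ->  frees a sector only when flush_count == 0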
@@ -644,23 +630,19 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
         return 0;
     }
     write_iodepth++;
-    io_uring_sqe *sqe = NULL;
-    if (immediate_commit != IMMEDIATE_NONE ||
-        (journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
-        journal.sector_info[journal.cur_sector].dirty)
-    {
-        // Write current journal sector only if it's dirty and full, or in the immediate_commit mode
-        BS_SUBMIT_GET_SQE_DECL(sqe);
-    }
-    auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
+    // Write current journal sector only if it's dirty and full, or in the immediate_commit mode
+    BS_SUBMIT_CHECK_SQES(
+        (immediate_commit != IMMEDIATE_NONE ||
+        (journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
+        journal.sector_info[journal.cur_sector].dirty) ? 1 : 0
+    );
     // Prepare journal sector write
     if (immediate_commit == IMMEDIATE_NONE)
     {
-        if (sqe)
+        if ((journal_block_size - journal.in_sector_pos) < sizeof(journal_entry_del) &&
+            journal.sector_info[journal.cur_sector].dirty)
         {
-            prepare_journal_sector_write(journal, journal.cur_sector, sqe, cb);
-            PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-            PRIV(op)->pending_ops++;
+            prepare_journal_sector_write(journal.cur_sector, op);
        }
         else
         {
@@ -687,9 +669,7 @@ int blockstore_impl_t::dequeue_del(blockstore_op_t *op)
     dirty_it->second.state = BS_ST_DELETE | BS_ST_SUBMITTED;
     if (immediate_commit != IMMEDIATE_NONE)
     {
-        prepare_journal_sector_write(journal, journal.cur_sector, sqe, cb);
-        PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 1 + journal.cur_sector;
-        PRIV(op)->pending_ops++;
+        prepare_journal_sector_write(journal.cur_sector, op);
     }
     if (!PRIV(op)->pending_ops)
     {