Fix sync confirmations and some pipeline-stall bugs

blocking-uring-test
Vitaliy Filippov 2019-11-27 18:04:52 +03:00
parent 78807eb244
commit e2b91968c5
7 changed files with 72 additions and 49 deletions

View File

@ -108,18 +108,30 @@ void blockstore::loop()
continue_sync(*cur_sync++); continue_sync(*cur_sync++);
} }
auto cur = submit_queue.begin(); auto cur = submit_queue.begin();
bool has_writes = false; int has_writes = 0;
while (cur != submit_queue.end()) while (cur != submit_queue.end())
{ {
auto op_ptr = cur; auto op_ptr = cur;
auto op = *(cur++); auto op = *(cur++);
// FIXME: This needs some simplification
// Writes should not block reads if the ring is not null and if reads don't depend on them
// In all other cases we should stop submission
if (op->wait_for) if (op->wait_for)
{ {
check_wait(op); check_wait(op);
if (op->wait_for == WAIT_SQE) if (op->wait_for == WAIT_SQE)
{
break; break;
}
else if (op->wait_for) else if (op->wait_for)
{
if ((op->flags & OP_TYPE_MASK) == OP_WRITE ||
(op->flags & OP_TYPE_MASK) == OP_DELETE)
{
has_writes = 2;
}
continue; continue;
}
} }
unsigned ring_space = io_uring_sq_space_left(&ringloop->ring); unsigned ring_space = io_uring_sq_space_left(&ringloop->ring);
unsigned prev_sqe_pos = ringloop->ring.sq.sqe_tail; unsigned prev_sqe_pos = ringloop->ring.sq.sqe_tail;
@ -131,8 +143,13 @@ void blockstore::loop()
else if ((op->flags & OP_TYPE_MASK) == OP_WRITE || else if ((op->flags & OP_TYPE_MASK) == OP_WRITE ||
(op->flags & OP_TYPE_MASK) == OP_DELETE) (op->flags & OP_TYPE_MASK) == OP_DELETE)
{ {
has_writes = true; if (has_writes == 2)
{
// Some writes could not be submitted
break;
}
dequeue_op = dequeue_write(op); dequeue_op = dequeue_write(op);
has_writes = dequeue_op ? 1 : 2;
} }
else if ((op->flags & OP_TYPE_MASK) == OP_SYNC) else if ((op->flags & OP_TYPE_MASK) == OP_SYNC)
{ {
@ -229,7 +246,7 @@ void blockstore::check_wait(blockstore_operation *op)
} }
else if (op->wait_for == WAIT_JOURNAL) else if (op->wait_for == WAIT_JOURNAL)
{ {
if (journal.used_start < op->wait_detail) if (journal.used_start == op->wait_detail)
{ {
// do not submit // do not submit
return; return;

View File

@ -312,6 +312,7 @@ class blockstore
int dequeue_sync(blockstore_operation *op); int dequeue_sync(blockstore_operation *op);
void handle_sync_event(ring_data_t *data, blockstore_operation *op); void handle_sync_event(ring_data_t *data, blockstore_operation *op);
int continue_sync(blockstore_operation *op); int continue_sync(blockstore_operation *op);
void ack_one_sync(blockstore_operation *op);
int ack_sync(blockstore_operation *op); int ack_sync(blockstore_operation *op);
// Stabilize // Stabilize

View File

@ -63,7 +63,8 @@ void journal_flusher_t::queue_flush(obj_ver_id ov)
auto it = flush_versions.find(ov.oid); auto it = flush_versions.find(ov.oid);
if (it != flush_versions.end()) if (it != flush_versions.end())
{ {
it->second = ov.version; if (it->second < ov.version)
it->second = ov.version;
} }
else else
{ {
@ -77,7 +78,8 @@ void journal_flusher_t::unshift_flush(obj_ver_id ov)
auto it = flush_versions.find(ov.oid); auto it = flush_versions.find(ov.oid);
if (it != flush_versions.end()) if (it != flush_versions.end())
{ {
it->second = ov.version; if (it->second < ov.version)
it->second = ov.version;
} }
else else
{ {
@ -199,7 +201,12 @@ resume_0:
} }
else if (!IS_STABLE(dirty_it->second.state)) else if (!IS_STABLE(dirty_it->second.state))
{ {
throw std::runtime_error("BUG: Unexpected dirty_entry state during flush: " + std::to_string(dirty_it->second.state)); char err[1024];
snprintf(
err, 1024, "BUG: Unexpected dirty_entry %lu:%lu v%lu state during flush: %d",
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, dirty_it->second.state
);
throw std::runtime_error(err);
} }
if (dirty_it == bs->dirty_db.begin()) if (dirty_it == bs->dirty_db.begin())
{ {
@ -217,7 +224,7 @@ resume_0:
flusher->active_flushers--; flusher->active_flushers--;
flusher->active_until_sync--; flusher->active_until_sync--;
repeat_it = flusher->sync_to_repeat.find(cur.oid); repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it->second != 0) if (repeat_it->second > cur.version)
{ {
// Requeue version // Requeue version
flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second }); flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second });
@ -475,12 +482,12 @@ resume_0:
wait_state = 0; wait_state = 0;
flusher->active_flushers--; flusher->active_flushers--;
repeat_it = flusher->sync_to_repeat.find(cur.oid); repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it->second != 0) if (repeat_it->second > cur.version)
{ {
// Requeue version // Requeue version
flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second }); flusher->unshift_flush({ .oid = cur.oid, .version = repeat_it->second });
flusher->sync_to_repeat.erase(repeat_it);
} }
flusher->sync_to_repeat.erase(repeat_it);
goto resume_0; goto resume_0;
} }
} }

View File

@ -55,9 +55,9 @@ int blockstore_journal_check_t::check_available(blockstore_operation *op, int re
} }
if (!right_dir && next_pos >= bs->journal.used_start-512) if (!right_dir && next_pos >= bs->journal.used_start-512)
{ {
// No space in the journal. Wait for it. // No space in the journal. Wait until used_start changes.
op->wait_for = WAIT_JOURNAL; op->wait_for = WAIT_JOURNAL;
op->wait_detail = next_pos; op->wait_detail = bs->journal.used_start;
return 0; return 0;
} }
return 1; return 1;

View File

@ -140,6 +140,7 @@ void blockstore::handle_sync_event(ring_data_t *data, blockstore_operation *op)
if (op->sync_state == SYNC_DATA_SYNC_SENT) if (op->sync_state == SYNC_DATA_SYNC_SENT)
{ {
op->sync_state = SYNC_DATA_SYNC_DONE; op->sync_state = SYNC_DATA_SYNC_DONE;
// FIXME: This is not needed, in fact
for (auto it = op->sync_big_writes.begin(); it != op->sync_big_writes.end(); it++) for (auto it = op->sync_big_writes.begin(); it != op->sync_big_writes.end(); it++)
{ {
dirty_db[*it].state = ST_D_SYNCED; dirty_db[*it].state = ST_D_SYNCED;
@ -148,18 +149,6 @@ void blockstore::handle_sync_event(ring_data_t *data, blockstore_operation *op)
else if (op->sync_state == SYNC_JOURNAL_SYNC_SENT) else if (op->sync_state == SYNC_JOURNAL_SYNC_SENT)
{ {
op->sync_state = SYNC_DONE; op->sync_state = SYNC_DONE;
for (auto it = op->sync_big_writes.begin(); it != op->sync_big_writes.end(); it++)
{
auto & unstab = unstable_writes[it->oid];
unstab = !unstab || unstab > it->version ? it->version : unstab;
dirty_db[*it].state = ST_D_META_SYNCED;
}
for (auto it = op->sync_small_writes.begin(); it != op->sync_small_writes.end(); it++)
{
auto & unstab = unstable_writes[it->oid];
unstab = !unstab || unstab > it->version ? it->version : unstab;
dirty_db[*it].state = ST_J_SYNCED;
}
ack_sync(op); ack_sync(op);
} }
else else
@ -177,6 +166,8 @@ int blockstore::ack_sync(blockstore_operation *op)
auto it = op->in_progress_ptr; auto it = op->in_progress_ptr;
int done_syncs = 1; int done_syncs = 1;
++it; ++it;
// Acknowledge sync
ack_one_sync(op);
while (it != in_progress_syncs.end()) while (it != in_progress_syncs.end())
{ {
auto & next_sync = *it++; auto & next_sync = *it++;
@ -185,16 +176,30 @@ int blockstore::ack_sync(blockstore_operation *op)
{ {
done_syncs++; done_syncs++;
// Acknowledge next_sync // Acknowledge next_sync
in_progress_syncs.erase(next_sync->in_progress_ptr); ack_one_sync(next_sync);
next_sync->retval = 0;
next_sync->callback(next_sync);
} }
} }
// Acknowledge sync
in_progress_syncs.erase(op->in_progress_ptr);
op->retval = 0;
op->callback(op);
return 1; return 1;
} }
return 0; return 0;
} }
void blockstore::ack_one_sync(blockstore_operation *op)
{
// Handle states
for (auto it = op->sync_big_writes.begin(); it != op->sync_big_writes.end(); it++)
{
auto & unstab = unstable_writes[it->oid];
unstab = unstab < it->version ? it->version : unstab;
dirty_db[*it].state = ST_D_META_SYNCED;
}
for (auto it = op->sync_small_writes.begin(); it != op->sync_small_writes.end(); it++)
{
auto & unstab = unstable_writes[it->oid];
unstab = unstab < it->version ? it->version : unstab;
dirty_db[*it].state = ST_J_SYNCED;
}
in_progress_syncs.erase(op->in_progress_ptr);
op->retval = 0;
op->callback(op);
}

View File

@ -41,24 +41,6 @@ void blockstore::enqueue_write(blockstore_operation *op)
.len = op->len, .len = op->len,
.journal_sector = 0, .journal_sector = 0,
}); });
// Remember write as unsynced here, so external consumers could get
// the list of dirty objects to sync just before issuing a SYNC request
if (op->len == block_size || op->version == 1)
{
// Remember big write as unsynced
unsynced_big_writes.push_back((obj_ver_id){
.oid = op->oid,
.version = op->version,
});
}
else
{
// Remember small write as unsynced
unsynced_small_writes.push_back((obj_ver_id){
.oid = op->oid,
.version = op->version,
});
}
} }
// First step of the write algorithm: dequeue operation and submit initial write(s) // First step of the write algorithm: dequeue operation and submit initial write(s)
@ -109,6 +91,11 @@ int blockstore::dequeue_write(blockstore_operation *op)
); );
op->pending_ops = 1; op->pending_ops = 1;
op->min_used_journal_sector = op->max_used_journal_sector = 0; op->min_used_journal_sector = op->max_used_journal_sector = 0;
// Remember big write as unsynced
unsynced_big_writes.push_back((obj_ver_id){
.oid = op->oid,
.version = op->version,
});
} }
else else
{ {
@ -154,6 +141,11 @@ int blockstore::dequeue_write(blockstore_operation *op)
dirty_it->second.state = ST_J_SUBMITTED; dirty_it->second.state = ST_J_SUBMITTED;
journal.next_free += op->len; journal.next_free += op->len;
op->pending_ops = 2; op->pending_ops = 2;
// Remember small write as unsynced
unsynced_small_writes.push_back((obj_ver_id){
.oid = op->oid,
.version = op->version,
});
} }
return 1; return 1;
} }

View File

@ -178,7 +178,7 @@ static enum fio_q_status bs_queue(struct thread_data *td, struct io_u *io)
.version = it->second, .version = it->second,
}; };
} }
bsd->bs->enqueue_op(op); bsd->bs->unstable_writes.clear();
op->callback = [io, n](blockstore_operation *op) op->callback = [io, n](blockstore_operation *op)
{ {
io->error = op->retval < 0 ? -op->retval : 0; io->error = op->retval < 0 ? -op->retval : 0;
@ -191,6 +191,7 @@ static enum fio_q_status bs_queue(struct thread_data *td, struct io_u *io)
printf("--- OP_SYNC %llx n=%d retval=%d\n", io, n, op->retval); printf("--- OP_SYNC %llx n=%d retval=%d\n", io, n, op->retval);
delete op; delete op;
}; };
bsd->bs->enqueue_op(op);
} }
else else
{ {