Handle all io_uring events using lambdas

blocking-uring-test
Vitaliy Filippov 2019-11-13 21:17:04 +03:00
parent 7739f628cb
commit 1c6b9778a4
14 changed files with 43 additions and 76 deletions

View File

@ -3,7 +3,6 @@
blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, ring_loop_t *ringloop) blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, ring_loop_t *ringloop)
{ {
this->ringloop = ringloop; this->ringloop = ringloop;
ring_consumer.handle_event = [this](ring_data_t *d) { handle_event(d); };
ring_consumer.loop = [this]() { loop(); }; ring_consumer.loop = [this]() { loop(); };
ringloop->register_consumer(ring_consumer); ringloop->register_consumer(ring_consumer);
initialized = 0; initialized = 0;
@ -56,48 +55,6 @@ blockstore::~blockstore()
free(journal.sector_info); free(journal.sector_info);
} }
// main event loop - handle requests
void blockstore::handle_event(ring_data_t *data)
{
if (initialized != 10)
{
if (metadata_init_reader)
{
metadata_init_reader->handle_event(data);
}
else if (journal_init_reader)
{
journal_init_reader->handle_event(data);
}
}
else
{
struct blockstore_operation* op = (struct blockstore_operation*)data->op;
if ((op->flags & OP_TYPE_MASK) == OP_READ)
{
handle_read_event(data, op);
}
else if ((op->flags & OP_TYPE_MASK) == OP_WRITE ||
(op->flags & OP_TYPE_MASK) == OP_DELETE)
{
handle_write_event(data, op);
}
else if ((op->flags & OP_TYPE_MASK) == OP_SYNC)
{
handle_sync_event(data, op);
}
else if ((op->flags & OP_TYPE_MASK) == OP_STABLE)
{
handle_stable_event(data, op);
}
else if ((op->flags & OP_TYPE_MASK) == OP_INTERNAL_FLUSH)
{
// Operation is not a blockstore_operation at all
}
}
}
// main event loop - produce requests // main event loop - produce requests
void blockstore::loop() void blockstore::loop()
{ {

View File

@ -188,7 +188,6 @@ public:
#define OP_SYNC 3 #define OP_SYNC 3
#define OP_STABLE 4 #define OP_STABLE 4
#define OP_DELETE 5 #define OP_DELETE 5
#define OP_INTERNAL_FLUSH 6
#define OP_TYPE_MASK 0x7 #define OP_TYPE_MASK 0x7
// Suspend operation until there are more free SQEs // Suspend operation until there are more free SQEs
@ -221,7 +220,7 @@ struct blockstore_operation
// FIXME: Move internal fields somewhere // FIXME: Move internal fields somewhere
friend class blockstore; friend class blockstore;
friend class blockstore_journal_check_t; friend class blockstore_journal_check_t;
friend void prepare_journal_sector_write(blockstore_operation *op, journal_t & journal, io_uring_sqe *sqe); friend void prepare_journal_sector_write(journal_t & journal, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb);
private: private:
// Wait status // Wait status
int wait_for; int wait_for;
@ -328,7 +327,6 @@ public:
~blockstore(); ~blockstore();
// Event loop // Event loop
void handle_event(ring_data_t* data);
void loop(); void loop();
// Returns true when it's safe to destroy the instance. If destroying the instance // Returns true when it's safe to destroy the instance. If destroying the instance

View File

@ -88,7 +88,10 @@ void journal_flusher_co::loop()
v.insert(it, (copy_buffer_t){ .offset = offset, .len = submit_len, .buf = memalign(512, submit_len) }); v.insert(it, (copy_buffer_t){ .offset = offset, .len = submit_len, .buf = memalign(512, submit_len) });
data = ((ring_data_t*)sqe->user_data); data = ((ring_data_t*)sqe->user_data);
data->iov = (struct iovec){ v.end()->buf, (size_t)submit_len }; data->iov = (struct iovec){ v.end()->buf, (size_t)submit_len };
data->op = this; data->callback = [this](ring_data_t* data)
{
wait_count--;
};
io_uring_prep_readv( io_uring_prep_readv(
sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + dirty_it->second.location + offset sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + dirty_it->second.location + offset
); );
@ -156,7 +159,11 @@ void journal_flusher_co::loop()
} }
data = ((ring_data_t*)sqe->user_data); data = ((ring_data_t*)sqe->user_data);
data->iov = (struct iovec){ meta_it->second.buf, 512 }; data->iov = (struct iovec){ meta_it->second.buf, 512 };
data->op = this; data->callback = [this](ring_data_t* data)
{
wait_count--;
};
io_uring_prep_writev( io_uring_prep_writev(
sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_sector sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_sector
); );
@ -181,7 +188,10 @@ void journal_flusher_co::loop()
} }
data = ((ring_data_t*)sqe->user_data); data = ((ring_data_t*)sqe->user_data);
data->iov = (struct iovec){ it->buf, (size_t)it->len }; data->iov = (struct iovec){ it->buf, (size_t)it->len };
data->op = this; data->callback = [this](ring_data_t* data)
{
wait_count--;
};
io_uring_prep_writev( io_uring_prep_writev(
sqe, bs->data_fd, &data->iov, 1, bs->data_offset + clean_loc + it->offset sqe, bs->data_fd, &data->iov, 1, bs->data_offset + clean_loc + it->offset
); );
@ -210,7 +220,10 @@ void journal_flusher_co::loop()
} }
data = ((ring_data_t*)sqe->user_data); data = ((ring_data_t*)sqe->user_data);
data->iov = (struct iovec){ meta_it->second.buf, 512 }; data->iov = (struct iovec){ meta_it->second.buf, 512 };
data->op = this; data->callback = [this](ring_data_t* data)
{
wait_count--;
};
io_uring_prep_writev( io_uring_prep_writev(
sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_sector sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + meta_sector
); );

View File

@ -15,7 +15,7 @@ struct meta_sector_t
class journal_flusher_t; class journal_flusher_t;
// Journal flusher coroutine // Journal flusher coroutine
class journal_flusher_co struct journal_flusher_co
{ {
blockstore *bs; blockstore *bs;
journal_flusher_t *flusher; journal_flusher_t *flusher;

View File

@ -42,6 +42,7 @@ int blockstore_init_meta::loop()
metadata_buffer + (prev == 1 ? bs->metadata_buf_size : 0), metadata_buffer + (prev == 1 ? bs->metadata_buf_size : 0),
bs->meta_len - metadata_read > bs->metadata_buf_size ? bs->metadata_buf_size : bs->meta_len - metadata_read, bs->meta_len - metadata_read > bs->metadata_buf_size ? bs->metadata_buf_size : bs->meta_len - metadata_read,
}; };
data->callback = [this](ring_data_t *data) { handle_event(data); };
io_uring_prep_readv(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + metadata_read); io_uring_prep_readv(sqe, bs->meta_fd, &data->iov, 1, bs->meta_offset + metadata_read);
bs->ringloop->submit(); bs->ringloop->submit();
submitted = (prev == 1 ? 2 : 1); submitted = (prev == 1 ? 2 : 1);
@ -179,6 +180,7 @@ int blockstore_init_journal::loop()
} }
struct ring_data_t *data = ((ring_data_t*)sqe->user_data); struct ring_data_t *data = ((ring_data_t*)sqe->user_data);
data->iov = { journal_buffer, 512 }; data->iov = { journal_buffer, 512 };
data->callback = [this](ring_data_t *data) { handle_event(data); };
io_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset); io_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
bs->ringloop->submit(); bs->ringloop->submit();
step = 1; step = 1;
@ -211,6 +213,7 @@ int blockstore_init_journal::loop()
journal_buffer + (done_buf == 1 ? JOURNAL_BUFFER_SIZE : 0), journal_buffer + (done_buf == 1 ? JOURNAL_BUFFER_SIZE : 0),
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE, end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
}; };
data->callback = [this](ring_data_t *data) { handle_event(data); };
io_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + journal_pos); io_uring_prep_readv(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + journal_pos);
bs->ringloop->submit(); bs->ringloop->submit();
submitted = done_buf == 1 ? 2 : 1; submitted = done_buf == 1 ? 2 : 1;

View File

@ -7,9 +7,9 @@ class blockstore_init_meta
uint64_t metadata_read = 0; uint64_t metadata_read = 0;
int prev = 0, prev_done = 0, done_len = 0, submitted = 0, done_cnt = 0; int prev = 0, prev_done = 0, done_len = 0, submitted = 0, done_cnt = 0;
void handle_entries(struct clean_disk_entry* entries, int count); void handle_entries(struct clean_disk_entry* entries, int count);
void handle_event(ring_data_t *data);
public: public:
blockstore_init_meta(blockstore *bs); blockstore_init_meta(blockstore *bs);
void handle_event(ring_data_t *data);
int loop(); int loop();
}; };
@ -24,8 +24,8 @@ class blockstore_init_journal
bool wrapped = false; bool wrapped = false;
int submitted = 0, done_buf = 0, done_len = 0; int submitted = 0, done_buf = 0, done_len = 0;
int handle_journal_part(void *buf, uint64_t len); int handle_journal_part(void *buf, uint64_t len);
void handle_event(ring_data_t *data);
public: public:
blockstore_init_journal(blockstore* bs); blockstore_init_journal(blockstore* bs);
void handle_event(ring_data_t *data);
int loop(); int loop();
}; };

View File

@ -47,12 +47,12 @@ int blockstore_journal_check_t::check_available(blockstore_operation *op, int re
return 1; return 1;
} }
void prepare_journal_sector_write(blockstore_operation *op, journal_t & journal, io_uring_sqe *sqe) void prepare_journal_sector_write(journal_t & journal, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb)
{ {
journal.sector_info[journal.cur_sector].usage_count++; journal.sector_info[journal.cur_sector].usage_count++;
struct ring_data_t *data = ((ring_data_t*)sqe->user_data); ring_data_t *data = ((ring_data_t*)sqe->user_data);
data->iov = (struct iovec){ journal.sector_buf + 512*journal.cur_sector, 512 }; data->iov = (struct iovec){ journal.sector_buf + 512*journal.cur_sector, 512 };
data->op = op; data->callback = cb;
io_uring_prep_writev( io_uring_prep_writev(
sqe, journal.fd, &data->iov, 1, journal.offset + journal.sector_info[journal.cur_sector].offset sqe, journal.fd, &data->iov, 1, journal.offset + journal.sector_info[journal.cur_sector].offset
); );

View File

@ -157,4 +157,4 @@ inline journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t
} }
// FIXME: make inline // FIXME: make inline
void prepare_journal_sector_write(blockstore_operation *op, journal_t & journal, io_uring_sqe *sqe); void prepare_journal_sector_write(journal_t & journal, io_uring_sqe *sqe, std::function<void(ring_data_t*)> cb);

View File

@ -31,7 +31,7 @@ int blockstore::fulfill_read_push(blockstore_operation *op, uint64_t &fulfilled,
&data->iov, 1, &data->iov, 1,
(IS_JOURNAL(item_state) ? journal.offset : data_offset) + item_location + cur_start - item_start (IS_JOURNAL(item_state) ? journal.offset : data_offset) + item_location + cur_start - item_start
); );
data->op = op; data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
fulfilled += cur_end-cur_start; fulfilled += cur_end-cur_start;
} }
return 1; return 1;

View File

@ -85,6 +85,7 @@ int blockstore::dequeue_stable(blockstore_operation *op)
BS_SUBMIT_GET_SQE_DECL(sqe[i]); BS_SUBMIT_GET_SQE_DECL(sqe[i]);
} }
// Prepare and submit journal entries // Prepare and submit journal entries
auto cb = [this, op](ring_data_t *data) { handle_stable_event(data, op); };
int s = 0, cur_sector = -1; int s = 0, cur_sector = -1;
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++) for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
{ {
@ -99,7 +100,7 @@ int blockstore::dequeue_stable(blockstore_operation *op)
if (cur_sector == -1) if (cur_sector == -1)
op->min_used_journal_sector = 1 + journal.cur_sector; op->min_used_journal_sector = 1 + journal.cur_sector;
cur_sector = journal.cur_sector; cur_sector = journal.cur_sector;
prepare_journal_sector_write(op, journal, sqe[s++]); prepare_journal_sector_write(journal, sqe[s++], cb);
} }
} }
op->max_used_journal_sector = 1 + journal.cur_sector; op->max_used_journal_sector = 1 + journal.cur_sector;

View File

@ -37,12 +37,13 @@ int blockstore::dequeue_sync(blockstore_operation *op)
int blockstore::continue_sync(blockstore_operation *op) int blockstore::continue_sync(blockstore_operation *op)
{ {
auto cb = [this, op](ring_data_t *data) { handle_sync_event(data, op); };
if (op->sync_state == SYNC_HAS_SMALL) if (op->sync_state == SYNC_HAS_SMALL)
{ {
// No big writes, just fsync the journal // No big writes, just fsync the journal
BS_SUBMIT_GET_SQE(sqe, data); BS_SUBMIT_GET_SQE(sqe, data);
io_uring_prep_fsync(sqe, journal.fd, 0); io_uring_prep_fsync(sqe, journal.fd, 0);
data->op = op; data->callback = cb;
op->pending_ops = 1; op->pending_ops = 1;
op->sync_state = SYNC_JOURNAL_SYNC_SENT; op->sync_state = SYNC_JOURNAL_SYNC_SENT;
} }
@ -51,7 +52,7 @@ int blockstore::continue_sync(blockstore_operation *op)
// 1st step: fsync data // 1st step: fsync data
BS_SUBMIT_GET_SQE(sqe, data); BS_SUBMIT_GET_SQE(sqe, data);
io_uring_prep_fsync(sqe, data_fd, 0); io_uring_prep_fsync(sqe, data_fd, 0);
data->op = op; data->callback = cb;
op->pending_ops = 1; op->pending_ops = 1;
op->sync_state = SYNC_DATA_SYNC_SENT; op->sync_state = SYNC_DATA_SYNC_SENT;
} }
@ -88,14 +89,14 @@ int blockstore::continue_sync(blockstore_operation *op)
if (cur_sector == -1) if (cur_sector == -1)
op->min_used_journal_sector = 1 + journal.cur_sector; op->min_used_journal_sector = 1 + journal.cur_sector;
cur_sector = journal.cur_sector; cur_sector = journal.cur_sector;
prepare_journal_sector_write(op, journal, sqe[s++]); prepare_journal_sector_write(journal, sqe[s++], cb);
} }
} }
op->max_used_journal_sector = 1 + journal.cur_sector; op->max_used_journal_sector = 1 + journal.cur_sector;
// ... And a journal fsync // ... And a journal fsync
io_uring_prep_fsync(sqe[s], journal.fd, 0); io_uring_prep_fsync(sqe[s], journal.fd, 0);
struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data); struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
data->op = op; data->callback = cb;
op->pending_ops = 1 + s; op->pending_ops = 1 + s;
op->sync_state = SYNC_JOURNAL_SYNC_SENT; op->sync_state = SYNC_JOURNAL_SYNC_SENT;
} }

View File

@ -58,6 +58,7 @@ void blockstore::enqueue_write(blockstore_operation *op)
// First step of the write algorithm: dequeue operation and submit initial write(s) // First step of the write algorithm: dequeue operation and submit initial write(s)
int blockstore::dequeue_write(blockstore_operation *op) int blockstore::dequeue_write(blockstore_operation *op)
{ {
auto cb = [this, op](ring_data_t *data) { handle_write_event(data, op); };
auto dirty_it = dirty_db.find((obj_ver_id){ auto dirty_it = dirty_db.find((obj_ver_id){
.oid = op->oid, .oid = op->oid,
.version = op->version, .version = op->version,
@ -94,7 +95,7 @@ int blockstore::dequeue_write(blockstore_operation *op)
vcnt = 1; vcnt = 1;
op->iov_zerofill[0] = (struct iovec){ op->buf, op->len }; op->iov_zerofill[0] = (struct iovec){ op->buf, op->len };
} }
data->op = op; data->callback = cb;
io_uring_prep_writev( io_uring_prep_writev(
sqe, data_fd, op->iov_zerofill, vcnt, data_offset + (loc << block_order) sqe, data_fd, op->iov_zerofill, vcnt, data_offset + (loc << block_order)
); );
@ -127,12 +128,12 @@ int blockstore::dequeue_write(blockstore_operation *op)
je->len = op->len; je->len = op->len;
je->crc32 = je_crc32((journal_entry*)je); je->crc32 = je_crc32((journal_entry*)je);
journal.crc32_last = je->crc32; journal.crc32_last = je->crc32;
prepare_journal_sector_write(op, journal, sqe1); prepare_journal_sector_write(journal, sqe1, cb);
op->min_used_journal_sector = op->max_used_journal_sector = 1 + journal.cur_sector; op->min_used_journal_sector = op->max_used_journal_sector = 1 + journal.cur_sector;
// Prepare journal data write // Prepare journal data write
journal.next_free = (journal.next_free + op->len) < journal.len ? journal.next_free : 512; journal.next_free = (journal.next_free + op->len) < journal.len ? journal.next_free : 512;
data2->iov = (struct iovec){ op->buf, op->len }; data2->iov = (struct iovec){ op->buf, op->len };
data2->op = op; data2->callback = cb;
io_uring_prep_writev( io_uring_prep_writev(
sqe2, journal.fd, &data2->iov, 1, journal.offset + journal.next_free sqe2, journal.fd, &data2->iov, 1, journal.offset + journal.next_free
); );

View File

@ -52,7 +52,6 @@ void ring_loop_t::unregister_consumer(int number)
{ {
if (number < consumers.size()) if (number < consumers.size())
{ {
consumers[number].handle_event = NULL;
consumers[number].loop = NULL; consumers[number].loop = NULL;
} }
} }
@ -67,14 +66,9 @@ void ring_loop_t::loop(bool sleep)
while ((io_uring_peek_cqe(ring, &cqe), cqe)) while ((io_uring_peek_cqe(ring, &cqe), cqe))
{ {
struct ring_data_t *d = (struct ring_data_t*)cqe->user_data; struct ring_data_t *d = (struct ring_data_t*)cqe->user_data;
if (d->source < consumers.size()) if (d->callback)
{ {
d->res = cqe->res; d->callback(d);
ring_consumer_t & c = consumers[d->source];
if (c.handle_event != NULL)
{
c.handle_event(d);
}
} }
io_uring_cqe_seen(ring, cqe); io_uring_cqe_seen(ring, cqe);
} }

View File

@ -15,13 +15,12 @@ struct ring_data_t
uint64_t source; uint64_t source;
struct iovec iov; // for single-entry read/write operations struct iovec iov; // for single-entry read/write operations
int res; int res;
void *op; std::function<void(ring_data_t*)> callback;
}; };
struct ring_consumer_t struct ring_consumer_t
{ {
int number; int number;
std::function<void(ring_data_t*)> handle_event;
std::function<void(void)> loop; std::function<void(void)> loop;
}; };