forked from vitalif/vitastor
Add ring_loop
parent
c959948c82
commit
351366d228
2
Makefile
2
Makefile
|
@ -1,4 +1,4 @@
|
||||||
all: allocator.o blockstore.o blockstore_init.o blockstore_open.o blockstore_read.o crc32c.o test
|
all: allocator.o blockstore.o blockstore_init.o blockstore_open.o blockstore_read.o crc32c.o ringloop.o test
|
||||||
crc32c.o: crc32c.c
|
crc32c.o: crc32c.c
|
||||||
gcc -c -o $@ $<
|
gcc -c -o $@ $<
|
||||||
%.o: %.cpp
|
%.o: %.cpp
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, io_uring *ring)
|
blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, io_uring *ring)
|
||||||
{
|
{
|
||||||
this->ring = ring;
|
this->ring = ring;
|
||||||
|
ring_data = (struct ring_data_t*)malloc(sizeof(ring_data_t) * ring->sq.ring_sz);
|
||||||
initialized = 0;
|
initialized = 0;
|
||||||
block_order = stoull(config["block_size_order"]);
|
block_order = stoull(config["block_size_order"]);
|
||||||
block_size = 1 << block_order;
|
block_size = 1 << block_order;
|
||||||
|
@ -35,6 +36,7 @@ blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config,
|
||||||
|
|
||||||
blockstore::~blockstore()
|
blockstore::~blockstore()
|
||||||
{
|
{
|
||||||
|
free(ring_data);
|
||||||
if (data_fd >= 0)
|
if (data_fd >= 0)
|
||||||
close(data_fd);
|
close(data_fd);
|
||||||
if (meta_fd >= 0 && meta_fd != data_fd)
|
if (meta_fd >= 0 && meta_fd != data_fd)
|
||||||
|
@ -43,6 +45,16 @@ blockstore::~blockstore()
|
||||||
close(journal_fd);
|
close(journal_fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct io_uring_sqe* blockstore::get_sqe()
|
||||||
|
{
|
||||||
|
struct io_uring_sqe* sqe = io_uring_get_sqe(ring);
|
||||||
|
if (sqe)
|
||||||
|
{
|
||||||
|
io_uring_sqe_set_data(sqe, ring_data + (sqe - ring->sq.sqes));
|
||||||
|
}
|
||||||
|
return sqe;
|
||||||
|
}
|
||||||
|
|
||||||
// must be called in the event loop until it returns 0
|
// must be called in the event loop until it returns 0
|
||||||
int blockstore::init_loop()
|
int blockstore::init_loop()
|
||||||
{
|
{
|
||||||
|
@ -74,3 +86,27 @@ int blockstore::init_loop()
|
||||||
journal_init_reader = NULL;
|
journal_init_reader = NULL;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// main event loop
|
||||||
|
int blockstore::main_loop()
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
struct io_uring_cqe *cqe;
|
||||||
|
io_uring_peek_cqe(ring, &cqe);
|
||||||
|
if (cqe)
|
||||||
|
{
|
||||||
|
struct ring_data *d = cqe->user_data;
|
||||||
|
if (d->source == SRC_BLOCKSTORE)
|
||||||
|
{
|
||||||
|
handle_event();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// someone else
|
||||||
|
}
|
||||||
|
io_uring_cqe_seen(ring, cqe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
17
blockstore.h
17
blockstore.h
|
@ -140,6 +140,13 @@ struct blockstore_operation
|
||||||
uint64_t wait_version;
|
uint64_t wait_version;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*struct ring_data_t
|
||||||
|
{
|
||||||
|
uint64_t source;
|
||||||
|
struct iovec iov; // for single-entry read/write operations
|
||||||
|
void *op;
|
||||||
|
};*/
|
||||||
|
|
||||||
class blockstore;
|
class blockstore;
|
||||||
|
|
||||||
#include "blockstore_init.h"
|
#include "blockstore_init.h"
|
||||||
|
@ -167,6 +174,9 @@ public:
|
||||||
uint32_t journal_crc32_last;
|
uint32_t journal_crc32_last;
|
||||||
|
|
||||||
struct io_uring *ring;
|
struct io_uring *ring;
|
||||||
|
struct ring_data_t *ring_data;
|
||||||
|
|
||||||
|
struct io_uring_sqe* get_sqe();
|
||||||
|
|
||||||
blockstore(spp::sparse_hash_map<std::string, std::string> & config, struct io_uring *ring);
|
blockstore(spp::sparse_hash_map<std::string, std::string> & config, struct io_uring *ring);
|
||||||
~blockstore();
|
~blockstore();
|
||||||
|
@ -183,10 +193,13 @@ public:
|
||||||
blockstore_init_journal* journal_init_reader;
|
blockstore_init_journal* journal_init_reader;
|
||||||
int init_loop();
|
int init_loop();
|
||||||
|
|
||||||
|
// Event loop
|
||||||
|
int main_loop();
|
||||||
|
|
||||||
// Read
|
// Read
|
||||||
int read(blockstore_operation *read_op);
|
int read(blockstore_operation *read_op);
|
||||||
int fulfill_read(blockstore_operation & read_op, uint32_t item_start, uint32_t item_end,
|
int fulfill_read(blockstore_operation *read_op, uint32_t item_start, uint32_t item_end,
|
||||||
uint32_t item_state, uint64_t item_version, uint64_t item_location);
|
uint32_t item_state, uint64_t item_version, uint64_t item_location);
|
||||||
int fulfill_read_push(blockstore_operation & read_op, uint32_t item_start,
|
int fulfill_read_push(blockstore_operation *read_op, uint32_t item_start,
|
||||||
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint32_t cur_start, uint32_t cur_end);
|
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint32_t cur_start, uint32_t cur_end);
|
||||||
};
|
};
|
||||||
|
|
|
@ -32,6 +32,7 @@ int blockstore_init_meta::read_loop()
|
||||||
done_len = cqe->res;
|
done_len = cqe->res;
|
||||||
metadata_read += cqe->res;
|
metadata_read += cqe->res;
|
||||||
submitted = 0;
|
submitted = 0;
|
||||||
|
io_uring_cqe_seen(bs->ring, cqe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!submitted)
|
if (!submitted)
|
||||||
|
@ -160,6 +161,7 @@ int blockstore_init_journal::read_loop()
|
||||||
crc32_last = je->crc32_replaced;
|
crc32_last = je->crc32_replaced;
|
||||||
step = 2;
|
step = 2;
|
||||||
}
|
}
|
||||||
|
io_uring_cqe_seen(bs->ring, cqe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (step == 2 || step == 3)
|
if (step == 2 || step == 3)
|
||||||
|
@ -189,6 +191,7 @@ int blockstore_init_journal::read_loop()
|
||||||
wrapped = true;
|
wrapped = true;
|
||||||
}
|
}
|
||||||
submitted = 0;
|
submitted = 0;
|
||||||
|
io_uring_cqe_seen(bs->ring, cqe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!submitted)
|
if (!submitted)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#include "blockstore.h"
|
#include "blockstore.h"
|
||||||
|
|
||||||
int blockstore::fulfill_read_push(blockstore_operation & read_op, uint32_t item_start,
|
int blockstore::fulfill_read_push(blockstore_operation *read_op, uint32_t item_start,
|
||||||
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint32_t cur_start, uint32_t cur_end)
|
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint32_t cur_start, uint32_t cur_end)
|
||||||
{
|
{
|
||||||
if (cur_end > cur_start)
|
if (cur_end > cur_start)
|
||||||
|
@ -8,49 +8,50 @@ int blockstore::fulfill_read_push(blockstore_operation & read_op, uint32_t item_
|
||||||
if (item_state == ST_IN_FLIGHT)
|
if (item_state == ST_IN_FLIGHT)
|
||||||
{
|
{
|
||||||
// Pause until it's written somewhere
|
// Pause until it's written somewhere
|
||||||
read_op.wait_for = WAIT_IN_FLIGHT;
|
read_op->wait_for = WAIT_IN_FLIGHT;
|
||||||
read_op.wait_version = item_version;
|
read_op->wait_version = item_version;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
else if (item_state == ST_DEL_WRITTEN || item_state == ST_DEL_SYNCED || item_state == ST_DEL_MOVED)
|
else if (item_state == ST_DEL_WRITTEN || item_state == ST_DEL_SYNCED || item_state == ST_DEL_MOVED)
|
||||||
{
|
{
|
||||||
// item is unallocated - return zeroes
|
// item is unallocated - return zeroes
|
||||||
memset(read_op.buf + cur_start - read_op.offset, 0, cur_end - cur_start);
|
memset(read_op->buf + cur_start - read_op->offset, 0, cur_end - cur_start);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
|
struct io_uring_sqe *sqe = get_sqe();
|
||||||
if (!sqe)
|
if (!sqe)
|
||||||
{
|
{
|
||||||
// Pause until there are more requests available
|
// Pause until there are more requests available
|
||||||
read_op.wait_for = WAIT_SQE;
|
read_op->wait_for = WAIT_SQE;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
read_op.read_vec[cur_start] = (struct iovec){
|
read_op->read_vec[cur_start] = (struct iovec){
|
||||||
read_op.buf + cur_start - read_op.offset,
|
read_op->buf + cur_start - read_op->offset,
|
||||||
cur_end - cur_start
|
cur_end - cur_start
|
||||||
};
|
};
|
||||||
|
// Тут 2 вопроса - 1) куда сохранить iovec 2) как потом сопоставить i/o и cqe
|
||||||
io_uring_prep_readv(
|
io_uring_prep_readv(
|
||||||
sqe,
|
sqe,
|
||||||
IS_JOURNAL(item_state) ? journal_fd : data_fd,
|
IS_JOURNAL(item_state) ? journal_fd : data_fd,
|
||||||
// FIXME: &read_op.read_vec is forbidden
|
// FIXME: &read_op->read_vec is forbidden
|
||||||
&read_op.read_vec[cur_start], 1,
|
&read_op->read_vec[cur_start], 1,
|
||||||
(IS_JOURNAL(item_state) ? journal_offset : data_offset) + item_location + cur_start - item_start
|
(IS_JOURNAL(item_state) ? journal_offset : data_offset) + item_location + cur_start - item_start
|
||||||
);
|
);
|
||||||
io_uring_sqe_set_data(sqe, 0/*read op link*/);
|
((ring_data_t*)(sqe->user_data))->op = read_op;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int blockstore::fulfill_read(blockstore_operation & read_op, uint32_t item_start, uint32_t item_end,
|
int blockstore::fulfill_read(blockstore_operation *read_op, uint32_t item_start, uint32_t item_end,
|
||||||
uint32_t item_state, uint64_t item_version, uint64_t item_location)
|
uint32_t item_state, uint64_t item_version, uint64_t item_location)
|
||||||
{
|
{
|
||||||
uint32_t cur_start = item_start;
|
uint32_t cur_start = item_start;
|
||||||
if (cur_start < read_op.offset + read_op.len && item_end > read_op.offset)
|
if (cur_start < read_op->offset + read_op->len && item_end > read_op->offset)
|
||||||
{
|
{
|
||||||
cur_start = cur_start < read_op.offset ? read_op.offset : cur_start;
|
cur_start = cur_start < read_op->offset ? read_op->offset : cur_start;
|
||||||
item_end = item_end > read_op.offset + read_op.len ? read_op.offset + read_op.len : item_end;
|
item_end = item_end > read_op->offset + read_op->len ? read_op->offset + read_op->len : item_end;
|
||||||
auto fulfill_near = read_op.read_vec.lower_bound(cur_start);
|
auto fulfill_near = read_op->read_vec.lower_bound(cur_start);
|
||||||
if (fulfill_near != read_op.read_vec.begin())
|
if (fulfill_near != read_op->read_vec.begin())
|
||||||
{
|
{
|
||||||
fulfill_near--;
|
fulfill_near--;
|
||||||
if (fulfill_near->first + fulfill_near->second.iov_len <= cur_start)
|
if (fulfill_near->first + fulfill_near->second.iov_len <= cur_start)
|
||||||
|
@ -58,7 +59,7 @@ int blockstore::fulfill_read(blockstore_operation & read_op, uint32_t item_start
|
||||||
fulfill_near++;
|
fulfill_near++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (fulfill_near != read_op.read_vec.end() && fulfill_near->first < item_end)
|
while (fulfill_near != read_op->read_vec.end() && fulfill_near->first < item_end)
|
||||||
{
|
{
|
||||||
if (fulfill_read_push(read_op, item_start, item_state, item_version, item_location, cur_start, fulfill_near->first) < 0)
|
if (fulfill_read_push(read_op, item_start, item_state, item_version, item_location, cur_start, fulfill_near->first) < 0)
|
||||||
{
|
{
|
||||||
|
@ -95,7 +96,7 @@ int blockstore::read(blockstore_operation *read_op)
|
||||||
{
|
{
|
||||||
if (read_op->flags == OP_READ_DIRTY || IS_STABLE(dirty[i].state))
|
if (read_op->flags == OP_READ_DIRTY || IS_STABLE(dirty[i].state))
|
||||||
{
|
{
|
||||||
if (fulfill_read(*read_op, dirty[i].offset, dirty[i].offset + dirty[i].size, dirty[i].state, dirty[i].version, dirty[i].location) < 0)
|
if (fulfill_read(read_op, dirty[i].offset, dirty[i].offset + dirty[i].size, dirty[i].state, dirty[i].version, dirty[i].location) < 0)
|
||||||
{
|
{
|
||||||
// need to wait for something, undo added requests and requeue op
|
// need to wait for something, undo added requests and requeue op
|
||||||
ring->sq.sqe_tail = prev_sqe_pos;
|
ring->sq.sqe_tail = prev_sqe_pos;
|
||||||
|
@ -108,7 +109,7 @@ int blockstore::read(blockstore_operation *read_op)
|
||||||
}
|
}
|
||||||
if (clean_it != object_db.end())
|
if (clean_it != object_db.end())
|
||||||
{
|
{
|
||||||
if (fulfill_read(*read_op, 0, block_size, ST_CURRENT, 0, clean_it->second.location) < 0)
|
if (fulfill_read(read_op, 0, block_size, ST_CURRENT, 0, clean_it->second.location) < 0)
|
||||||
{
|
{
|
||||||
// need to wait for something, undo added requests and requeue op
|
// need to wait for something, undo added requests and requeue op
|
||||||
ring->sq.sqe_tail = prev_sqe_pos;
|
ring->sq.sqe_tail = prev_sqe_pos;
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
#include "ringloop.h"
|
||||||
|
|
||||||
|
ring_loop_t::ring_loop_t(int qd)
|
||||||
|
{
|
||||||
|
int ret = io_uring_queue_init(qd, ring, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
{
|
||||||
|
throw new std::runtime_error(std::string("io_uring_queue_init: ") + strerror(-ret));
|
||||||
|
}
|
||||||
|
ring_data = (struct ring_data_t*)malloc(sizeof(ring_data_t) * ring->sq.ring_sz);
|
||||||
|
if (!ring_data)
|
||||||
|
{
|
||||||
|
throw new std::bad_alloc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ring_loop_t::~ring_loop_t()
|
||||||
|
{
|
||||||
|
free(ring_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct io_uring_sqe* ring_loop_t::get_sqe()
|
||||||
|
{
|
||||||
|
struct io_uring_sqe* sqe = io_uring_get_sqe(ring);
|
||||||
|
if (sqe)
|
||||||
|
{
|
||||||
|
io_uring_sqe_set_data(sqe, ring_data + (sqe - ring->sq.sqes));
|
||||||
|
}
|
||||||
|
return sqe;
|
||||||
|
}
|
||||||
|
|
||||||
|
int ring_loop_t::register_consumer(ring_consumer_t & consumer)
|
||||||
|
{
|
||||||
|
consumer.number = consumers.size();
|
||||||
|
consumers.push_back(consumer);
|
||||||
|
return consumer.number;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ring_loop_t::unregister_consumer(int number)
|
||||||
|
{
|
||||||
|
if (number < consumers.size())
|
||||||
|
{
|
||||||
|
consumers[number].handle_event = NULL;
|
||||||
|
consumers[number].loop = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ring_loop_t::loop(bool sleep)
|
||||||
|
{
|
||||||
|
struct io_uring_cqe *cqe;
|
||||||
|
if (sleep)
|
||||||
|
{
|
||||||
|
io_uring_wait_cqe(ring, &cqe);
|
||||||
|
}
|
||||||
|
while ((io_uring_peek_cqe(ring, &cqe), cqe))
|
||||||
|
{
|
||||||
|
struct ring_data_t *d = (struct ring_data_t*)cqe->user_data;
|
||||||
|
if (d->source < consumers.size())
|
||||||
|
{
|
||||||
|
d->res = cqe->res;
|
||||||
|
ring_consumer_t & c = consumers[d->source];
|
||||||
|
if (c.handle_event != NULL)
|
||||||
|
{
|
||||||
|
c.handle_event(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
io_uring_cqe_seen(ring, cqe);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < consumers.size(); i++)
|
||||||
|
{
|
||||||
|
consumers[i].loop();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,40 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef _LARGEFILE64_SOURCE
|
||||||
|
#define _LARGEFILE64_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <liburing.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
struct ring_data_t
|
||||||
|
{
|
||||||
|
uint64_t source;
|
||||||
|
struct iovec iov; // for single-entry read/write operations
|
||||||
|
int res;
|
||||||
|
void *op;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ring_consumer_t
|
||||||
|
{
|
||||||
|
int number;
|
||||||
|
std::function<void (ring_data_t*)> handle_event;
|
||||||
|
std::function<void ()> loop;
|
||||||
|
};
|
||||||
|
|
||||||
|
class ring_loop_t
|
||||||
|
{
|
||||||
|
std::vector<ring_consumer_t> consumers;
|
||||||
|
struct ring_data_t *ring_data;
|
||||||
|
public:
|
||||||
|
struct io_uring *ring;
|
||||||
|
ring_loop_t(int qd);
|
||||||
|
~ring_loop_t();
|
||||||
|
struct io_uring_sqe* get_sqe();
|
||||||
|
int register_consumer(ring_consumer_t & consumer);
|
||||||
|
void unregister_consumer(int number);
|
||||||
|
void loop(bool sleep);
|
||||||
|
};
|
1
test.cpp
1
test.cpp
|
@ -41,6 +41,7 @@ static void test_write(struct io_uring *ring, int fd)
|
||||||
printf("cqe failed: %d %s\n", ret, strerror(-ret));
|
printf("cqe failed: %d %s\n", ret, strerror(-ret));
|
||||||
else
|
else
|
||||||
printf("result: %d\n", ret);
|
printf("result: %d\n", ret);
|
||||||
|
io_uring_cqe_seen(ring, cqe);
|
||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue