2019-11-03 01:34:29 +03:00
|
|
|
#include "blockstore.h"
|
2019-11-01 02:47:57 +03:00
|
|
|
|
2019-11-05 02:43:21 +03:00
|
|
|
// Set up a blockstore instance.
//
// Registers this object as a consumer of `ringloop`, reads the block size order
// from config["block_size_order"], then calls open_data(), open_meta(),
// open_journal() and calc_lengths() with the same config and creates the data
// block allocator.
//
// Throws std::runtime_error("Bad block size") when the configured order gives a
// block size <= 1 or >= MAX_BLOCK_SIZE, std::bad_alloc when the allocator can't
// be created, and lets any exception from stoull() or the open_*/calc_lengths
// helpers propagate. On any failure the descriptors opened so far are closed and
// the ring consumer is unregistered again, because the destructor does not run
// for a partially constructed object.
blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, ring_loop_t *ringloop)
{
    this->ringloop = ringloop;
    ring_consumer.handle_event = [this](ring_data_t *d) { handle_event(d); };
    ring_consumer.loop = [this]() { loop(); };
    ringloop->register_consumer(ring_consumer);
    initialized = 0;
    // handle_event() tests these pointers before loop() has assigned them
    metadata_init_reader = NULL;
    journal_init_reader = NULL;
    data_fd = meta_fd = journal_fd = -1;
    try
    {
        block_order = stoull(config["block_size_order"]);
        // Validate the order before shifting: `1 << order` is an int shift,
        // so an order of 31 or more would overflow it. Order 0 gives block_size 1,
        // which the size check below rejects anyway.
        if (block_order == 0 || block_order >= 31)
        {
            throw std::runtime_error("Bad block size");
        }
        block_size = 1 << block_order;
        if (block_size <= 1 || block_size >= MAX_BLOCK_SIZE)
        {
            throw std::runtime_error("Bad block size");
        }
        open_data(config);
        open_meta(config);
        open_journal(config);
        calc_lengths(config);
        data_alloc = allocator_create(block_count);
        if (!data_alloc)
        {
            // Thrown by value so that it is a real std::bad_alloc for callers
            throw std::bad_alloc();
        }
    }
    catch (...)
    {
        // Descriptors may be shared between devices; close each distinct one once
        if (data_fd >= 0)
            close(data_fd);
        if (meta_fd >= 0 && meta_fd != data_fd)
            close(meta_fd);
        if (journal_fd >= 0 && journal_fd != meta_fd && journal_fd != data_fd)
            close(journal_fd);
        // The registered callbacks capture `this`, which is about to become invalid.
        // Same call the destructor uses.
        ringloop->unregister_consumer(ring_consumer.number);
        // Rethrow the original exception object (a `throw e;` would slice it)
        throw;
    }
}
|
2019-10-31 13:49:46 +03:00
|
|
|
|
2019-11-03 01:34:29 +03:00
|
|
|
// Tear down the blockstore: detach from the ring loop, free an init reader that
// is still alive, and close the device descriptors.
blockstore::~blockstore()
{
    ringloop->unregister_consumer(ring_consumer.number);
    // loop() allocates the metadata reader when entering state 1 and the journal
    // reader when entering state 2, and frees each one when leaving that state.
    // If we are destroyed in one of those states the reader would otherwise leak.
    if (initialized == 1)
    {
        delete metadata_init_reader;
    }
    else if (initialized == 2)
    {
        delete journal_init_reader;
    }
    // Descriptors may be shared between devices; close each distinct one once
    if (data_fd >= 0)
        close(data_fd);
    if (meta_fd >= 0 && meta_fd != data_fd)
        close(meta_fd);
    if (journal_fd >= 0 && journal_fd != meta_fd && journal_fd != data_fd)
        close(journal_fd);
}
|
|
|
|
|
2019-11-05 02:43:21 +03:00
|
|
|
// main event loop - handle requests
|
|
|
|
void blockstore::handle_event(ring_data_t *data)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-06 19:27:48 +03:00
|
|
|
if (initialized != 10)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
if (metadata_init_reader)
|
|
|
|
{
|
|
|
|
metadata_init_reader->handle_event(data);
|
|
|
|
}
|
|
|
|
else if (journal_init_reader)
|
|
|
|
{
|
|
|
|
journal_init_reader->handle_event(data);
|
|
|
|
}
|
2019-11-03 01:34:29 +03:00
|
|
|
}
|
2019-11-05 02:43:21 +03:00
|
|
|
else
|
2019-11-03 01:34:29 +03:00
|
|
|
{
|
2019-11-05 14:10:23 +03:00
|
|
|
struct blockstore_operation* op = (struct blockstore_operation*)data->op;
|
|
|
|
if ((op->flags & OP_TYPE_MASK) == OP_READ_DIRTY ||
|
|
|
|
(op->flags & OP_TYPE_MASK) == OP_READ)
|
|
|
|
{
|
|
|
|
op->pending_ops--;
|
|
|
|
if (data->res < 0)
|
|
|
|
{
|
|
|
|
// read error
|
|
|
|
op->retval = data->res;
|
|
|
|
}
|
|
|
|
if (op->pending_ops == 0)
|
|
|
|
{
|
|
|
|
if (op->retval == 0)
|
|
|
|
op->retval = op->len;
|
|
|
|
op->callback(op);
|
|
|
|
in_process_ops.erase(op);
|
|
|
|
}
|
|
|
|
}
|
2019-10-31 13:49:46 +03:00
|
|
|
}
|
2019-11-03 01:34:29 +03:00
|
|
|
}
|
2019-11-05 02:12:04 +03:00
|
|
|
|
2019-11-05 02:43:21 +03:00
|
|
|
// main event loop - produce requests
|
|
|
|
void blockstore::loop()
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
if (initialized != 10)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
// read metadata, then journal
|
|
|
|
if (initialized == 0)
|
|
|
|
{
|
|
|
|
metadata_init_reader = new blockstore_init_meta(this);
|
|
|
|
initialized = 1;
|
|
|
|
}
|
|
|
|
else if (initialized == 1)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
int res = metadata_init_reader->loop();
|
|
|
|
if (!res)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
delete metadata_init_reader;
|
|
|
|
metadata_init_reader = NULL;
|
|
|
|
journal_init_reader = new blockstore_init_journal(this);
|
|
|
|
initialized = 2;
|
2019-11-05 02:12:04 +03:00
|
|
|
}
|
2019-11-05 02:43:21 +03:00
|
|
|
}
|
|
|
|
else if (initialized == 2)
|
|
|
|
{
|
|
|
|
int res = journal_init_reader->loop();
|
|
|
|
if (!res)
|
2019-11-05 02:12:04 +03:00
|
|
|
{
|
2019-11-05 02:43:21 +03:00
|
|
|
delete journal_init_reader;
|
|
|
|
journal_init_reader = NULL;
|
|
|
|
initialized = 10;
|
2019-11-05 02:12:04 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-11-05 02:43:21 +03:00
|
|
|
else
|
|
|
|
{
|
2019-11-06 19:27:48 +03:00
|
|
|
// try to submit ops
|
|
|
|
auto op = submit_queue.begin();
|
|
|
|
while (op != submit_queue.end())
|
|
|
|
{
|
|
|
|
auto cur = op++;
|
|
|
|
if (((*cur)->flags & OP_TYPE_MASK) == OP_READ_DIRTY ||
|
|
|
|
((*cur)->flags & OP_TYPE_MASK) == OP_READ)
|
|
|
|
{
|
|
|
|
int dequeue_op = dequeue_read(*cur);
|
|
|
|
if (dequeue_op)
|
|
|
|
{
|
|
|
|
submit_queue.erase(cur);
|
|
|
|
}
|
|
|
|
else if ((*cur)->wait_for == WAIT_SQE)
|
|
|
|
{
|
|
|
|
// ring is full, stop submission
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2019-11-07 02:24:12 +03:00
|
|
|
else if (((*cur)->flags & OP_TYPE_MASK) == OP_WRITE ||
|
|
|
|
((*cur)->flags & OP_TYPE_MASK) == OP_DELETE)
|
|
|
|
{
|
|
|
|
int dequeue_op = dequeue_write(*cur);
|
|
|
|
if (dequeue_op)
|
|
|
|
{
|
|
|
|
submit_queue.erase(cur);
|
|
|
|
}
|
|
|
|
else if ((*cur)->wait_for == WAIT_SQE)
|
|
|
|
{
|
|
|
|
// ring is full, stop submission
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (((*cur)->flags & OP_TYPE_MASK) == OP_SYNC)
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
else if (((*cur)->flags & OP_TYPE_MASK) == OP_STABLE)
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
2019-11-06 19:27:48 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate an operation and append it to the submit queue.
//
// Returns -EINVAL when the request fails basic verification: offset outside the
// block, offset+len past the end of the block, length not a multiple of
// DISK_ALIGNMENT, or an operation type outside [OP_READ, OP_DELETE].
// Returns 0 once the operation has been queued.
//
// For OP_WRITE the operation is also assigned the next version number of the
// object (last dirty version + 1, else clean version + 1, else 1) and an
// in-flight entry is appended to the object's dirty list right away.
int blockstore::enqueue_op(blockstore_operation *op)
{
    const auto op_type = op->flags & OP_TYPE_MASK;
    // `len > block_size - offset` (not `>=`): a request that ends exactly at the
    // block boundary, e.g. a full-block write at offset 0, is valid.
    if (op->offset >= block_size || op->len > block_size-op->offset ||
        (op->len % DISK_ALIGNMENT) ||
        op_type < OP_READ || op_type > OP_DELETE)
    {
        // Basic verification not passed
        return -EINVAL;
    }
    submit_queue.push_back(op);
    if (op_type == OP_WRITE)
    {
        // Assign version number.
        // dirty_queue and object_db are used as maps here (find / emplace(key, value)),
        // so the stored value is reached through the iterator's `second`.
        auto dirty_it = dirty_queue.find(op->oid);
        if (dirty_it != dirty_queue.end())
        {
            op->version = dirty_it->second.back().version + 1;
        }
        else
        {
            auto clean_it = object_db.find(op->oid);
            if (clean_it != object_db.end())
            {
                op->version = clean_it->second.version + 1;
            }
            else
            {
                op->version = 1;
            }
            dirty_it = dirty_queue.emplace(op->oid, dirty_list()).first;
        }
        // Immediately add the operation into the dirty queue, so subsequent reads could see it
        dirty_it->second.push_back((dirty_entry){
            .version = op->version,
            .state = ST_IN_FLIGHT,
            .flags = 0,
            .location = 0,
            .offset = op->offset,
            .size = op->len,
        });
    }
    return 0;
}
|