vitastor/blockstore.cpp

243 lines
7.1 KiB
C++
Raw Normal View History

#include "blockstore.h"
2019-11-01 02:47:57 +03:00
2019-11-05 02:43:21 +03:00
/**
 * Create a blockstore attached to an existing ring loop.
 *
 * Registers this object as a ring loop consumer, validates the configured
 * block size, opens the data, metadata and journal devices via
 * open_data()/open_meta()/open_journal(), runs calc_lengths() and finally
 * creates the data block allocator. Reading metadata and the journal does
 * not happen here; it is driven later from loop().
 *
 * If any step fails, every descriptor opened so far is closed and the
 * consumer is unregistered again (its callbacks capture `this`, which does
 * not outlive a failed constructor), then the failure is rethrown unchanged.
 *
 * @param config   settings map; "block_size_order" (log2 of the block size)
 *                 is parsed here, the map is also handed to the open_*() and
 *                 calc_lengths() helpers
 * @param ringloop ring loop to register with; the pointer is stored and used
 *                 again in the destructor
 * @throws std::runtime_error when the block size is <= 1 or >= MAX_BLOCK_SIZE
 * @throws std::bad_alloc     when allocator_create() returns a null allocator
 */
blockstore::blockstore(spp::sparse_hash_map<std::string, std::string> & config, ring_loop_t *ringloop)
{
    this->ringloop = ringloop;
    ring_consumer.handle_event = [this](ring_data_t *d) { handle_event(d); };
    ring_consumer.loop = [this]() { loop(); };
    ringloop->register_consumer(ring_consumer);
    initialized = 0;
    // Mark all descriptors as "not open" before anything can fail, so the
    // cleanup path below knows what it may close
    data_fd = meta_fd = journal.fd = -1;
    try
    {
        const unsigned long long order = std::stoull(config["block_size_order"]);
        // Bound the exponent before shifting and shift in 64 bits: shifting a
        // plain int by 31 or more is undefined and could let a bogus order
        // slip through the size check
        if (order == 0 || order >= 64 || (1ull << order) >= MAX_BLOCK_SIZE)
        {
            throw std::runtime_error("Bad block size");
        }
        block_order = order;
        block_size = 1ull << order;
        open_data(config);
        open_meta(config);
        open_journal(config);
        calc_lengths(config);
        data_alloc = allocator_create(block_count);
        if (!data_alloc)
        {
            throw std::bad_alloc();
        }
    }
    catch (...)
    {
        // Descriptors may be shared between devices, close each value only once
        if (data_fd >= 0)
            close(data_fd);
        if (meta_fd >= 0 && meta_fd != data_fd)
            close(meta_fd);
        if (journal.fd >= 0 && journal.fd != meta_fd && journal.fd != data_fd)
            close(journal.fd);
        ringloop->unregister_consumer(ring_consumer.number);
        // Bare rethrow keeps the original exception object and its dynamic type
        throw;
    }
}
2019-10-31 13:49:46 +03:00
/**
 * Detach from the ring loop and release the resources released here:
 * the device descriptors and the journal sector buffers.
 *
 * A descriptor value is closed at most once even when several of
 * data_fd / meta_fd / journal.fd hold the same number.
 */
blockstore::~blockstore()
{
    ringloop->unregister_consumer(ring_consumer.number);
    if (data_fd >= 0)
        close(data_fd);
    if (meta_fd >= 0 && meta_fd != data_fd)
        close(meta_fd);
    // Compare against both other descriptors: checking meta_fd alone would
    // close the same descriptor twice if the journal shared data_fd while
    // the metadata used a separate one
    if (journal.fd >= 0 && journal.fd != meta_fd && journal.fd != data_fd)
        close(journal.fd);
    free(journal.sector_buf);
    free(journal.sector_info);
}
2019-11-05 02:43:21 +03:00
// main event loop - handle requests
/**
 * Ring loop completion callback.
 *
 * While initialization is still running (initialized != 10) the event is
 * forwarded to whichever init reader currently exists, metadata first.
 * Afterwards data->op is treated as the blockstore_operation the completed
 * request belongs to:
 *  - reads (OP_READ, OP_READ_DIRTY): a negative data->res is stored in
 *    op->retval; when the last pending request finishes, retval becomes
 *    op->len unless an error was recorded, the callback is invoked and the
 *    operation is removed from in_process_ops;
 *  - writes/deletes (OP_WRITE, OP_DELETE): a negative data->res is fatal,
 *    otherwise the journal sector used by the operation is released.
 *
 * @param data completion data; data->res is the request result and
 *             data->op the owning operation
 * @throws std::runtime_error when a write or delete request failed
 */
void blockstore::handle_event(ring_data_t *data)
{
    if (initialized != 10)
    {
        if (metadata_init_reader)
        {
            metadata_init_reader->handle_event(data);
        }
        else if (journal_init_reader)
        {
            journal_init_reader->handle_event(data);
        }
        return;
    }
    struct blockstore_operation* op = (struct blockstore_operation*)data->op;
    const auto op_type = op->flags & OP_TYPE_MASK;
    if (op_type == OP_READ_DIRTY || op_type == OP_READ)
    {
        op->pending_ops--;
        if (data->res < 0)
        {
            // read error
            op->retval = data->res;
        }
        if (op->pending_ops == 0)
        {
            // retval is still 0 only if none of the requests reported an error
            if (op->retval == 0)
                op->retval = op->len;
            op->callback(op);
            in_process_ops.erase(op);
        }
    }
    else if (op_type == OP_WRITE || op_type == OP_DELETE)
    {
        op->pending_ops--;
        if (data->res < 0)
        {
            // write error
            // FIXME: our state becomes corrupted after a write error. maybe do something better than just die
            // Thrown by value so that it can be caught as std::exception and is not leaked
            throw std::runtime_error("write operation failed. in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111");
        }
        if (op->used_journal_sector > 0)
        {
            // used_journal_sector is stored 1-based, 0 means "none"
            uint64_t s = op->used_journal_sector-1;
            if (journal.sector_info[s].usage_count > 0)
            {
                // The last write to this journal sector was made by this op, release the buffer
                journal.sector_info[s].usage_count--;
            }
            op->used_journal_sector = 0;
        }
        if (op->pending_ops == 0)
        {
            // TODO: nothing is done yet when the last request of a write/delete completes
        }
    }
}
2019-11-05 02:12:04 +03:00
2019-11-05 02:43:21 +03:00
// main event loop - produce requests
void blockstore::loop()
2019-11-05 02:12:04 +03:00
{
2019-11-05 02:43:21 +03:00
if (initialized != 10)
2019-11-05 02:12:04 +03:00
{
2019-11-05 02:43:21 +03:00
// read metadata, then journal
if (initialized == 0)
{
metadata_init_reader = new blockstore_init_meta(this);
initialized = 1;
}
else if (initialized == 1)
2019-11-05 02:12:04 +03:00
{
2019-11-05 02:43:21 +03:00
int res = metadata_init_reader->loop();
if (!res)
2019-11-05 02:12:04 +03:00
{
2019-11-05 02:43:21 +03:00
delete metadata_init_reader;
metadata_init_reader = NULL;
journal_init_reader = new blockstore_init_journal(this);
initialized = 2;
2019-11-05 02:12:04 +03:00
}
2019-11-05 02:43:21 +03:00
}
else if (initialized == 2)
{
int res = journal_init_reader->loop();
if (!res)
2019-11-05 02:12:04 +03:00
{
2019-11-05 02:43:21 +03:00
delete journal_init_reader;
journal_init_reader = NULL;
initialized = 10;
2019-11-05 02:12:04 +03:00
}
}
}
2019-11-05 02:43:21 +03:00
else
{
// try to submit ops
auto op = submit_queue.begin();
while (op != submit_queue.end())
{
auto cur = op++;
if (((*cur)->flags & OP_TYPE_MASK) == OP_READ_DIRTY ||
((*cur)->flags & OP_TYPE_MASK) == OP_READ)
{
int dequeue_op = dequeue_read(*cur);
if (dequeue_op)
{
submit_queue.erase(cur);
}
else if ((*cur)->wait_for == WAIT_SQE)
{
// ring is full, stop submission
break;
}
}
2019-11-07 02:24:12 +03:00
else if (((*cur)->flags & OP_TYPE_MASK) == OP_WRITE ||
((*cur)->flags & OP_TYPE_MASK) == OP_DELETE)
{
int dequeue_op = dequeue_write(*cur);
if (dequeue_op)
{
submit_queue.erase(cur);
}
else if ((*cur)->wait_for == WAIT_SQE)
{
// ring is full, stop submission
break;
}
}
else if (((*cur)->flags & OP_TYPE_MASK) == OP_SYNC)
{
}
else if (((*cur)->flags & OP_TYPE_MASK) == OP_STABLE)
{
}
}
}
}
/**
 * Validate an operation and append it to the submission queue.
 *
 * The operation is rejected unless its byte range lies inside one block,
 * its length is a multiple of DISK_ALIGNMENT and its type is within
 * OP_READ..OP_DELETE. For OP_WRITE the next version number of the object is
 * assigned (latest dirty version + 1, else clean version + 1, else 1) and
 * an ST_IN_FLIGHT entry is inserted into dirty_db right away.
 *
 * @param op operation to enqueue; op->version is filled in for writes
 * @return 0 when the operation was queued, -EINVAL when validation failed
 */
int blockstore::enqueue_op(blockstore_operation *op)
{
    const auto op_type = op->flags & OP_TYPE_MASK;
    // offset < block_size is checked first, so block_size-op->offset cannot wrap.
    // A range ending exactly at the end of the block (e.g. a full-block write)
    // is valid, hence '>' and not '>='
    if (op->offset >= block_size || op->len > block_size-op->offset ||
        (op->len % DISK_ALIGNMENT) ||
        op_type < OP_READ || op_type > OP_DELETE)
    {
        // Basic verification not passed
        return -EINVAL;
    }
    submit_queue.push_back(op);
    if (op_type == OP_WRITE)
    {
        // Assign version number
        bool has_dirty_version = false;
        auto dirty_it = dirty_db.upper_bound((obj_ver_id){
            .oid = op->oid,
            .version = UINT64_MAX,
        });
        // The entry just before the upper bound is the newest dirty version of
        // this object, if there is one. Stepping back from begin() is undefined,
        // so only do it when a previous entry exists
        if (dirty_it != dirty_db.begin())
        {
            dirty_it--;
            if (dirty_it->first.oid == op->oid)
            {
                op->version = dirty_it->first.version + 1;
                has_dirty_version = true;
            }
        }
        if (!has_dirty_version)
        {
            auto clean_it = object_db.find(op->oid);
            if (clean_it != object_db.end())
            {
                op->version = clean_it->second.version + 1;
            }
            else
            {
                op->version = 1;
            }
        }
        // Immediately add the operation into dirty_db, so subsequent reads could see it
        dirty_db.emplace((obj_ver_id){
            .oid = op->oid,
            .version = op->version,
        }, (dirty_entry){
            .state = ST_IN_FLIGHT,
            .flags = 0,
            .location = 0,
            .offset = op->offset,
            .size = op->len,
        });
    }
    return 0;
}