vitastor/src/qemu_driver.c

546 lines
14 KiB
C
Raw Normal View History

// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
2020-06-25 11:59:31 +03:00
// QEMU block driver
#define BUILD_DSO
2020-06-25 11:59:31 +03:00
#define _GNU_SOURCE
#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qapi/error.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qerror.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "qemu/option.h"
#if QEMU_VERSION_MAJOR >= 3
#include "qemu/units.h"
#include "block/qdict.h"
2020-06-25 11:59:31 +03:00
#include "qemu/cutils.h"
#else
#include "qapi/qmp/qint.h"
#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
#define qobject_unref QDECREF
#endif
2020-06-25 11:59:31 +03:00
#include "qemu_proxy.h"
// Intentionally empty exported function.
// NOTE(review): presumably one of the symbols QEMU's module loader expects from a
// shared-object block driver (see BUILD_DSO above) - confirm against qemu/module.h
void qemu_module_dummy(void)
{
}
// Intentionally empty function whose name comes from the DSO_STAMP_FUN macro.
// NOTE(review): presumably the "stamp" symbol QEMU uses to check that a module was
// built for the running QEMU binary - confirm against qemu/module.h
void DSO_STAMP_FUN(void)
{
}
2020-08-03 23:50:50 +03:00
typedef struct VitastorClient
2020-06-25 11:59:31 +03:00
{
void *proxy;
void *watch;
char *etcd_host;
char *etcd_prefix;
char *image;
2020-06-25 11:59:31 +03:00
uint64_t inode;
uint64_t pool;
2020-06-25 11:59:31 +03:00
uint64_t size;
long readonly;
2020-06-25 11:59:31 +03:00
QemuMutex mutex;
2020-08-03 23:50:50 +03:00
} VitastorClient;
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
typedef struct VitastorRPC
2020-06-25 11:59:31 +03:00
{
BlockDriverState *bs;
Coroutine *co;
QEMUIOVector *iov;
long ret;
2020-06-25 11:59:31 +03:00
int complete;
2020-08-03 23:50:50 +03:00
} VitastorRPC;
2020-06-25 11:59:31 +03:00
// Forward declarations of helpers that are used before their definitions below
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
static void vitastor_co_generic_bh_cb(long retval, void *opaque);
static void vitastor_close(BlockDriverState *bs);
2020-06-25 11:59:31 +03:00
// Cut the first token off <src> in place.
// Scans for the first unescaped <delim>; a backslash protects the character after it.
// When a delimiter is found it is overwritten with '\0' and *p points just past it,
// otherwise *p is set to NULL. Always returns <src> (the token itself, still escaped).
static char *qemu_rbd_next_tok(char *src, char delim, char **p)
{
    char *cur = src;
    *p = NULL;
    while (*cur != '\0' && *cur != delim)
    {
        // Skip the escaped character together with its backslash
        if (cur[0] == '\\' && cur[1] != '\0')
        {
            cur++;
        }
        cur++;
    }
    if (*cur == delim)
    {
        *cur = '\0';
        *p = cur + 1;
    }
    return src;
}
// Remove backslash escapes from <src> in place: "\x" becomes "x".
// A backslash that is the last character of the string is kept as is.
static void qemu_rbd_unescape(char *src)
{
    const char *in = src;
    char *out = src;
    while (*in != '\0')
    {
        if (in[0] == '\\' && in[1] != '\0')
        {
            // Drop the backslash, copy the character it protects
            in++;
        }
        *out++ = *in++;
    }
    *out = '\0';
}
2020-08-03 23:50:50 +03:00
// vitastor[:key=value]*
// vitastor:etcd_host=127.0.0.1:inode=1:pool=1
2020-08-03 23:50:50 +03:00
// Parse a "vitastor:key=value[:key=value]*" pseudo-filename into <options>.
//
// "inode", "pool" and "size" are stored as integers, every other key as a string.
// After parsing, the option set is validated:
// - either "image" or a non-zero "inode" must be present;
// - with "inode", the pool must be given either in the upper POOL_ID_BITS bits of
//   the inode number or as a separate non-zero "pool", and "size" must be non-zero;
// - "etcd_host" must be present.
// On any failure exactly one error is reported through <errp>.
static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
{
    const char *start;
    char *p, *buf;
    if (!strstart(filename, "vitastor:", &start))
    {
        error_setg(errp, "File name must start with 'vitastor:'");
        return;
    }
    // The tokenizer modifies the string, so work on a private copy
    buf = g_strdup(start);
    p = buf;
    // The following are all key/value pairs
    while (p)
    {
        char *name, *value;
        name = qemu_rbd_next_tok(p, '=', &p);
        if (!p)
        {
            error_setg(errp, "conf option %s has no value", name);
            // Leave immediately: the checks below would try to set a second
            // error on the same errp, which error_setg() does not allow
            goto out;
        }
        qemu_rbd_unescape(name);
        value = qemu_rbd_next_tok(p, ':', &p);
        qemu_rbd_unescape(value);
        if (!strcmp(name, "inode") || !strcmp(name, "pool") || !strcmp(name, "size"))
        {
            unsigned long long num_val;
            if (parse_uint_full(value, &num_val, 0))
            {
                error_setg(errp, "Illegal %s: %s", name, value);
                goto out;
            }
            qdict_put_int(options, name, num_val);
        }
        else
        {
            qdict_put_str(options, name, value);
        }
    }
    if (!qdict_get_try_str(options, "image"))
    {
        uint64_t inode = (uint64_t)qdict_get_try_int(options, "inode", 0);
        if (!inode)
        {
            error_setg(errp, "one of image (name) and inode (number) must be specified");
            goto out;
        }
        if (!(inode >> (64-POOL_ID_BITS)) &&
            !qdict_get_try_int(options, "pool", 0))
        {
            error_setg(errp, "pool number must be specified or included in the inode number");
            goto out;
        }
        if (!qdict_get_try_int(options, "size", 0))
        {
            error_setg(errp, "size must be specified when inode number is used instead of image name");
            goto out;
        }
    }
    // qdict_get_str() requires the key to exist, so the "try" variant is needed here
    if (!qdict_get_try_str(options, "etcd_host"))
    {
        error_setg(errp, "etcd_host is missing");
        goto out;
    }
out:
    g_free(buf);
}
// Coroutine body: start watching the metadata of client->image and wait until
// the proxy reports completion through vitastor_co_generic_bh_cb().
// The callback stores its result in task->ret and sets task->complete.
static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
{
    VitastorClient *cli = task->bs->opaque;
    // The completion callback resumes this coroutine
    task->co = qemu_coroutine_self();
    qemu_mutex_lock(&cli->mutex);
    vitastor_proxy_watch_metadata(cli->proxy, cli->image, vitastor_co_generic_bh_cb, task);
    qemu_mutex_unlock(&cli->mutex);
    for (; !task->complete; )
    {
        qemu_coroutine_yield();
    }
}
2020-08-03 23:50:50 +03:00
// Open a Vitastor device.
//
// Reads "etcd_host", "etcd_prefix", "image", "inode", "pool" and "size" from
// <options>, creates the proxy and determines the device size:
// - with "image", the metadata watch supplies inode number, size and read-only flag
//   ("size" from the options is only used when the watch reports size 0);
// - otherwise "inode"/"pool"/"size" are taken from the options.
// Consumed keys are removed from <options> on success.
//
// Returns 0 on success; on failure sets <errp>, releases everything acquired so far
// and returns -1.
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
{
    VitastorClient *client = bs->opaque;
    qemu_mutex_init(&client->mutex);
    client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
    client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd_prefix"));
    client->proxy = vitastor_proxy_create(bdrv_get_aio_context(bs), client->etcd_host, client->etcd_prefix);
    client->image = g_strdup(qdict_get_try_str(options, "image"));
    // The device is read-only unless it was opened with BDRV_O_RDWR
    client->readonly = (flags & BDRV_O_RDWR) ? 0 : 1;
    if (client->image)
    {
        // Get image metadata (size and readonly flag)
        VitastorRPC task;
        task.complete = 0;
        task.bs = bs;
        if (qemu_in_coroutine())
        {
            vitastor_co_get_metadata(&task);
        }
        else
        {
            assert(qemu_get_current_aio_context() == qemu_get_aio_context());
            qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
        }
        BDRV_POLL_WHILE(bs, !task.complete);
        client->watch = (void*)task.ret;
        client->readonly = client->readonly || vitastor_proxy_get_readonly(client->watch);
        client->size = vitastor_proxy_get_size(client->watch);
        if (!vitastor_proxy_get_inode_num(client->watch))
        {
            error_setg(errp, "image does not exist");
            vitastor_close(bs);
            // The client is torn down; continuing would touch freed state
            // and report a second error on the same errp
            return -1;
        }
        if (!client->size)
        {
            // "size" is optional here, so the key may be absent
            client->size = qdict_get_try_int(options, "size", 0);
        }
    }
    else
    {
        client->watch = NULL;
        // None of these keys is guaranteed to be present (e.g. "pool" may be
        // encoded in the inode number), so use the non-asserting getters
        client->inode = qdict_get_try_int(options, "inode", 0);
        client->pool = qdict_get_try_int(options, "pool", 0);
        if (client->pool)
        {
            // Put the pool number into the upper POOL_ID_BITS bits of the inode number
            client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
        }
        client->size = qdict_get_try_int(options, "size", 0);
    }
    if (!client->size)
    {
        error_setg(errp, "image size not specified");
        vitastor_close(bs);
        return -1;
    }
    bs->total_sectors = client->size / BDRV_SECTOR_SIZE;
    //client->aio_context = bdrv_get_aio_context(bs);
    // Remove the options handled by this driver
    qdict_del(options, "etcd_host");
    qdict_del(options, "etcd_prefix");
    qdict_del(options, "image");
    qdict_del(options, "inode");
    qdict_del(options, "pool");
    qdict_del(options, "size");
    return 0;
}
2020-08-03 23:50:50 +03:00
// Release everything vitastor_file_open() stored in the client state:
// the proxy, the mutex and the option strings.
static void vitastor_close(BlockDriverState *bs)
{
    VitastorClient *cli = bs->opaque;
    vitastor_proxy_destroy(cli->proxy);
    qemu_mutex_destroy(&cli->mutex);
    // g_free() ignores NULL, so optional strings need no guard
    g_free(cli->etcd_host);
    g_free(cli->etcd_prefix);
    g_free(cli->image);
}
#if QEMU_VERSION_MAJOR >= 3
2020-08-03 23:50:50 +03:00
// Report 4096 bytes as both the physical and the logical block size. Always returns 0.
static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
    bsz->log = bsz->phys = 4096;
    return 0;
}
#endif
2020-06-25 11:59:31 +03:00
2020-09-20 01:46:07 +03:00
// Image creation entry point used by bdrv_create() / qemu-img.
// Only validates <url> by parsing it; nothing is created because inodes do not
// require creation in Vitastor.
// FIXME: Creation will be required once images have metadata.
// Returns 0 on success, -1 (with <errp> set) when <url> is invalid.
static int coroutine_fn vitastor_co_create_opts(
#if QEMU_VERSION_MAJOR >= 4
    BlockDriver *drv,
#endif
    const char *url, QemuOpts *opts, Error **errp)
{
    // Collect the error locally: errp may be NULL, so *errp must not be inspected
    Error *local_err = NULL;
    QDict *options = qdict_new();
    int ret = 0;
    vitastor_parse_filename(url, options, &local_err);
    if (local_err)
    {
        error_propagate(errp, local_err);
        ret = -1;
    }
    qobject_unref(options);
    return ret;
}
#if QEMU_VERSION_MAJOR >= 3
2020-09-20 01:46:07 +03:00
// Change the device size to <offset> bytes.
// Only the size remembered by the driver is updated; the inode itself is not resized yet.
// Preallocation is not supported: any mode other than PREALLOC_MODE_OFF fails with -ENOTSUP.
static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset,
#if QEMU_VERSION_MAJOR >= 4
    bool exact,
#endif
    PreallocMode prealloc,
#if (QEMU_VERSION_MAJOR >= 5 && QEMU_VERSION_MINOR >= 1) || QEMU_VERSION_MAJOR > 5
    BdrvRequestFlags flags,
#endif
    Error **errp)
{
    VitastorClient *client = bs->opaque;
    if (prealloc != PREALLOC_MODE_OFF)
    {
        error_setg(errp, "Unsupported preallocation mode '%s'", PreallocMode_str(prealloc));
        return -ENOTSUP;
    }
    // TODO: Resize inode to <offset> bytes
    // client->size is kept in bytes (vitastor_getlength() returns it as is),
    // so it must not be converted to sectors here
    client->size = offset;
    return 0;
}
#endif
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
// Fill driver info: the cluster size is reported as 4096 bytes. Always returns 0.
static int vitastor_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    const int cluster_bytes = 4096;
    bdi->cluster_size = cluster_bytes;
    return 0;
}
2020-08-03 23:50:50 +03:00
// Return the device size stored in the client state
static int64_t vitastor_getlength(BlockDriverState *bs)
{
    return ((VitastorClient*)bs->opaque)->size;
}
// Advertise 4096-byte alignment for requests and memory buffers.
// The signature and the location of the request alignment field depend on the QEMU version.
#if QEMU_VERSION_MAJOR >= 3
static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp)
#else
static int vitastor_refresh_limits(BlockDriverState *bs)
#endif
{
    bs->bl.opt_mem_alignment = 4096;
#if QEMU_VERSION_MAJOR >= 4
    bs->bl.min_mem_alignment = 4096;
    bs->bl.request_alignment = 4096;
#else
    bs->request_alignment = 4096;
#endif
#if QEMU_VERSION_MAJOR < 3
    return 0;
#endif
}
2020-08-03 23:50:50 +03:00
// Placeholder: always reports 0 allocated bytes.
// Not referenced by the BlockDriver definition (the member is commented out there).
static int64_t vitastor_get_allocated_file_size(BlockDriverState *bs)
{
    const int64_t allocated = 0;
    return allocated;
}
2020-08-03 23:50:50 +03:00
// Reset <task> and bind it to <bs> and to the calling coroutine.
// All other fields (iov, ret, complete) are zeroed.
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
{
    VitastorRPC fresh = { 0 };
    fresh.co = qemu_coroutine_self();
    fresh.bs = bs;
    *task = fresh;
}
static void vitastor_co_generic_bh_cb(long retval, void *opaque)
2020-06-25 11:59:31 +03:00
{
2020-08-03 23:50:50 +03:00
VitastorRPC *task = opaque;
2020-06-25 11:59:31 +03:00
task->ret = retval;
task->complete = 1;
if (qemu_coroutine_self() != task->co)
{
#if QEMU_VERSION_MAJOR >= 3
aio_co_wake(task->co);
#else
qemu_coroutine_enter(task->co, NULL);
qemu_aio_release(task);
#endif
}
2020-06-25 11:59:31 +03:00
}
2020-08-03 23:50:50 +03:00
// Read <bytes> bytes at <offset> into <iov>.
// Submits the request to the proxy and yields until the completion callback fires.
// Returns the result reported by the proxy.
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
{
    VitastorClient *cli = bs->opaque;
    VitastorRPC rpc;
    uint64_t inode_num;
    vitastor_co_init_task(bs, &rpc);
    rpc.iov = iov;
    // With an image watch the inode number comes from the watched metadata
    if (cli->watch)
    {
        inode_num = vitastor_proxy_get_inode_num(cli->watch);
    }
    else
    {
        inode_num = cli->inode;
    }
    qemu_mutex_lock(&cli->mutex);
    // First argument: 0 = read
    vitastor_proxy_rw(0, cli->proxy, inode_num, offset, bytes, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &rpc);
    qemu_mutex_unlock(&cli->mutex);
    for (; !rpc.complete; )
    {
        qemu_coroutine_yield();
    }
    return rpc.ret;
}
2020-08-03 23:50:50 +03:00
// Write <bytes> bytes from <iov> at <offset>.
// Submits the request to the proxy and yields until the completion callback fires.
// Returns the result reported by the proxy.
static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
{
    VitastorClient *cli = bs->opaque;
    VitastorRPC rpc;
    uint64_t inode_num;
    vitastor_co_init_task(bs, &rpc);
    rpc.iov = iov;
    // With an image watch the inode number comes from the watched metadata
    if (cli->watch)
    {
        inode_num = vitastor_proxy_get_inode_num(cli->watch);
    }
    else
    {
        inode_num = cli->inode;
    }
    qemu_mutex_lock(&cli->mutex);
    // First argument: 1 = write
    vitastor_proxy_rw(1, cli->proxy, inode_num, offset, bytes, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &rpc);
    qemu_mutex_unlock(&cli->mutex);
    for (; !rpc.complete; )
    {
        qemu_coroutine_yield();
    }
    return rpc.ret;
}
#if QEMU_VERSION_MAJOR < 3
// Sector-based read entry point for old QEMU: converts sectors to bytes
// and forwards to vitastor_co_preadv()
static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
{
    int res = vitastor_co_preadv(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
    return res;
}
// Sector-based write entry point for old QEMU: converts sectors to bytes
// and forwards to vitastor_co_pwritev()
static int coroutine_fn vitastor_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
{
    int res = vitastor_co_pwritev(bs, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, iov, 0);
    return res;
}
#endif
2020-08-03 23:50:50 +03:00
// Flush: submits a sync request to the proxy and yields until the completion
// callback fires. Returns the result reported by the proxy.
static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
{
    VitastorClient *cli = bs->opaque;
    VitastorRPC rpc;
    vitastor_co_init_task(bs, &rpc);
    qemu_mutex_lock(&cli->mutex);
    vitastor_proxy_sync(cli->proxy, vitastor_co_generic_bh_cb, &rpc);
    qemu_mutex_unlock(&cli->mutex);
    for (; !rpc.complete; )
    {
        qemu_coroutine_yield();
    }
    return rpc.ret;
}
#if QEMU_VERSION_MAJOR >= 3
2020-08-03 23:50:50 +03:00
static QemuOptsList vitastor_create_opts = {
.name = "vitastor-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
2020-06-25 11:59:31 +03:00
.desc = {
{
.name = BLOCK_OPT_SIZE,
.type = QEMU_OPT_SIZE,
.help = "Virtual disk size"
},
{ /* end of list */ }
}
};
#else
// Options accepted by the create operation (old QEMU option format):
// only the virtual disk size. The list is terminated by an entry with a NULL name.
static QEMUOptionParameter vitastor_create_opts[] = {
{
.name = BLOCK_OPT_SIZE,
.type = OPT_SIZE,
.help = "Virtual disk size"
},
{ NULL }
};
#endif
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
static const char *vitastor_strong_runtime_opts[] = {
"inode",
"pool",
"etcd_host",
"etcd_prefix",
2020-06-25 11:59:31 +03:00
NULL
};
2020-08-03 23:50:50 +03:00
static BlockDriver bdrv_vitastor = {
.format_name = "vitastor",
.protocol_name = "vitastor",
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
.instance_size = sizeof(VitastorClient),
.bdrv_parse_filename = vitastor_parse_filename,
2020-06-25 11:59:31 +03:00
.bdrv_has_zero_init = bdrv_has_zero_init_1,
2020-08-03 23:50:50 +03:00
.bdrv_get_info = vitastor_get_info,
.bdrv_getlength = vitastor_getlength,
#if QEMU_VERSION_MAJOR >= 3
2020-08-03 23:50:50 +03:00
.bdrv_probe_blocksizes = vitastor_probe_blocksizes,
#endif
2020-08-03 23:50:50 +03:00
.bdrv_refresh_limits = vitastor_refresh_limits,
2020-06-25 11:59:31 +03:00
// FIXME: Implement it along with per-inode statistics
2020-08-03 23:50:50 +03:00
//.bdrv_get_allocated_file_size = vitastor_get_allocated_file_size,
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
.bdrv_file_open = vitastor_file_open,
.bdrv_close = vitastor_close,
2020-06-25 11:59:31 +03:00
// Option list for the create operation
#if QEMU_VERSION_MAJOR >= 3
2020-08-03 23:50:50 +03:00
.create_opts = &vitastor_create_opts,
#else
.create_options = vitastor_create_opts,
#endif
2020-06-25 11:59:31 +03:00
// For qmp_blockdev_create(), used by the qemu monitor / QAPI
// Requires patching QAPI IDL, thus unimplemented
2020-08-03 23:50:50 +03:00
//.bdrv_co_create = vitastor_co_create,
2020-06-25 11:59:31 +03:00
#if QEMU_VERSION_MAJOR >= 3
2020-06-25 11:59:31 +03:00
// For bdrv_create(), used by qemu-img
2020-08-03 23:50:50 +03:00
.bdrv_co_create_opts = vitastor_co_create_opts,
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
.bdrv_co_truncate = vitastor_co_truncate,
2020-06-25 11:59:31 +03:00
2020-08-03 23:50:50 +03:00
.bdrv_co_preadv = vitastor_co_preadv,
.bdrv_co_pwritev = vitastor_co_pwritev,
#else
.bdrv_co_readv = vitastor_co_readv,
.bdrv_co_writev = vitastor_co_writev,
#endif
2020-08-03 23:50:50 +03:00
.bdrv_co_flush_to_disk = vitastor_co_flush,
2020-06-25 11:59:31 +03:00
2020-09-20 01:46:07 +03:00
#if QEMU_VERSION_MAJOR >= 4
2020-08-03 23:50:50 +03:00
.strong_runtime_opts = vitastor_strong_runtime_opts,
2020-09-20 01:46:07 +03:00
#endif
2020-06-25 11:59:31 +03:00
};
2020-08-03 23:50:50 +03:00
static void vitastor_block_init(void)
2020-06-25 11:59:31 +03:00
{
2020-08-03 23:50:50 +03:00
bdrv_register(&bdrv_vitastor);
2020-06-25 11:59:31 +03:00
}
2020-08-03 23:50:50 +03:00
// Hook vitastor_block_init() into QEMU's block module initialization (block_init macro from qemu/module.h)
block_init(vitastor_block_init);