From 7dba1148e7f050230776cbb968d660c5eca9b795 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sat, 29 Jan 2022 23:43:22 +0300 Subject: [PATCH] Add Hugo-based (https://gohugo.io) documentation --- docs/gen-docs.js | 55 +++++ docs/hugo/archetypes/default.md | 6 + docs/hugo/config.yaml | 35 +++ docs/hugo/content/_index.md | 6 + docs/hugo/content/config/_index.en.md | 61 +++++ docs/hugo/content/config/_index.ru.md | 63 +++++ docs/hugo/content/config/pool.en.md | 178 +++++++++++++++ docs/hugo/content/installation/packages.md | 41 ++++ docs/hugo/content/installation/quickstart.md | 72 ++++++ docs/hugo/content/installation/source.md | 54 +++++ docs/hugo/content/introduction/_index.md | 4 + .../hugo/content/introduction/architecture.md | 73 ++++++ docs/hugo/content/introduction/author.md | 34 +++ docs/hugo/content/introduction/features.md | 60 +++++ docs/hugo/content/performance/comparison1.md | 93 ++++++++ docs/hugo/content/performance/theoretical.md | 46 ++++ docs/hugo/content/performance/tuning.md | 6 + .../hugo/content/performance/understanding.md | 52 +++++ docs/hugo/content/usage/cli.md | 183 +++++++++++++++ docs/hugo/content/usage/nbd.md | 20 ++ docs/hugo/content/usage/qemu.md | 39 ++++ docs/hugo/i18n/ru.yaml | 37 +++ docs/hugo/layouts/partials/site-footer.html | 34 +++ docs/hugo/static/brand.svg | 215 ++++++++++++++++++ docs/hugo/static/custom.css | 138 +++++++++++ docs/hugo/static/favicon/favicon-16x16.png | Bin 0 -> 709 bytes docs/hugo/static/favicon/favicon-32x32.png | Bin 0 -> 1491 bytes docs/hugo/static/favicon/favicon.svg | 196 ++++++++++++++++ docs/params/head/common.en.md | 6 + docs/params/head/common.ru.md | 6 + docs/params/head/layout-cluster.en.md | 7 + docs/params/head/layout-cluster.ru.md | 7 + docs/params/head/layout-osd.en.md | 7 + docs/params/head/layout-osd.ru.md | 8 + docs/params/head/monitor.en.md | 6 + docs/params/head/monitor.ru.md | 6 + docs/params/head/network.en.md | 7 + docs/params/head/network.ru.md | 7 + docs/params/head/osd.en.md | 7 + docs/params/head/osd.ru.md | 8 + docs/params/osd.yml | 4 + 41 files changed, 1887 insertions(+) create mode 100755 docs/gen-docs.js create mode 100644 docs/hugo/archetypes/default.md create mode 100644 docs/hugo/config.yaml create mode 100644 docs/hugo/content/_index.md create mode 100644 docs/hugo/content/config/_index.en.md create mode 100644 docs/hugo/content/config/_index.ru.md create mode 100644 docs/hugo/content/config/pool.en.md create mode 100644 docs/hugo/content/installation/packages.md create mode 100644 docs/hugo/content/installation/quickstart.md create mode 100644 docs/hugo/content/installation/source.md create mode 100644 docs/hugo/content/introduction/_index.md create mode 100644 docs/hugo/content/introduction/architecture.md create mode 100644 docs/hugo/content/introduction/author.md create mode 100644 docs/hugo/content/introduction/features.md create mode 100644 docs/hugo/content/performance/comparison1.md create mode 100644 docs/hugo/content/performance/theoretical.md create mode 100644 docs/hugo/content/performance/tuning.md create mode 100644 docs/hugo/content/performance/understanding.md create mode 100644 docs/hugo/content/usage/cli.md create mode 100644 docs/hugo/content/usage/nbd.md create mode 100644 docs/hugo/content/usage/qemu.md create mode 100644 docs/hugo/i18n/ru.yaml create mode 100644 docs/hugo/layouts/partials/site-footer.html create mode 100644 docs/hugo/static/brand.svg create mode 100644 docs/hugo/static/custom.css create mode 100644 docs/hugo/static/favicon/favicon-16x16.png create mode 
100644 docs/hugo/static/favicon/favicon-32x32.png create mode 100644 docs/hugo/static/favicon/favicon.svg create mode 100644 docs/params/head/common.en.md create mode 100644 docs/params/head/common.ru.md create mode 100644 docs/params/head/layout-cluster.en.md create mode 100644 docs/params/head/layout-cluster.ru.md create mode 100644 docs/params/head/layout-osd.en.md create mode 100644 docs/params/head/layout-osd.ru.md create mode 100644 docs/params/head/monitor.en.md create mode 100644 docs/params/head/monitor.ru.md create mode 100644 docs/params/head/network.en.md create mode 100644 docs/params/head/network.ru.md create mode 100644 docs/params/head/osd.en.md create mode 100644 docs/params/head/osd.ru.md diff --git a/docs/gen-docs.js b/docs/gen-docs.js new file mode 100755 index 00000000..7b71cbf1 --- /dev/null +++ b/docs/gen-docs.js @@ -0,0 +1,55 @@ +#!/usr/bin/nodejs + +const fs = require('fs'); +const yaml = require('yaml'); + +const L = { + en: {}, + ru: { + Type: 'Тип', + Default: 'Значение по умолчанию', + Minimum: 'Минимальное значение', + }, +}; +const types = { + en: { + string: 'string', + bool: 'boolean', + int: 'integer', + sec: 'seconds', + ms: 'milliseconds', + us: 'microseconds', + }, + ru: { + string: 'строка', + bool: 'булево (да/нет)', + int: 'целое число', + sec: 'секунды', + ms: 'миллисекунды', + us: 'микросекунды', + }, +}; +const params_files = fs.readdirSync(__dirname+'/params') + .filter(f => f.substr(-4) == '.yml') + .map(f => f.substr(0, f.length-4)); + +for (const file of params_files) +{ + const cfg = yaml.parse(fs.readFileSync(__dirname+'/params/'+file+'.yml', { encoding: 'utf-8' })); + for (const lang in types) + { + let out = '\n\n{{< toc >}}'; + for (const c of cfg) + { + out += `\n\n## ${c.name}\n\n`; + out += `- ${L[lang]['Type'] || 'Type'}: ${c["type_"+lang] || types[lang][c.type] || c.type}\n`; + if (c.default !== undefined) + out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`; + if (c.min !== undefined) + out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`; + out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, ''); + } + const head = fs.readFileSync(__dirname+'/params/head/'+file+'.'+lang+'.md', { encoding: 'utf-8' }); + fs.writeFileSync(__dirname+'/hugo/content/config/'+file+'.'+lang+'.md', head.replace(/\s+$/, '')+out+"\n"); + } +} diff --git a/docs/hugo/archetypes/default.md b/docs/hugo/archetypes/default.md new file mode 100644 index 00000000..00e77bd7 --- /dev/null +++ b/docs/hugo/archetypes/default.md @@ -0,0 +1,6 @@ +--- +title: "{{ replace .Name "-" " " | title }}" +date: {{ .Date }} +draft: true +--- + diff --git a/docs/hugo/config.yaml b/docs/hugo/config.yaml new file mode 100644 index 00000000..0203f425 --- /dev/null +++ b/docs/hugo/config.yaml @@ -0,0 +1,35 @@ +baseURL: http://localhost +title: Vitastor +theme: hugo-geekdoc +#languageCode: en-us + +pluralizeListTitles: false + +# Geekdoc required configuration +pygmentsUseClasses: true +pygmentsCodeFences: true +disablePathToLower: true + +# Required if you want to render robots.txt template +enableRobotsTXT: true + +defaultContentLanguage: en +languages: + en: + weight: 1 + languageName: English + ru: + weight: 1 + languageName: Русский + +markup: + goldmark: + renderer: + # Needed for mermaid shortcode + unsafe: true + tableOfContents: + startLevel: 1 + endLevel: 9 + +taxonomies: + tag: tags diff --git a/docs/hugo/content/_index.md b/docs/hugo/content/_index.md new file mode 100644 index 00000000..aefdd528 --- /dev/null +++ b/docs/hugo/content/_index.md @@ -0,0 
+1,6 @@ +## The Idea + +Vitastor is a small, simple and fast clustered block storage (storage for VM drives), +architecturally similar to Ceph, which means strong consistency, primary-replication, +symmetric clustering and automatic data distribution over any number of drives +of any size with configurable redundancy (replication or erasure codes/XOR). diff --git a/docs/hugo/content/config/_index.en.md b/docs/hugo/content/config/_index.en.md new file mode 100644 index 00000000..c41adae0 --- /dev/null +++ b/docs/hugo/content/config/_index.en.md @@ -0,0 +1,61 @@ +--- +title: Parameter Reference +weight: 1 +--- + +Vitastor configuration consists of: +- Configuration parameters (key-value), described here +- [Pool configuration]({{< ref "config/pool" >}}) +- OSD placement tree configuration +- Inode configuration, i.e. image metadata like name, size and parent reference + +Configuration parameters can be set in 3 places: +- Configuration file (`/etc/vitastor/vitastor.conf` or other path) +- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd + connection parameters should obviously be set in the configuration file. +- Command line of Vitastor components: OSD, mon, fio and QEMU options, + OpenStack/Proxmox/etc configuration. The latter doesn't allow you to set all + variables directly, but it does allow you to override the configuration file + and set everything you need inside that file. + +In the future, additional configuration methods may be added: +- OSD superblock which will, by design, contain parameters related to the disk + layout and to one specific OSD. +- OSD-specific keys in etcd like `/vitastor/config/osd/<number>`. + +## Common Parameters + +These are the most common parameters which apply to all components of Vitastor. + +[See the list]({{< ref "common" >}}) + +## Cluster-Wide Disk Layout Parameters + +These parameters apply to clients and OSDs and can't be changed after OSD +initialization. + +[See the list]({{< ref "layout-cluster" >}}) + +## OSD Disk Layout Parameters + +These parameters apply to OSDs and can't be changed after OSD initialization. + +[See the list]({{< ref "layout-osd" >}}) + +## Network Protocol Parameters + +These parameters apply to clients and OSDs and can be changed with a restart. + +[See the list]({{< ref "network" >}}) + +## Runtime OSD Parameters + +These parameters apply to OSDs and can be changed with an OSD restart. + +[See the list]({{< ref "osd" >}}) + +## Monitor Parameters + +These parameters only apply to Monitors. + +[See the list]({{< ref "monitor" >}}) diff --git a/docs/hugo/content/config/_index.ru.md b/docs/hugo/content/config/_index.ru.md new file mode 100644 index 00000000..f998828a --- /dev/null +++ b/docs/hugo/content/config/_index.ru.md @@ -0,0 +1,63 @@ +--- +title: Перечень настроек +weight: 1 +--- + +Конфигурация Vitastor состоит из: +- Параметров (ключ-значение), описанных на данной странице +- Настроек пулов +- Настроек дерева OSD +- Настроек инодов, т.е. метаданных образов, таких, как имя, размер и ссылки на + родительский образ + +Параметры конфигурации могут задаваться в 3 местах: +- Файле конфигурации (`/etc/vitastor/vitastor.conf` или по другому пути) +- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может + задаваться там, кроме, естественно, самих параметров соединения с etcd, + которые должны задаваться в файле конфигурации +- В командной строке компонентов Vitastor: OSD, монитора, опциях fio и QEMU, + настройках OpenStack, Proxmox и т.п.
Последние, как правило, не включают полный + набор параметров напрямую, но разрешают определить путь к файлу конфигурации + и задать любые параметры в нём. + +В будущем также могут быть добавлены другие способы конфигурации: +- Суперблок OSD, в котором будут храниться параметры OSD, связанные с дисковым + форматом и с этим конкретным OSD. +- OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`. + +## Общие параметры + +Это наиболее общие параметры, используемые всеми компонентами Vitastor. + +[Посмотреть список]({{< ref "common" >}}) + +## Дисковые параметры уровня кластера + +Эти параметры используются клиентами и OSD и не могут быть изменены после +инициализации OSD. + +[Посмотреть список]({{< ref "layout-cluster" >}}) + +## Дисковые параметры OSD + +Эти параметры используются OSD и не могут быть изменены после инициализации OSD. + +[Посмотреть список]({{< ref "layout-osd" >}}) + +## Параметры сетевого протокола + +Эти параметры используются клиентами и OSD и могут быть изменены с перезапуском. + +[Посмотреть список]({{< ref "network" >}}) + +## Изменяемые параметры OSD + +Эти параметры используются OSD и могут быть изменены с перезапуском. + +[Посмотреть список]({{< ref "osd" >}}) + +## Параметры мониторов + +Данные параметры используются только мониторами Vitastor. + +[Посмотреть список]({{< ref "monitor" >}}) diff --git a/docs/hugo/content/config/pool.en.md b/docs/hugo/content/config/pool.en.md new file mode 100644 index 00000000..d8164163 --- /dev/null +++ b/docs/hugo/content/config/pool.en.md @@ -0,0 +1,178 @@ +--- +title: Pool configuration +weight: 100 +--- + +Pool configuration is set in the etcd key `/vitastor/config/pools` in the following +JSON format: + +``` +{ + "<pool_id>": { + "name": "<name>", + ...other parameters... + } +} +``` + +{{< toc >}} + +# Parameters + +## name + +- Type: string +- Required + +Pool name. + +## scheme + +- Type: string +- Required +- One of: "replicated", "xor" or "jerasure" + +Redundancy scheme used for data in this pool. + +## pg_size + +- Type: integer +- Required + +Total number of disks for PGs of this pool - i.e., the number of replicas for +replicated pools and the number of data plus parity disks for EC/XOR pools. + +## parity_chunks + +- Type: integer + +Number of parity chunks for EC/XOR pools. For such pools, data will be lost +if you lose more than parity_chunks disks at once, so this parameter can be +equally described as FTT (number of failures to tolerate). + +Required for EC/XOR pools, ignored for replicated pools. + +## pg_minsize + +- Type: integer +- Required + +Minimum number of live disks required for PGs of this pool to remain active. +That is, if it becomes impossible to place PG data on at least (pg_minsize) +OSDs, the PG is deactivated for both read and write. So you know that a fresh +write always goes to at least (pg_minsize) OSDs (disks). + +FIXME: pg_minsize behaviour may be changed in the future to only make PGs +read-only instead of deactivating them. + +## pg_count + +- Type: integer +- Required + +Number of PGs for this pool. The value should be big enough for the monitor / +LP solver to be able to optimize data placement. + +"Enough" is usually around 64-128 PGs per OSD, i.e. you can set pg_count for a pool +to (total OSD count * 100 / pg_size). You can round it to the closest power of 2, +because that makes it easier to reduce or increase PG count later by dividing or +multiplying it by 2. + +In Vitastor, PGs are ephemeral, so you can change pool PG count anytime just +by overwriting pool configuration in etcd.
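+ +For example, a sketch of such a change (not part of this patch): assuming the replicated pool with ID 1 from the examples below, its PG count can be doubled with a single etcdctl call: + +``` +etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool", + "scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":512,"failure_domain":"host"}}' +``` + +Note that the key holds the configuration of all pools, so the whole JSON value has to be rewritten; one of the monitors will then re-map the PGs automatically.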
+The amount of data affected by rebalance will be smaller if the new PG count +is a multiple of the old PG count or vice versa. + +## failure_domain + +- Type: string +- Default: host + +Failure domain specification. Must be "host" or "osd" or refer to one of the +placement tree levels, defined in [placement_levels]({{< ref "config/monitor#placement_levels" >}}). + +Two replicas, or two parts in case of EC/XOR, of the same block of data are +never put on OSDs in the same failure domain (for example, on the same host). +So the failure domain specifies the unit whose failure you are protecting +yourself against. + +## max_osd_combinations + +- Type: integer +- Default: 10000 + +Vitastor's data placement algorithm is based on an LP solver, and the OSD +combinations fed to it are generated randomly. This parameter specifies the maximum +number of combinations to generate when optimising PG placement. + +This parameter usually doesn't need to be changed. + +## pg_stripe_size + +- Type: integer +- Default: 0 + +Specifies the stripe size for this pool according to which images are split into +different PGs. The stripe size can't be smaller than [block_size]({{< ref "config/layout-cluster#block_size" >}}) +multiplied by (pg_size - parity_chunks) for EC/XOR pools, or 1 for replicated pools, +and the same value is used by default. + +This means the first `pg_stripe_size = (block_size * (pg_size-parity_chunks))` bytes +of an image go to one PG, the next `pg_stripe_size` bytes go to another PG, and so on. + +Usually doesn't need to be changed separately from the block size. + +## root_node + +- Type: string + +Specifies the root node of the OSD tree to restrict this pool's OSDs to. +The referenced root node must exist in /vitastor/config/node_placement. + +## osd_tags + +- Type: string or array of strings + +Specifies OSD tags to restrict this pool to. If multiple tags are specified, +only OSDs having all of these tags will be used for this pool. + +## primary_affinity_tags + +- Type: string or array of strings + +Specifies OSD tags to prefer when placing primary OSDs for this pool. +Note that for EC/XOR pools Vitastor always prefers to put the primary OSD on one +of the OSDs containing a data chunk for a PG.
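+ +As an illustration of the two tag parameters above (a hypothetical sketch; the pool ID and the tags "ssd" and "nvme" are invented for this example): + +``` +{ + "3": { + "name": "fastpool", + "scheme": "replicated", + "pg_size": 2, + "pg_minsize": 1, + "pg_count": 128, + "failure_domain": "host", + "osd_tags": ["ssd"], + "primary_affinity_tags": ["nvme"] + } +} +``` + +Such a pool would only use OSDs tagged "ssd" and, among those, would prefer OSDs also tagged "nvme" as primaries.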
+ +# Examples + +## Replicated pool + +``` +{ + "1": { + "name":"testpool", + "scheme":"replicated", + "pg_size":2, + "pg_minsize":1, + "pg_count":256, + "failure_domain":"host" + } +} +``` + +## Erasure-coded pool + +``` +{ + "2": { + "name":"ecpool", + "scheme":"jerasure", + "pg_size":3, + "parity_chunks":1, + "pg_minsize":2, + "pg_count":256, + "failure_domain":"host" + } +} +``` diff --git a/docs/hugo/content/installation/packages.md b/docs/hugo/content/installation/packages.md new file mode 100644 index 00000000..d381535b --- /dev/null +++ b/docs/hugo/content/installation/packages.md @@ -0,0 +1,41 @@ +--- +title: Packages +weight: 2 +--- + +## Debian + +- Trust Vitastor package signing key: + `wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -` +- Add Vitastor package repository to your /etc/apt/sources.list: + - Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main` + - Debian 10 (Buster): `deb https://vitastor.io/debian buster main` +- For Debian 10 (Buster) also enable backports repository: + `deb http://deb.debian.org/debian buster-backports main` +- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu` + +## CentOS + +- Add Vitastor package repository: + - CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm` + - CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm` +- Enable EPEL: `yum/dnf install epel-release` +- Enable additional CentOS repositories: + - CentOS 7: `yum install centos-release-scl` + - CentOS 8: `dnf install centos-release-advanced-virtualization` +- Enable elrepo-kernel: + - CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm` + - CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm` +- Install packages: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm` + +## Installation requirements + +- Linux kernel 5.4 or newer, for io_uring support. 5.8 or later is highly + recommended because io_uring is a relatively new technology and there is + at least one bug which reproduces with io_uring and HP SmartArray + controllers in 5.4 +- liburing 0.4 or newer +- lp_solve +- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs, + for example [#12402](https://github.com/etcd-io/etcd/pull/12402). +- node.js 10 or newer diff --git a/docs/hugo/content/installation/quickstart.md b/docs/hugo/content/installation/quickstart.md new file mode 100644 index 00000000..2601c58b --- /dev/null +++ b/docs/hugo/content/installation/quickstart.md @@ -0,0 +1,72 @@ +--- +title: Quick Start +weight: 1 +--- + +Prepare: + +- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs + with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors + [here]({{< ref "config/layout-cluster#immediate_commit" >}}). +- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal. +- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`. +- [Install Vitastor packages]({{< ref "installation/packages" >}}). + +## Configure monitors + +On the monitor hosts: +- Edit variables at the top of `/usr/lib/vitastor/mon/make-units.sh` to desired values. 
+- Create systemd units for the monitor and etcd: `/usr/lib/vitastor/mon/make-units.sh` +- Start etcd and monitors: `systemctl start etcd vitastor-mon` + +## Configure OSDs + +- Put etcd_address and osd_network into `/etc/vitastor/vitastor.conf`. Example: + ``` + { + "etcd_address": ["10.200.1.10:2379","10.200.1.11:2379","10.200.1.12:2379"], + "osd_network": "10.200.1.0/24" + } + ``` +- Initialize OSDs: + - Simplest, SSD-only: `/usr/lib/vitastor/mon/make-osd.sh /dev/disk/by-partuuid/XXX [/dev/disk/by-partuuid/YYY ...]` + - Hybrid, HDD+SSD: `/usr/lib/vitastor/mon/make-osd-hybrid.js /dev/sda /dev/sdb ...` — pass all your + devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own. + This script skips HDDs which are already partitioned, so if you want to use non-empty disks for + Vitastor, first wipe them with `wipefs -a`. SSDs with a GPT partition table are not skipped, + but some free unpartitioned space must be available because the script creates new partitions for journals. +- You can change OSD configuration in units or in `vitastor.conf`. + Check the [Configuration Reference]({{< ref "config" >}}) for parameter descriptions. +- `systemctl start vitastor.target` everywhere. +- If all your drives have capacitors, create the global configuration in etcd: \ + `etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'` + +## Create a pool + +Create pool configuration in etcd: + +``` +etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool", + "scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":256,"failure_domain":"host"}}' +``` + +For jerasure pools, the configuration should look like the following: + +``` +etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool", + "scheme":"jerasure","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}' +``` + +After you do this, one of the monitors will configure PGs and OSDs will start them. + +You can check PG states with `etcdctl --endpoints=... get --prefix /vitastor/pg/state`. All PGs should become 'active'. + +## Create an image + +Use vitastor-cli ([read the CLI documentation here]({{< ref "usage/cli" >}})): + +``` +vitastor-cli create -s 10G testimg +``` + +After that, you can run benchmarks or start QEMU manually with this image. diff --git a/docs/hugo/content/installation/source.md b/docs/hugo/content/installation/source.md new file mode 100644 index 00000000..02ff1545 --- /dev/null +++ b/docs/hugo/content/installation/source.md @@ -0,0 +1,54 @@ +--- +title: Building from Source +weight: 3 +--- + +## Requirements + +- gcc and g++ 8 or newer, clang 10 or newer, or another compiler with C++11 plus + designated initializers support from C++20 +- CMake +- liburing, jerasure headers + +## Basic instructions + +Download the source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/` + +Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build the fio engine, +you can disable it by passing `-DWITH_FIO=no` to cmake. + +Build and install Vitastor: + +``` +cd vitastor +mkdir build +cd build +cmake .. && make -j8 install +```
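+ +If you built the fio engine, you can smoke-test a running cluster with it right away. A sketch (the etcd address and the image name testimg are placeholders, and it assumes `make install` put `libfio_vitastor.so` into the library path): + +``` +fio -thread -ioengine=libfio_vitastor.so -name=test -bs=4k -direct=1 \ + -iodepth=1 -fsync=1 -rw=randwrite -etcd=10.115.0.10:2379/v3 -image=testimg +```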
+ +## QEMU Driver + +It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of +the QEMU build process. To do that: +- Install vitastor client library headers (from source or from the vitastor-client-dev package) +- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to the QEMU source +- Copy `src/qemu_driver.c` into the QEMU source directory as `block/block-vitastor.c` +- Build QEMU as usual + +But it is also possible to build it out-of-tree. To do that: +- Get the QEMU source, begin to build it, stop the build and copy headers: + - `<qemu>/include` → `<vitastor>/qemu/include` + - Debian: + * Use qemu packages from the main repository + * `<qemu>/b/qemu/config-host.h` → `<vitastor>/qemu/b/qemu/config-host.h` + * `<qemu>/b/qemu/qapi` → `<vitastor>/qemu/b/qemu/qapi` + - CentOS 8: + * Use qemu packages from the Advanced-Virtualization repository. To enable it, run + `yum install centos-release-advanced-virtualization.noarch` and then `yum install qemu` + * `<qemu>/config-host.h` → `<vitastor>/qemu/b/qemu/config-host.h` + * For QEMU 3.0+: `<qemu>/qapi` → `<vitastor>/qemu/b/qemu/qapi` + * For QEMU 2.0+: `<qemu>/qapi-types.h` → `<vitastor>/qemu/b/qemu/qapi-types.h` + - `config-host.h` and `qapi` are required because they contain generated headers +- Configure Vitastor with `WITH_QEMU=yes` and, if you're on RHEL, also with `QEMU_PLUGINDIR=qemu-kvm`: + `cmake .. -DWITH_QEMU=yes`. +- After that, Vitastor will build `block-vitastor.so` during its build process. diff --git a/docs/hugo/content/introduction/_index.md b/docs/hugo/content/introduction/_index.md new file mode 100644 index 00000000..c1bf0820 --- /dev/null +++ b/docs/hugo/content/introduction/_index.md @@ -0,0 +1,4 @@ +--- +title: Introduction +weight: -1 +--- diff --git a/docs/hugo/content/introduction/architecture.md b/docs/hugo/content/introduction/architecture.md new file mode 100644 index 00000000..bfc3a544 --- /dev/null +++ b/docs/hugo/content/introduction/architecture.md @@ -0,0 +1,73 @@ +--- +title: Architecture +weight: 3 +--- + +For people familiar with Ceph, Vitastor is quite similar: + +- Vitastor also has Pools, PGs, OSDs, Monitors, Failure Domains, Placement Tree: + - OSD (Object Storage Daemon) is a process that stores data and serves read/write requests. + - PG (Placement Group) is a container for data that (normally) shares the same replicas. + - Pool is a container for data that has the same redundancy scheme and placement rules. + - Monitor is a separate daemon that watches cluster state and controls data distribution. + - Failure Domain is a group of OSDs that you allow to fail together. It's "host" by default. + - Placement Tree groups OSDs in a hierarchy to later split them into Failure Domains. +- Vitastor also distributes every image's data across the whole cluster. +- Vitastor is also transactional (every write to the cluster is atomic). +- OSDs also have journal and metadata and they can also be put on separate drives. +- Just like in Ceph, the client library attempts to recover from any cluster failure, so + you can basically reboot the whole cluster and only pause, but not crash, your clients + (please report a bug if a client crashes in that case). + +However, there are also differences: + +- Vitastor's main focus is on SSDs. Hybrid SSD+HDD setups are also possible. +- Vitastor OSD is (and will always be) single-threaded. If you want to dedicate more than 1 core + per drive, you should run multiple OSDs, each on a different partition of the drive. + Vitastor isn't CPU-hungry though (as opposed to Ceph), so 1 core is sufficient in a lot of cases. +- Metadata and journal are always kept in memory. Metadata size depends linearly on drive capacity + and data store block size, which is 128 KB by default.
With 128 KB blocks, metadata should occupy +around 512 MB per 1 TB of data (1 TB / 128 KB ≈ 8.4 million blocks at roughly 64 bytes +per entry), which is still less than Ceph wants. The journal doesn't have to be big; +the example test below was conducted with only a 16 MB journal. A big journal is probably even +harmful, as dirty write metadata also takes some memory. +- Vitastor's storage layer doesn't have internal copy-on-write or redirect-write. It may be + possible to create a good copy-on-write storage layer, but it's much harder and makes performance + less deterministic, so CoW isn't used in Vitastor. +- The basic layer of Vitastor is block storage with fixed-size blocks, not object storage with + rich semantics like in Ceph (RADOS). +- There's a "lazy fsync" mode which allows batching writes before flushing them to the disk. + This makes it possible to use Vitastor with desktop SSDs, but it still lowers performance due to additional + network roundtrips, so use server SSDs with capacitor-based power loss protection + ("Advanced Power Loss Protection") for best performance. +- PGs are ephemeral. This means that they aren't stored on data disks and only exist in memory + while OSDs are running. +- The recovery process is per-object (per-block), not per-PG. Also, there are no PGLOGs. +- Monitors don't store data. Cluster configuration and state are stored in etcd in simple human-readable + JSON structures. Monitors only watch cluster state and handle data movement. + Thus Vitastor's Monitor isn't a critical component of the system and is more similar to Ceph's Manager. + Vitastor's Monitor is implemented in node.js. +- PG distribution isn't based on consistent hashes. All PG mappings are stored in etcd. + Rebalancing PGs between OSDs is done by mathematical optimization - the data distribution problem + is reduced to a linear programming problem and solved by lp_solve (a toy model of this reduction + is sketched at the end of this page). This allows for almost + perfect (96-99% uniformity compared to Ceph's 80-90%) data distribution in most cases, the ability + to map PGs by hand without breaking rebalancing logic, reduced OSD peer-to-peer communication + (on average, OSDs have fewer peers) and less data movement. It also probably has a drawback - + this method may fail in very large clusters, but up to several hundred OSDs it's perfectly fine. + It's also easy to add consistent hashes in the future if they ever prove necessary. +- There's no separate CRUSH layer. You select the pool redundancy scheme, placement root, failure domain + and so on directly in the pool configuration. +- Images are global, i.e. you can't create multiple images with the same name in different pools. + +## Implementation Principles + +- I like architecturally simple solutions. Vitastor is and will always be designed + exactly like that. +- I also like reinventing the wheel to some extent, like writing my own HTTP client + for etcd interaction instead of using prebuilt libraries, because in this case + I'm confident about what my code does and what it doesn't do. +- I don't care about C++ "best practices" like RAII or proper inheritance or usage of + smart pointers or whatever, and I don't intend to change my mind, so if you're here + looking for ideal reference C++ code, this probably isn't the right place. +- I like node.js better than any other dynamically-typed language interpreter + because it's faster than any other interpreter in the world, has a neutral C-like + syntax and a built-in event loop. That's why the Monitor is implemented in node.js.
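+ +As an illustration of the LP reduction mentioned above, here is a deliberately tiny hand-written model in lp_solve's LP format (a sketch only; the real model generated by the monitor is much larger and weighted): three equal OSDs A, B, C, 16 PGs with 2 replicas each, and one variable per possible OSD pair: + +``` +/* maximize the number of placed PGs */ +max: x_ab + x_ac + x_bc; +/* no OSD may carry more than its fair share of replicas (ceil(16*2/3) = 11) */ +x_ab + x_ac <= 11; /* load of OSD A */ +x_ab + x_bc <= 11; /* load of OSD B */ +x_ac + x_bc <= 11; /* load of OSD C */ +/* total PG count */ +x_ab + x_ac + x_bc <= 16; +int x_ab, x_ac, x_bc; +``` + +Feeding this file to `lp_solve` yields an optimal, near-uniform distribution, e.g. 6/5/5 PGs per pair, i.e. 11/11/10 replicas per OSD.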
diff --git a/docs/hugo/content/introduction/author.md b/docs/hugo/content/introduction/author.md new file mode 100644 index 00000000..b4970f88 --- /dev/null +++ b/docs/hugo/content/introduction/author.md @@ -0,0 +1,34 @@ +--- +title: Author and License +weight: 3 +--- + +Copyright (c) Vitaliy Filippov (vitalif [at] yourcmc.ru), 2019+ + +Join Vitastor Telegram Chat: https://t.me/vitastor + +All server-side code (OSD, Monitor and so on) is licensed under the terms of +Vitastor Network Public License 1.1 (VNPL 1.1), a copyleft license based on +GNU GPLv3.0 with the additional "Network Interaction" clause which requires +opensourcing all programs directly or indirectly interacting with Vitastor +through a computer network and expressly designed to be used in conjunction +with it ("Proxy Programs"). Proxy Programs may be made public not only under +the terms of the same license, but also under the terms of any GPL-Compatible +Free Software License, as listed by the Free Software Foundation. +This is a stricter copyleft license than the Affero GPL. + +Please note that VNPL doesn't require you to open the code of proprietary +software running inside a VM if it's not specially designed to be used with +Vitastor. + +Basically, you can't use the software in a proprietary environment to provide +its functionality to users without opensourcing all intermediary components +standing between the user and Vitastor or purchasing a commercial license +from the author 😀. + +Client libraries (cluster_client and so on) are dual-licensed under the same +VNPL 1.1 and also GNU GPL 2.0 or later to allow for compatibility with GPLed +software like QEMU and fio. + +You can find the full text of VNPL-1.1 in the file [VNPL-1.1.txt](VNPL-1.1.txt). +GPL 2.0 is also included in this repository as [GPL-2.0.txt](GPL-2.0.txt). diff --git a/docs/hugo/content/introduction/features.md b/docs/hugo/content/introduction/features.md new file mode 100644 index 00000000..ac72e385 --- /dev/null +++ b/docs/hugo/content/introduction/features.md @@ -0,0 +1,60 @@ +--- +title: Features +weight: 1 +--- + +Vitastor is currently a pre-release and it still misses some important features. 
+However, the following is implemented: + +- Basic part: highly-available block storage with symmetric clustering and no SPOF +- Performance ;-D +- Multiple redundancy schemes: Replication, XOR n+1, Reed-Solomon erasure codes + based on the jerasure library with any number of data and parity drives in a group +- Configuration via simple JSON data structures in etcd (parameters, pools and images) +- Automatic data distribution over OSDs, with support for: + - Mathematical optimization for better uniformity and less data movement + - Multiple pools + - Placement tree, OSD selection by tags (device classes) and placement root + - Configurable failure domains +- Recovery of degraded blocks +- Rebalancing (data movement between OSDs) +- Lazy fsync support +- Per-OSD and per-image I/O and space usage statistics in etcd +- Snapshots and copy-on-write image clones +- Write throttling to smooth random write workloads in SSD+HDD configurations +- RDMA/RoCEv2 support via libibverbs + +CLI (vitastor-cli): +- Pool listing and space stats (df) +- Image listing, space and I/O stats (ls) +- Image and snapshot creation (create, modify) +- Image removal and snapshot merge (rm, flatten, merge, rm-data) + +Plugins and packaging: +- Debian and CentOS packages +- Generic user-space client library +- Native QEMU driver +- Loadable fio engine for benchmarks +- NBD proxy for kernel mounts +- CSI plugin for Kubernetes +- OpenStack support: Cinder driver, Nova and libvirt patches +- Proxmox storage plugin and packages + +## Roadmap + +The following features are planned for the future: + +- Better OSD creation and auto-start tools +- Other administrative tools +- Web GUI +- OpenNebula plugin +- iSCSI proxy +- Simplified NFS proxy +- Multi-threaded client +- Faster failover +- Scrubbing without checksums (verification of replicas) +- Checksums +- Tiered storage (SSD caching) +- NVDIMM support +- Compression (possibly) +- Read caching using system page cache (possibly) diff --git a/docs/hugo/content/performance/comparison1.md b/docs/hugo/content/performance/comparison1.md new file mode 100644 index 00000000..a8eb8772 --- /dev/null +++ b/docs/hugo/content/performance/comparison1.md @@ -0,0 +1,93 @@ +--- +title: Example Comparison with Ceph +weight: 4 +--- + +Hardware configuration: 4 nodes, each with: +- 6x SATA SSD Intel D3-S4510 3.84 TB +- 2x Xeon Gold 6242 (16 cores @ 2.8 GHz) +- 384 GB RAM +- 1x 25 GbE network interface (Mellanox ConnectX-4 LX), connected to a Juniper QFX5200 switch + +CPU powersaving was disabled. Both Vitastor and Ceph were configured with 2 OSDs per 1 SSD. + +All of the results below apply to 4 KB blocks and random access (unless indicated otherwise). + +T8Q64 tests were conducted over 8 RBD images of 400 GB each from all hosts (every host was running 2 instances of fio). +This is because Ceph has performance penalties related to running multiple clients over a single RBD image. + +cephx_sign_messages was set to false during tests; RocksDB and Bluestore settings were left at defaults. + +The T8Q64 read test was conducted over 1 larger inode (3.2T) from all hosts (every host was running 2 instances of fio). +Vitastor has no performance penalties related to running multiple clients over a single inode. +When conducted from one node with all primary OSDs moved to other nodes, the result was slightly lower (689000 iops); +this is because all operations resulted in network roundtrips between the client and the primary OSD.
+When fio was colocated with OSDs (like in Ceph benchmarks above), 1/4 of the read workload actually +used the loopback network. + +Vitastor was configured with: `--disable_data_fsync true --immediate_commit all --flusher_count 8 + --disk_alignment 4096 --journal_block_size 4096 --meta_block_size 4096 + --journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 + --journal_size 16777216`. + +## Raw drive performance + +- T1Q1 write ~27000 iops (~0.037ms latency) +- T1Q1 read ~9800 iops (~0.101ms latency) +- T1Q32 write ~60000 iops +- T1Q32 read ~81700 iops + +## 2 replicas + +### Ceph 15.2.4 (Bluestore) + +- T1Q1 write ~1000 iops (~1ms latency) +- T1Q1 read ~1750 iops (~0.57ms latency) +- T8Q64 write ~100000 iops, total CPU usage by OSDs about 40 virtual cores on each node +- T8Q64 read ~480000 iops, total CPU usage by OSDs about 40 virtual cores on each node + +In fact, not that bad for Ceph. These servers are an example of well-balanced Ceph nodes. +However, CPU usage and I/O latency were through the roof, as usual. + +### Vitastor 0.4.0 (native) + +- T1Q1 write: 7087 iops (0.14ms latency) +- T1Q1 read: 6838 iops (0.145ms latency) +- T2Q64 write: 162000 iops, total CPU usage by OSDs about 3 virtual cores on each node +- T8Q64 read: 895000 iops, total CPU usage by OSDs about 4 virtual cores on each node +- Linear write (4M T1Q32): 2800 MB/s +- Linear read (4M T1Q32): 1500 MB/s + +### Vitastor 0.4.0 (NBD) + +NBD is currently required to mount Vitastor via kernel, but it imposes additional overhead +due to additional copying between the kernel and userspace. This mostly hurts linear +bandwidth, not iops. + +Vitastor with single-threaded NBD on the same hardware: +- T1Q1 write: 6000 iops (0.166ms latency) +- T1Q1 read: 5518 iops (0.18ms latency) +- T1Q128 write: 94400 iops +- T1Q128 read: 103000 iops +- Linear write (4M T1Q128): 1266 MB/s (compared to 2800 MB/s via fio) +- Linear read (4M T1Q128): 975 MB/s (compared to 1500 MB/s via fio) + +## EC/XOR 2+1 + +### Ceph 15.2.4 + +- T1Q1 write: 730 iops (~1.37ms latency) +- T1Q1 read: 1500 iops with cold cache (~0.66ms latency), 2300 iops after 2 minute metadata cache warmup (~0.435ms latency) +- T4Q128 write (4 RBD images): 45300 iops, total CPU usage by OSDs about 30 virtual cores on each node +- T8Q64 read (4 RBD images): 278600 iops, total CPU usage by OSDs about 40 virtual cores on each node +- Linear write (4M T1Q32): 1950 MB/s before preallocation, 2500 MB/s after preallocation +- Linear read (4M T1Q32): 2400 MB/s + +### Vitastor 0.4.0 + +- T1Q1 write: 2808 iops (~0.355ms latency) +- T1Q1 read: 6190 iops (~0.16ms latency) +- T2Q64 write: 85500 iops, total CPU usage by OSDs about 3.4 virtual cores on each node +- T8Q64 read: 812000 iops, total CPU usage by OSDs about 4.7 virtual cores on each node +- Linear write (4M T1Q32): 3200 MB/s +- Linear read (4M T1Q32): 1800 MB/s diff --git a/docs/hugo/content/performance/theoretical.md b/docs/hugo/content/performance/theoretical.md new file mode 100644 index 00000000..02637946 --- /dev/null +++ b/docs/hugo/content/performance/theoretical.md @@ -0,0 +1,46 @@ +--- +title: Vitastor's Theoretical Maximum Performance +weight: 3 +--- + +Replicated setups: +- Single-threaded (T1Q1) read latency: 1 network roundtrip + 1 disk read. +- Single-threaded write+fsync latency: + - With immediate commit: 2 network roundtrips + 1 disk write. + - With lazy commit: 4 network roundtrips + 1 disk write + 1 disk flush. +- Saturated parallel read iops: min(network bandwidth, sum(disk read iops)). 
+- Saturated parallel write iops: min(network bandwidth, sum(disk write iops / number of replicas / write amplification)). + +EC/XOR setups: +- Single-threaded (T1Q1) read latency: 1.5 network roundtrips + 1 disk read. +- Single-threaded write+fsync latency: + - With immediate commit: 3.5 network roundtrips + 1 disk read + 2 disk writes. + - With lazy commit: 5.5 network roundtrips + 1 disk read + 2 disk writes + 2 disk fsyncs. + - The 0.5 is actually (k-1)/k, which means that the additional roundtrip doesn't happen when + the read sub-operation can be served locally. +- Saturated parallel read iops: min(network bandwidth, sum(disk read iops)). +- Saturated parallel write iops: min(network bandwidth, sum(disk write iops * number of data drives / (number of data + parity drives) / write amplification)). + In fact, disk write iops in this formula should be measured under a mixed ~10% read / ~90% write workload. + +Write amplification for 4 KB blocks is usually 3-5 in Vitastor: +1. Journal block write +2. Journal data write +3. Metadata block write +4. Another journal block write for EC/XOR setups +5. Data block write + +If you manage to get an SSD which handles 512 byte blocks well (Optane?), you may +lower 1, 3 and 4 to 512 bytes (1/8 of the data size) and get WA as low as 2.375 +((3 × 512 B + 2 × 4 KB) / 4 KB). + +Lazy fsync also reduces WA for parallel workloads because journal blocks are only +written when they fill up or fsync is requested. + +## In Practice + +In practice, using tests from [Understanding Performance]({{< ref "performance/understanding" >}}) +and good server-grade SSD/NVMe drives, you should aim for: +- At least 5000 T1Q1 replicated read and write iops (maximum 0.2ms latency) +- At least ~80k parallel read iops or ~30k write iops per 1 core (1 OSD) +- Disk-speed or wire-speed linear reads and writes, whichever is the bottleneck in your case + +If your results are lower, that may mean you have bad drives, a bad network or some kind of misconfiguration. diff --git a/docs/hugo/content/performance/tuning.md b/docs/hugo/content/performance/tuning.md new file mode 100644 index 00000000..cbf19c77 --- /dev/null +++ b/docs/hugo/content/performance/tuning.md @@ -0,0 +1,6 @@ +--- +title: Tuning +weight: 2 +--- + +- Disable CPU powersaving diff --git a/docs/hugo/content/performance/understanding.md b/docs/hugo/content/performance/understanding.md new file mode 100644 index 00000000..92e2e282 --- /dev/null +++ b/docs/hugo/content/performance/understanding.md @@ -0,0 +1,52 @@ +--- +title: Understanding Storage Performance +weight: 1 +--- + +The most important thing for fast storage is latency, not parallel iops. + +The best possible latency is achieved with one thread and a queue depth of 1, which basically means +"client load as low as possible". In this case IOPS = 1/latency, and this number doesn't +scale with the number of servers, drives, server processes or threads and so on. +Single-threaded IOPS and latency numbers only depend on *how fast a single daemon is*. + +Why is it important? It's important because some applications *can't* use +a queue depth greater than 1, because their workload isn't parallelizable. A notable example +is any ACID DBMS, because all of them write their WALs sequentially with fsync()s. + +fsync, by the way, is another important thing often missing in benchmarks. The point is +that drives have cache buffers and don't guarantee that your data is actually persisted +until you call fsync(), which is translated to a FLUSH CACHE command by the OS.
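+ +As a side note, you can check how the OS currently treats a drive's volatile cache through sysfs (a quick sketch; sdX is a placeholder): + +``` +# "write back" means the kernel considers the drive cache volatile and +# issues real flush commands on fsync; "write through" means it doesn't +cat /sys/block/sdX/queue/write_cache +```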
+ +Desktop SSDs are very fast without fsync - NVMes, for example, can process ~80000 write +operations per second with a queue depth of 1 without fsync - but they're really slow with +fsync because they have to actually write data to flash chips when you call fsync. A typical +number is around 1000-2000 iops with fsync. + +Server SSDs often have supercapacitors that act as a built-in UPS and allow the drive +to flush its DRAM cache to the persistent flash storage when a power loss occurs. +This makes them perform equally well with and without fsync. This feature is called +"Advanced Power Loss Protection" by Intel; other vendors either call it similarly +or name it directly, e.g. "Full Capacitor-Based Power Loss Protection". + +All software-defined storage systems that I currently know of are slow in terms of latency. +Notable examples are Ceph and the internal SDSes used by cloud providers like Amazon, Google, +Yandex and so on. They're all slow and can only reach ~0.3ms read and ~0.6ms 4 KB write latency +with best-in-slot hardware. + +And that's in the SSD era, when you can buy an SSD with ~0.04ms latency for $100. + +I use the following 6 commands with small variations to benchmark any storage: + +- Linear write: + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4M -iodepth=32 -rw=write -runtime=60 -filename=/dev/sdX` +- Linear read: + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4M -iodepth=32 -rw=read -runtime=60 -filename=/dev/sdX` +- Random write latency (T1Q1, this hurts storage systems the most): + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4k -iodepth=1 -fsync=1 -rw=randwrite -runtime=60 -filename=/dev/sdX` +- Random read latency (T1Q1): + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4k -iodepth=1 -rw=randread -runtime=60 -filename=/dev/sdX` +- Parallel write iops (use numjobs if a single CPU core is insufficient to saturate the load): + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4k -iodepth=128 [-numjobs=4 -group_reporting] -rw=randwrite -runtime=60 -filename=/dev/sdX` +- Parallel read iops (use numjobs if a single CPU core is insufficient to saturate the load): + `fio -ioengine=libaio -direct=1 -invalidate=1 -name=test -bs=4k -iodepth=128 [-numjobs=4 -group_reporting] -rw=randread -runtime=60 -filename=/dev/sdX` diff --git a/docs/hugo/content/usage/cli.md b/docs/hugo/content/usage/cli.md new file mode 100644 index 00000000..bb6819ae --- /dev/null +++ b/docs/hugo/content/usage/cli.md @@ -0,0 +1,183 @@ +--- +title: Vitastor CLI +weight: 1 +--- + +vitastor-cli is a command-line tool for administrative tasks like image management. + +It supports the following commands: + +{{< toc >}} + +Global options: + +``` +--etcd_address ADDR Etcd connection address +--iodepth N Send N operations in parallel to each OSD when possible (default 32) +--parallel_osds M Work with M osds in parallel when possible (default 4) +--progress 1|0 Report progress (default 1) +--cas 1|0 Use online CAS writes when possible (default auto) +--no-color Disable colored output +--json JSON output +``` + +## status + +`vitastor-cli status` + +Show cluster status.
+ +Example output: + +``` + cluster: + etcd: 1 / 1 up, 1.8 M database size + mon: 1 up, master stump + osd: 8 / 12 up + + data: + raw: 498.5 G used, 301.2 G / 799.7 G available, 399.8 G down + state: 156.6 G clean, 97.6 G misplaced + pools: 2 / 3 active + pgs: 30 active + 34 active+has_misplaced + 32 offline + + io: + client: 0 B/s rd, 0 op/s rd, 0 B/s wr, 0 op/s wr + rebalance: 989.8 M/s, 7.9 K op/s +``` + +## df + +`vitastor-cli df` + +Show pool space statistics. + +Example output: + +``` +NAME SCHEME PGS TOTAL USED AVAILABLE USED% EFFICIENCY +testpool 2/1 32 100 G 34.2 G 60.7 G 39.23% 100% +size1 1/1 32 199.9 G 10 G 121.5 G 39.23% 100% +kaveri 2/1 32 0 B 10 G 0 B 100% 0% +``` + +In the example above, the "kaveri" pool has "zero" efficiency because all its OSDs are down. + +## ls + +`vitastor-cli ls [-l] [-p POOL] [--sort FIELD] [-r] [-n N] [<glob> ...]` + +List images (only matching `<glob>` pattern(s) if passed). + +Options: + +``` +-p|--pool POOL Filter images by pool ID or name +-l|--long Also report allocated size and I/O statistics +--del Also include delete operation statistics +--sort FIELD Sort by specified field (name, size, used_size, <read|write>_<iops|bps|lat|queue>) +-r|--reverse Sort in descending order +-n|--count N Only list first N items +``` + +Example output: + +``` +NAME POOL SIZE USED READ IOPS QUEUE LAT WRITE IOPS QUEUE LAT FLAGS PARENT +debian9 testpool 20 G 12.3 G 0 B/s 0 0 0 us 0 B/s 0 0 0 us RO +pve/vm-100-disk-0 testpool 20 G 0 B 0 B/s 0 0 0 us 0 B/s 0 0 0 us - debian9 +pve/base-101-disk-0 testpool 20 G 0 B 0 B/s 0 0 0 us 0 B/s 0 0 0 us RO debian9 +pve/vm-102-disk-0 testpool 32 G 36.4 M 0 B/s 0 0 0 us 0 B/s 0 0 0 us - pve/base-101-disk-0 +debian9-test testpool 20 G 36.6 M 0 B/s 0 0 0 us 0 B/s 0 0 0 us - debian9 +bench testpool 10 G 10 G 0 B/s 0 0 0 us 0 B/s 0 0 0 us - +bench-kaveri kaveri 10 G 10 G 0 B/s 0 0 0 us 0 B/s 0 0 0 us - +``` + +## create + +`vitastor-cli create -s|--size <size> [-p|--pool <id|name>] [--parent <parent>[@<snapshot>]] <name>` + +Create an image. You may use K/M/G/T suffixes for `<size>`. If `--parent` is specified, +a copy-on-write image clone is created. The parent must be a snapshot (a readonly image). +The pool must be specified if there is more than one pool. + +``` +vitastor-cli create --snapshot <snapshot> [-p|--pool <id|name>] <image> +vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot> +``` + +Create a snapshot of image `<image>` (either form can be used). May be used live if only a single writer is active. + +## modify + +`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]` + +Rename or resize an image, or change its readonly status. Images with children can't be made read-write. +If the new size is smaller than the old size, extra data will be purged. +You should resize the file system in the image, if present, before shrinking the image. + +``` +-f|--force Proceed with shrinking or setting the readwrite flag even if the image has children. +``` + +## rm + +`vitastor-cli rm <from> [<to>] [--writers-stopped]` + +Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`), +rebasing all their children accordingly. --writers-stopped allows the merge to be a bit +more efficient in the case of a single 'slim' read-write child and a 'fat' removed parent: +the child is then merged into the parent and the parent is renamed to the child. +In other cases parent layers are always merged into children. + +## flatten + +`vitastor-cli flatten <layer>` + +Flatten a layer, i.e. merge data and detach it from parents. + +## rm-data + +`vitastor-cli rm-data --pool <pool> --inode <inode> [--wait-list] [--min-offset <offset>]` + +Remove inode data without changing metadata.
+ +``` +--wait-list Retrieve full objects listings before starting to remove objects. + Requires more memory, but allows to show correct removal progress. +--min-offset Purge only data starting with specified offset. +``` + +## merge-data + +`vitastor-cli merge-data <from> <to> [--target <target>]` + +Merge layer data without changing metadata. Merges `<from>`..`<to>` into `<target>`. +`<to>` must be a child of `<from>` and `<target>` may be one of the layers between +`<from>` and `<to>`, including `<from>` and `<to>`. + +## alloc-osd + +`vitastor-cli alloc-osd` + +Allocate a new OSD number and reserve it by creating an empty `/osd/stats/<n>` key. + +## simple-offsets + +`vitastor-cli simple-offsets <device>` + +Calculate offsets for simple&stupid (no superblock) OSD deployment. + +Options: + +``` +--object_size 128k Set blockstore block size +--bitmap_granularity 4k Set bitmap granularity +--journal_size 16M Set journal size +--device_block_size 4k Set device block size +--journal_offset 0 Set journal offset +--device_size 0 Set device size +--format text Result format: json, options, env, or text +``` diff --git a/docs/hugo/content/usage/nbd.md b/docs/hugo/content/usage/nbd.md new file mode 100644 index 00000000..5583033a --- /dev/null +++ b/docs/hugo/content/usage/nbd.md @@ -0,0 +1,20 @@ +--- +title: NBD +weight: 6 +--- + +To create a local block device for a Vitastor image, use NBD. For example: + +``` +vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg +``` + +It will output the device name, like /dev/nbd0, which you can then format and mount as a normal block device. + +You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <name>` if you want. + +To unmap the device, run: + +``` +vitastor-nbd unmap /dev/nbd0 +``` diff --git a/docs/hugo/content/usage/qemu.md b/docs/hugo/content/usage/qemu.md new file mode 100644 index 00000000..1ab8b1fc --- /dev/null +++ b/docs/hugo/content/usage/qemu.md @@ -0,0 +1,39 @@ +--- +title: QEMU and qemu-img +weight: 2 +--- + +You need a patched QEMU version to use the Vitastor driver. + +To start a VM using the plain QEMU command line with a Vitastor disk, use the following commands: + +Old syntax (-drive): + +``` +qemu-system-x86_64 -enable-kvm -m 1024 \ + -drive 'file=vitastor:etcd_host=192.168.7.2\:2379/v3:image=debian9', + format=raw,if=none,id=drive-virtio-disk0,cache=none \ + -device 'virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0, + id=virtio-disk0,bootindex=1,write-cache=off' \ + -vnc 0.0.0.0:0 +``` + +New syntax (-blockdev): + +``` +qemu-system-x86_64 -enable-kvm -m 1024 \ + -blockdev '{"node-name":"drive-virtio-disk0","driver":"vitastor","image":"debian9", + "cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \ + -device 'virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0, + id=virtio-disk0,bootindex=1,write-cache=off' \ + -vnc 0.0.0.0:0 +``` + +For qemu-img, you should use `vitastor:etcd_host=<HOST>:image=<IMAGE>` as the filename. For example: + +``` +qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=debian10' +``` + +You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>` +if you don't want to use inode metadata. diff --git a/docs/hugo/i18n/ru.yaml b/docs/hugo/i18n/ru.yaml new file mode 100644 index 00000000..a4f4eb40 --- /dev/null +++ b/docs/hugo/i18n/ru.yaml @@ -0,0 +1,37 @@ +--- +nav_navigation: Навигация +nav_tags: Теги +nav_more: Подробнее +nav_top: К началу + +form_placeholder_search: Поиск + +error_page_title: Открыта несуществующая страница +error_message_title: Потерялись?
+error_message_code: Ошибка 404 +error_message_text: > + Похоже, страница, которую вы открыли, не существует. Попробуйте найти + нужную информацию с главной страницы. + +button_toggle_dark: Переключить тёмный/светлый/авто режим +button_nav_open: Показать навигацию +button_nav_close: Скрыть навигацию +button_menu_open: Открыть меню +button_menu_close: Закрыть меню +button_homepage: На главную + +title_anchor_prefix: "Ссылка на:" + +posts_read_more: Читать подробнее +posts_read_time: + one: "Одна минута на чтение" + other: "{{ . }} минут(ы) на чтение" +posts_update_prefix: Обновлено + +footer_build_with: > + Сделано на Hugo с + +footer_legal_notice: Правовая информация +footer_privacy_policy: Приватность + +language_switch_no_tranlation_prefix: "Страница не переведена:" diff --git a/docs/hugo/layouts/partials/site-footer.html b/docs/hugo/layouts/partials/site-footer.html new file mode 100644 index 00000000..15021bfa --- /dev/null +++ b/docs/hugo/layouts/partials/site-footer.html @@ -0,0 +1,34 @@ + diff --git a/docs/hugo/static/brand.svg b/docs/hugo/static/brand.svg new file mode 100644 index 00000000..e51bde0e --- /dev/null +++ b/docs/hugo/static/brand.svg @@ -0,0 +1,215 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + diff --git a/docs/hugo/static/custom.css b/docs/hugo/static/custom.css new file mode 100644 index 00000000..0b9be97e --- /dev/null +++ b/docs/hugo/static/custom.css @@ -0,0 +1,138 @@ +/* Global customization */ + +:root { + --code-max-height: 60rem; +} + +/* Light mode theming */ +:root, +:root[color-mode="light"] { + --header-background: #404050; + --header-font-color: #ffffff; + + --body-background: #ffffff; + --body-font-color: #343a40; + + --button-background: #62cb97; + --button-border-color: #4ec58a; + + --link-color: #c54e8a; + --link-color-visited: #c54e8a; + + --code-background: #f5f6f8; + --code-accent-color: #e3e7eb; + --code-accent-color-lite: #eff1f3; + + --accent-color: #e9ecef; + --accent-color-lite: #f8f9fa; + + --control-icons: #b2bac1; + + --footer-background: #606070; + --footer-font-color: #ffffff; + --footer-link-color: #ffcc5c; + --footer-link-color-visited: #ffcc5c; +} +@media (prefers-color-scheme: light) { + :root { + --header-background: #404050; + --header-font-color: #ffffff; + + --body-background: #ffffff; + --body-font-color: #343a40; + + --button-background: #62cb97; + --button-border-color: #4ec58a; + + --link-color: #c54e8a; + --link-color-visited: #c54e8a; + + --code-background: #f5f6f8; + --code-accent-color: #e3e7eb; + --code-accent-color-lite: #eff1f3; + + --accent-color: #e9ecef; + --accent-color-lite: #f8f9fa; + + --control-icons: #b2bac1; + + --footer-background: #606070; + --footer-font-color: #ffffff; + --footer-link-color: #ffcc5c; + --footer-link-color-visited: #ffcc5c; + } +} + +/* Dark mode theming */ +:root[color-mode="dark"] { + --header-background: #202830; + --header-font-color: #ffffff; + + --body-background: #343a44; + --body-font-color: #ced3d8; + + --button-background: #62cb97; + --button-border-color: #4ec58a; + + --link-color: #7ac29e; + --link-color-visited: #7ac29e; + + --code-background: #2f353a; + --code-accent-color: #262b2f; + --code-accent-color-lite: #2b3035; + + --accent-color: #2b3035; + --accent-color-lite: #2f353a; + + --control-icons: #b2bac1; + + --footer-background: #2f333e; + --footer-font-color: #cccccc; + --footer-link-color: #7ac29e; + --footer-link-color-visited: #7ac29e; +} +@media (prefers-color-scheme: dark) { + :root { + 
--header-background: #404070; + --header-font-color: #ffffff; + + --body-background: #343a40; + --body-font-color: #ced3d8; + + --button-background: #62cb97; + --button-border-color: #4ec58a; + + --link-color: #7ac29e; + --link-color-visited: #7ac29e; + + --code-background: #2f353a; + --code-accent-color: #262b2f; + --code-accent-color-lite: #2b3035; + + --accent-color: #2b3035; + --accent-color-lite: #2f353a; + + --control-icons: #b2bac1; + + --footer-background: #2f333e; + --footer-font-color: #cccccc; + --footer-link-color: #7ac29e; + --footer-link-color-visited: #7ac29e; + } +} + +.gdoc-brand__img { + width: 48px; + height: auto; + margin-top: -4px; + margin-bottom: -4px; +} + +.gdoc-menu-header > span { + display: flex; + flex-direction: row-reverse; +} + +span.gdoc-language { + margin-right: 20px; +} diff --git a/docs/hugo/static/favicon/favicon-16x16.png b/docs/hugo/static/favicon/favicon-16x16.png new file mode 100644 index 0000000000000000000000000000000000000000..2e88ee28c3992d4f71ab1c316cf3f237fa01c646 GIT binary patch literal 709 zcmV;$0y_PPP)@~Q8|`}XvGpL(8Gn1;DV3lSdx z8>K)11H|z(5gX(nkEf2p3{0Qhk8N#gED$gTbkvwA8kBCjrt8(r1tb!8@LWiGkaWTO zF|Gr)b!Mhqc4eP!Q3hw~V&}%b72Vm3=do>20#OX1CY&_2J^eTLjfP7CG}sy(9fh_S z$r?BY5PCx2i~a1_e4F{18d%b>F2a?{f!WnW>us?ODgqN2VqfLe7#|A)_=y0I>nB z8wr?38kfFwGsqf3S9EBr+;F}lumE1XJU805_OKmf)K)(uyp~o5##ixhd^OuL_aW3& zCYRFtXCfkL%+S^2L;diA+l(5{q2PiTv(9IEB0e~oc<{A__cQJkr7Zel*GMOit7 z;I6ZoQffPV8jDij;-2BXzpVe0IX6b~Ew@HY%k@{L?f9*x3Qy7WnSOH5*o)p(AF0UL rbCX}!|JUnNEWc3cTzPO~Ai@xrF$!d3)#H{jwYh{RS)NoZ55s5P|Rw5c0&_DGX%+}(5DnR$9KbM~BF zPWB_A1H-&9^FIIo{Gb2xyypmu4E;4X0dIBP6af@;u?PVCMs~9EY@2S;S7&&KRh$M^ zV+6z`VCiHDsHPcXms{}6Vi0DrP3fn^$XhrI&VjQ~VVui zeFEqH(oTb$FS)bxr?p@WSo^H(X7>CQ0I>2J)_x6ZQjVp=g{o}@LLfWTBeHKfq26hM}~gMbfweCo>gb{>BIg|we#j$m?;m}VkrLUh6#f4`MoH|*!4>gP#;ITaM5$>3QHtL*v5Pqu`L*+Pd!B&K5$ z6%AleB5X4U4qi13I@%396oRh^&BF1=Pp?V9tx_yHO0$(3V;G<5<7nAO&#EbUdZxJW zg8!n05()?fL?=WC(aPAn`!|m`fRgG!q0Q?Nz7kyrcp>SfLiodpYq@FjI|<0zw5qmm zZWK&p!x#R^@2=YlK7jWeKK*rmFuotWU?Qj}Aa%s`=Pbj^Z*QuLVW03%zv!FX>U*u&b_VBZlkK?T&7D7ljCpsBC^ifBgImV_v}fZ;htJzXT`>?Urj}G7 zrJ&ND@7BK21}~H26A|qoqJVN_b_>r>-$QUhv^n>ddQ{s#uM8Kuf9tnD*ybzd#X0ay zCKSS&)t_+3+SiCyP-D2PdXn3|_$Gu*DwpAG>A2;@2*(=hFhMZ^j8|MK+dh1_Z)8yd z0N?!HUq^f)ccm0CmyVE48M2daz2qRztbHV_(kAivsR#JgnH>pu4xF*05BA-@5UXn| zwnhd*g<2||4ke0qT6|ULyJ(ejx@f?zVbV{+>-oisFwx(-LSe6czfTbx-ZO7 z=rE+u713$TUN_P9f)6ykIWGGrm`L(bv9>=R?_oYuW zJ>5iSrm>BZN*R!-gSG98OTQul0Fyh`4~nSKoauvCfIW#P;60Yck{-ifbsgWU#hG!}-6AHT#Z@ ttb2C+rSplgygw#q8P(u_cs}m~{0}7QmfaLi4RZhh002ovPDHLkV1lo`);0hD literal 0 HcmV?d00001 diff --git a/docs/hugo/static/favicon/favicon.svg b/docs/hugo/static/favicon/favicon.svg new file mode 100644 index 00000000..17683e5f --- /dev/null +++ b/docs/hugo/static/favicon/favicon.svg @@ -0,0 +1,196 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + diff --git a/docs/params/head/common.en.md b/docs/params/head/common.en.md new file mode 100644 index 00000000..00e3c728 --- /dev/null +++ b/docs/params/head/common.en.md @@ -0,0 +1,6 @@ +--- +title: Common Parameters +weight: 1 +--- + +These are the most common parameters which apply to all components of Vitastor. diff --git a/docs/params/head/common.ru.md b/docs/params/head/common.ru.md new file mode 100644 index 00000000..c1e7bdaa --- /dev/null +++ b/docs/params/head/common.ru.md @@ -0,0 +1,6 @@ +--- +title: Общие параметры +weight: 1 +--- + +Это наиболее общие параметры, используемые всеми компонентами Vitastor. 
diff --git a/docs/params/head/layout-cluster.en.md b/docs/params/head/layout-cluster.en.md
new file mode 100644
index 00000000..88c475b9
--- /dev/null
+++ b/docs/params/head/layout-cluster.en.md
@@ -0,0 +1,7 @@
+---
+title: Cluster-Wide Disk Layout Parameters
+weight: 2
+---
+
+These parameters apply to clients and OSDs, are fixed at the moment of OSD drive
+initialization, and can't be changed afterwards without losing data.
diff --git a/docs/params/head/layout-cluster.ru.md b/docs/params/head/layout-cluster.ru.md
new file mode 100644
index 00000000..869b9a37
--- /dev/null
+++ b/docs/params/head/layout-cluster.ru.md
@@ -0,0 +1,7 @@
+---
+title: Дисковые параметры уровня кластера
+weight: 2
+---
+
+Данные параметры используются клиентами и OSD, задаются в момент инициализации
+диска OSD и не могут быть изменены после этого без потери данных.
diff --git a/docs/params/head/layout-osd.en.md b/docs/params/head/layout-osd.en.md
new file mode 100644
index 00000000..9fe1a5e2
--- /dev/null
+++ b/docs/params/head/layout-osd.en.md
@@ -0,0 +1,7 @@
+---
+title: OSD Disk Layout Parameters
+weight: 3
+---
+
+These parameters apply to OSDs, are fixed at the moment of OSD drive
+initialization, and can't be changed afterwards without losing data.
diff --git a/docs/params/head/layout-osd.ru.md b/docs/params/head/layout-osd.ru.md
new file mode 100644
index 00000000..4e3377d3
--- /dev/null
+++ b/docs/params/head/layout-osd.ru.md
@@ -0,0 +1,8 @@
+---
+title: Дисковые параметры OSD
+weight: 3
+---
+
+Данные параметры используются только OSD и, так же, как и общекластерные
+дисковые параметры, задаются в момент инициализации дисков OSD и не могут быть
+изменены после этого без потери данных.
diff --git a/docs/params/head/monitor.en.md b/docs/params/head/monitor.en.md
new file mode 100644
index 00000000..2dea1d70
--- /dev/null
+++ b/docs/params/head/monitor.en.md
@@ -0,0 +1,6 @@
+---
+title: Monitor Parameters
+weight: 6
+---
+
+These parameters only apply to Monitors.
diff --git a/docs/params/head/monitor.ru.md b/docs/params/head/monitor.ru.md
new file mode 100644
index 00000000..33a3b838
--- /dev/null
+++ b/docs/params/head/monitor.ru.md
@@ -0,0 +1,6 @@
+---
+title: Параметры мониторов
+weight: 6
+---
+
+Данные параметры используются только мониторами Vitastor.
diff --git a/docs/params/head/network.en.md b/docs/params/head/network.en.md
new file mode 100644
index 00000000..ba3bfd21
--- /dev/null
+++ b/docs/params/head/network.en.md
@@ -0,0 +1,7 @@
+---
+title: Network Protocol Parameters
+weight: 4
+---
+
+These parameters apply to clients and OSDs and affect network connection logic
+between clients, OSDs and etcd.
diff --git a/docs/params/head/network.ru.md b/docs/params/head/network.ru.md
new file mode 100644
index 00000000..d77c74c1
--- /dev/null
+++ b/docs/params/head/network.ru.md
@@ -0,0 +1,7 @@
+---
+title: Параметры сетевого протокола
+weight: 4
+---
+
+Данные параметры используются клиентами и OSD и влияют на логику сетевого
+взаимодействия между клиентами, OSD, а также etcd.
diff --git a/docs/params/head/osd.en.md b/docs/params/head/osd.en.md
new file mode 100644
index 00000000..a5340a82
--- /dev/null
+++ b/docs/params/head/osd.en.md
@@ -0,0 +1,7 @@
+---
+title: Runtime OSD Parameters
+weight: 5
+---
+
+These parameters only apply to OSDs, are not fixed at the moment of OSD drive
+initialization, and can be changed at any time with an OSD restart.
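To illustrate the "can be changed with an OSD restart" note above: Vitastor reads its global configuration from etcd, so a runtime parameter change could look roughly like the sketch below. The parameter name is taken from osd.yml further down; the etcd key assumes the default /vitastor prefix, and the systemd unit name is only an assumption for illustration:

```
# Sketch: flip a runtime OSD parameter cluster-wide (default /vitastor
# etcd prefix assumed; throttle_small_writes is documented in osd.yml).
etcdctl put /vitastor/config/global '{"throttle_small_writes": true}'

# Runtime parameters take effect after an OSD restart; the unit name
# below is an assumption.
systemctl restart vitastor-osd@1
```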
diff --git a/docs/params/head/osd.ru.md b/docs/params/head/osd.ru.md
new file mode 100644
index 00000000..f805f72d
--- /dev/null
+++ b/docs/params/head/osd.ru.md
@@ -0,0 +1,8 @@
+---
+title: Изменяемые параметры OSD
+weight: 5
+---
+
+Данные параметры используются только OSD, но, в отличие от дисковых параметров,
+не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
+момент с перезапуском OSD.
diff --git a/docs/params/osd.yml b/docs/params/osd.yml
index 60a9c5bc..bfa656b8 100644
--- a/docs/params/osd.yml
+++ b/docs/params/osd.yml
@@ -248,6 +248,8 @@
     row and slow down significantly (from 25000+ iops to ~3000 iops). When
     this option is set, Vitastor will always move to the next sector of the
     journal after writing it instead of possibly overwriting it the second time.
+
+    Most (99%) other SSDs don't need this option.
   info_ru: |
     Включайте данную опцию для SSD вроде Intel D3-S4510 и D3-S4610, которые
     ОЧЕНЬ не любят, когда ПО перезаписывает один и тот же сектор несколько раз
@@ -256,6 +258,8 @@
     данная опция установлена, Vitastor всегда переходит к следующему сектору
     журнала после записи вместо потенциально повторной перезаписи того же
     самого сектора.
+
+    Почти все другие SSD (99% моделей) не требуют данной опции.
 - name: throttle_small_writes
   type: bool
   default: false
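The 25000+ → ~3000 iops figure in the hunk above is easy to verify for a specific drive: repeatedly rewriting the same block with an fsync after each write reproduces the access pattern the option works around. A sketch with fio; the device path is a placeholder and the test overwrites data on it:

```
# Probe same-sector overwrite behaviour: rewrite one 4k block in a loop
# with fsync after every write. Drives like Intel D3-S4510/D3-S4610 are
# expected to collapse to a few thousand iops; most SSDs stay fast.
# WARNING: destroys data on the target device. /dev/sdX is a placeholder.
fio -name=overwrite -ioengine=libaio -direct=1 -fsync=1 -bs=4k \
    -rw=write -size=4k -time_based=1 -runtime=10 -filename=/dev/sdX
```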