vitastor/src/osd_ops.h

252 lines
6.4 KiB
C
Raw Permalink Normal View History

// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
2019-12-10 12:07:24 +03:00
#pragma once
#include "object_id.h"
#include "osd_id.h"
2019-12-10 12:07:24 +03:00
2019-12-13 20:16:08 +03:00
// Magic numbers
// The uppercase 'L' suffix is used because a lowercase 'l' is easily misread
// as the digit '1'; the type and the value of the constants are unchanged
#define SECONDARY_OSD_OP_MAGIC 0x2bd7b10325434553L
#define SECONDARY_OSD_REPLY_MAGIC 0xbaa699b87b434553L
// Operation request / reply headers have fixed size after which comes data
#define OSD_PACKET_SIZE 0x80
2019-12-13 20:16:08 +03:00
// Opcodes
// OSD_OP_MIN and OSD_OP_MAX are the lowest and the highest opcode values defined below
#define OSD_OP_MIN                  1
// OSD_OP_SEC_*: operations sent to a secondary OSD
#define OSD_OP_SEC_READ             1
#define OSD_OP_SEC_WRITE            2
#define OSD_OP_SEC_WRITE_STABLE     3
#define OSD_OP_SEC_SYNC             4
#define OSD_OP_SEC_STABILIZE        5
#define OSD_OP_SEC_ROLLBACK         6
#define OSD_OP_SEC_DELETE           7
#define OSD_OP_TEST_SYNC_STAB_ALL   8
#define OSD_OP_SEC_LIST             9
// show configuration
#define OSD_OP_SHOW_CONFIG          10
// operations sent to the primary OSD
#define OSD_OP_READ                 11
#define OSD_OP_WRITE                12
#define OSD_OP_SYNC                 13
#define OSD_OP_DELETE               14
#define OSD_OP_PING                 15
// bulk read of bitmaps from a secondary OSD
#define OSD_OP_SEC_READ_BMP         16
#define OSD_OP_MAX                  16
2019-12-13 20:16:08 +03:00
// Alignment & limit for read/write operations
// MEM_ALIGNMENT may be predefined by the includer; 512 is only the fallback
#ifndef MEM_ALIGNMENT
#define MEM_ALIGNMENT 512
#endif
// Parenthesized so that the macro expands as a single value inside larger
// expressions: without the parentheses `x / OSD_RW_MAX` would expand to
// `x / 64*1024*1024`, i.e. `((x / 64) * 1024) * 1024`
#define OSD_RW_MAX (64*1024*1024)
#define OSD_PROTOCOL_VERSION 1
2019-12-10 12:07:24 +03:00
2019-12-13 20:16:08 +03:00
// common request and reply headers
2019-12-10 12:07:24 +03:00
// Header placed at the start of every operation request structure in this file.
// Packed: three 64-bit fields, 24 bytes in total
struct __attribute__((__packed__)) osd_op_header_t
{
    uint64_t magic;     // magic & protocol version
    uint64_t id;        // operation id
    uint64_t opcode;    // operation type
};
// Header placed at the start of every reply structure in this file.
// Packed: four 64-bit fields, 32 bytes in total
struct __attribute__((__packed__)) osd_reply_header_t
{
    // magic & protocol version
    uint64_t magic;
    // operation id
    uint64_t id;
    // operation type
    uint64_t opcode;
    // return value (signed)
    int64_t retval;
};
// read or write to the secondary OSD
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_op_sec_rw_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_op_header_t header;
// object
object_id oid;
// read/write version (automatic or specific)
2020-05-05 00:16:01 +03:00
// FIXME deny values close to UINT64_MAX
2019-12-13 20:16:08 +03:00
uint64_t version;
// offset
uint32_t offset;
// length
uint32_t len;
// bitmap/attribute length - bitmap comes after header, but before data
uint32_t attr_len;
uint32_t pad0;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_reply_sec_rw_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_reply_header_t header;
2020-02-12 12:30:50 +03:00
// for reads and writes: assigned or read version number
uint64_t version;
// for reads: bitmap/attribute length (just to double-check)
uint32_t attr_len;
uint32_t pad0;
2019-12-10 12:07:24 +03:00
};
// delete object on the secondary OSD
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_op_sec_del_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_op_header_t header;
// object
object_id oid;
// delete version (automatic or specific)
uint64_t version;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_reply_sec_del_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_reply_header_t header;
uint64_t version;
2019-12-10 12:07:24 +03:00
};
// sync to the secondary OSD
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_op_sec_sync_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_op_header_t header;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_reply_sec_sync_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_reply_header_t header;
2019-12-10 12:07:24 +03:00
};
2020-01-25 02:04:58 +03:00
// stabilize or rollback objects on the secondary OSD
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_op_sec_stab_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_op_header_t header;
// obj_ver_id array length in bytes
2020-02-25 20:10:17 +03:00
uint64_t len;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
typedef osd_op_sec_stab_t osd_op_sec_rollback_t;
2019-12-10 12:07:24 +03:00
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_reply_sec_stab_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_reply_header_t header;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
typedef osd_reply_sec_stab_t osd_reply_sec_rollback_t;
2019-12-10 12:07:24 +03:00
// Bulk read of bitmaps from a secondary OSD
struct __attribute__((__packed__)) osd_op_sec_read_bmp_t
{
    osd_op_header_t header;
    uint64_t len;   // length of the obj_ver_id array, in bytes
};
// Reply to a bulk bitmap read from a secondary OSD
struct __attribute__((__packed__)) osd_reply_sec_read_bmp_t
{
    // header.retval holds the payload length in bytes;
    // the payload itself is an array of {version,bitmap} entries
    osd_reply_header_t header;
};
// Show configuration request
struct __attribute__((__packed__)) osd_op_show_config_t
{
    osd_op_header_t header;
    uint64_t json_len;  // length of the JSON request
};
// Show configuration reply: only the common reply header, no extra fields
struct __attribute__((__packed__)) osd_reply_show_config_t
{
    osd_reply_header_t header;
};
2019-12-10 12:07:24 +03:00
// list objects on replica
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_op_sec_list_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_op_header_t header;
2019-12-19 22:04:18 +03:00
// placement group total number and total count
pg_num_t list_pg, pg_count;
// size of an area that maps to one PG continuously
uint64_t pg_stripe_size;
// inode range (used to select pools)
uint64_t min_inode, max_inode;
2019-12-10 12:07:24 +03:00
};
2021-02-13 23:14:28 +03:00
struct __attribute__((__packed__)) osd_reply_sec_list_t
2019-12-10 12:07:24 +03:00
{
2019-12-13 20:16:08 +03:00
osd_reply_header_t header;
2019-12-19 22:04:18 +03:00
// stable object version count. header.retval = total object version count
// FIXME: maybe change to the number of bytes in the reply...
2019-12-19 22:04:18 +03:00
uint64_t stable_count;
2019-12-10 12:07:24 +03:00
};
2019-12-11 14:18:19 +03:00
// read or write to the primary OSD (must be within individual stripe)
2019-12-21 19:04:36 +03:00
struct __attribute__((__packed__)) osd_op_rw_t
{
osd_op_header_t header;
// inode
uint64_t inode;
// offset
uint64_t offset;
// length
uint32_t len;
// flags (for future)
uint32_t flags;
// inode metadata revision
uint64_t meta_revision;
// object version for atomic "CAS" (compare-and-set) writes
// writes and deletes fail with -EINTR if object version differs from (version-1)
uint64_t version;
2019-12-21 19:04:36 +03:00
};
// reply to a read or write sent to the primary OSD
struct __attribute__((__packed__)) osd_reply_rw_t
{
    osd_reply_header_t header;
    // for reads: bitmap length
    uint32_t bitmap_len;
    // explicit padding field; puts the following 64-bit version at a multiple of 8 bytes
    uint32_t pad0;
    // for reads: object version
    uint64_t version;
};
2020-01-30 22:06:46 +03:00
// Sync request to the primary OSD: only the common header, no extra fields
struct __attribute__((__packed__)) osd_op_sync_t
{
    osd_op_header_t header;
};
// Reply to a primary OSD sync: only the common reply header, no extra fields
struct __attribute__((__packed__)) osd_reply_sync_t
{
    osd_reply_header_t header;
};
// FIXME it would be interesting to try to unify blockstore_op and osd_op formats
2019-12-11 14:18:19 +03:00
union osd_any_op_t
{
osd_op_header_t hdr;
2021-02-13 23:14:28 +03:00
osd_op_sec_rw_t sec_rw;
osd_op_sec_del_t sec_del;
osd_op_sec_sync_t sec_sync;
osd_op_sec_stab_t sec_stab;
osd_op_sec_read_bmp_t sec_read_bmp;
2021-02-13 23:14:28 +03:00
osd_op_sec_list_t sec_list;
osd_op_show_config_t show_conf;
2019-12-21 19:04:36 +03:00
osd_op_rw_t rw;
2020-01-30 22:06:46 +03:00
osd_op_sync_t sync;
uint8_t buf[OSD_PACKET_SIZE];
2019-12-11 14:18:19 +03:00
};
union osd_any_reply_t
{
2019-12-13 22:53:59 +03:00
osd_reply_header_t hdr;
2021-02-13 23:14:28 +03:00
osd_reply_sec_rw_t sec_rw;
osd_reply_sec_del_t sec_del;
osd_reply_sec_sync_t sec_sync;
osd_reply_sec_stab_t sec_stab;
osd_reply_sec_read_bmp_t sec_read_bmp;
2021-02-13 23:14:28 +03:00
osd_reply_sec_list_t sec_list;
osd_reply_show_config_t show_conf;
2019-12-21 19:04:36 +03:00
osd_reply_rw_t rw;
2020-01-30 22:06:46 +03:00
osd_reply_sync_t sync;
uint8_t buf[OSD_PACKET_SIZE];
2019-12-11 14:18:19 +03:00
};
// Table of operation name strings; only declared here, the definition lives elsewhere
// NOTE(review): presumably indexed by the OSD_OP_* opcode - confirm against the definition
extern const char* osd_op_names[];