Simplified distributed block storage with strong consistency, like in Ceph
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

222 lines
5.6 KiB

  1. // Copyright (c) Vitaliy Filippov, 2019+
  2. // License: VNPL-1.0 or GNU GPL-2.0+ (see README.md for details)
  3. #pragma once
  4. #include "object_id.h"
  5. #include "osd_id.h"
  6. // Magic numbers
  7. #define SECONDARY_OSD_OP_MAGIC 0x2bd7b10325434553l
  8. #define SECONDARY_OSD_REPLY_MAGIC 0xbaa699b87b434553l
  9. // Operation request / reply headers have fixed size after which comes data
  10. #define OSD_PACKET_SIZE 0x80
  11. // Opcodes
  12. #define OSD_OP_MIN 1
  13. #define OSD_OP_SEC_READ 1
  14. #define OSD_OP_SEC_WRITE 2
  15. #define OSD_OP_SEC_WRITE_STABLE 3
  16. #define OSD_OP_SEC_SYNC 4
  17. #define OSD_OP_SEC_STABILIZE 5
  18. #define OSD_OP_SEC_ROLLBACK 6
  19. #define OSD_OP_SEC_DELETE 7
  20. #define OSD_OP_TEST_SYNC_STAB_ALL 8
  21. #define OSD_OP_SEC_LIST 9
  22. #define OSD_OP_SHOW_CONFIG 10
  23. #define OSD_OP_READ 11
  24. #define OSD_OP_WRITE 12
  25. #define OSD_OP_SYNC 13
  26. #define OSD_OP_DELETE 14
  27. #define OSD_OP_MAX 14
  28. // Alignment & limit for read/write operations
  29. #ifndef MEM_ALIGNMENT
  30. #define MEM_ALIGNMENT 512
  31. #endif
  32. #define OSD_RW_MAX 64*1024*1024
  33. // common request and reply headers
  34. struct __attribute__((__packed__)) osd_op_header_t
  35. {
  36. // magic & protocol version
  37. uint64_t magic;
  38. // operation id
  39. uint64_t id;
  40. // operation type
  41. uint64_t opcode;
  42. };
  43. struct __attribute__((__packed__)) osd_reply_header_t
  44. {
  45. // magic & protocol version
  46. uint64_t magic;
  47. // operation id
  48. uint64_t id;
  49. // operation type
  50. uint64_t opcode;
  51. // return value
  52. int64_t retval;
  53. };
  54. // read or write to the secondary OSD
  55. struct __attribute__((__packed__)) osd_op_secondary_rw_t
  56. {
  57. osd_op_header_t header;
  58. // object
  59. object_id oid;
  60. // read/write version (automatic or specific)
  61. // FIXME deny values close to UINT64_MAX
  62. uint64_t version;
  63. // offset
  64. uint32_t offset;
  65. // length
  66. uint32_t len;
  67. // bitmap/attribute length - bitmap comes after header, but before data
  68. uint32_t attr_len;
  69. uint32_t pad0;
  70. };
  71. struct __attribute__((__packed__)) osd_reply_secondary_rw_t
  72. {
  73. osd_reply_header_t header;
  74. // for reads and writes: assigned or read version number
  75. uint64_t version;
  76. // for reads: bitmap/attribute length (just to double-check)
  77. uint32_t attr_len;
  78. uint32_t pad0;
  79. };
  80. // delete object on the secondary OSD
  81. struct __attribute__((__packed__)) osd_op_secondary_del_t
  82. {
  83. osd_op_header_t header;
  84. // object
  85. object_id oid;
  86. // delete version (automatic or specific)
  87. uint64_t version;
  88. };
  89. struct __attribute__((__packed__)) osd_reply_secondary_del_t
  90. {
  91. osd_reply_header_t header;
  92. uint64_t version;
  93. };
  94. // sync to the secondary OSD
  95. struct __attribute__((__packed__)) osd_op_secondary_sync_t
  96. {
  97. osd_op_header_t header;
  98. };
  99. struct __attribute__((__packed__)) osd_reply_secondary_sync_t
  100. {
  101. osd_reply_header_t header;
  102. };
  103. // stabilize or rollback objects on the secondary OSD
  104. struct __attribute__((__packed__)) osd_op_secondary_stabilize_t
  105. {
  106. osd_op_header_t header;
  107. // obj_ver_id array length in bytes
  108. uint64_t len;
  109. };
  110. typedef osd_op_secondary_stabilize_t osd_op_secondary_rollback_t;
  111. struct __attribute__((__packed__)) osd_reply_secondary_stabilize_t
  112. {
  113. osd_reply_header_t header;
  114. };
  115. typedef osd_reply_secondary_stabilize_t osd_reply_secondary_rollback_t;
  116. // show configuration
  117. struct __attribute__((__packed__)) osd_op_show_config_t
  118. {
  119. osd_op_header_t header;
  120. };
  121. struct __attribute__((__packed__)) osd_reply_show_config_t
  122. {
  123. osd_reply_header_t header;
  124. };
  125. // list objects on replica
  126. struct __attribute__((__packed__)) osd_op_secondary_list_t
  127. {
  128. osd_op_header_t header;
  129. // placement group total number and total count
  130. pg_num_t list_pg, pg_count;
  131. // size of an area that maps to one PG continuously
  132. uint64_t pg_stripe_size;
  133. // inode range (used to select pools)
  134. uint64_t min_inode, max_inode;
  135. };
  136. struct __attribute__((__packed__)) osd_reply_secondary_list_t
  137. {
  138. osd_reply_header_t header;
  139. // stable object version count. header.retval = total object version count
  140. // FIXME: maybe change to the number of bytes in the reply...
  141. uint64_t stable_count;
  142. };
  143. // read or write to the primary OSD (must be within individual stripe)
  144. struct __attribute__((__packed__)) osd_op_rw_t
  145. {
  146. osd_op_header_t header;
  147. // inode
  148. uint64_t inode;
  149. // offset
  150. uint64_t offset;
  151. // length
  152. uint32_t len;
  153. };
  154. struct __attribute__((__packed__)) osd_reply_rw_t
  155. {
  156. osd_reply_header_t header;
  157. // for reads: bitmap length
  158. uint32_t bitmap_len;
  159. uint32_t pad0;
  160. };
  161. // sync to the primary OSD
  162. struct __attribute__((__packed__)) osd_op_sync_t
  163. {
  164. osd_op_header_t header;
  165. };
  166. struct __attribute__((__packed__)) osd_reply_sync_t
  167. {
  168. osd_reply_header_t header;
  169. };
  170. // FIXME it would be interesting to try to unify blockstore_op and osd_op formats
  171. union osd_any_op_t
  172. {
  173. osd_op_header_t hdr;
  174. osd_op_secondary_rw_t sec_rw;
  175. osd_op_secondary_del_t sec_del;
  176. osd_op_secondary_sync_t sec_sync;
  177. osd_op_secondary_stabilize_t sec_stab;
  178. osd_op_secondary_list_t sec_list;
  179. osd_op_show_config_t show_conf;
  180. osd_op_rw_t rw;
  181. osd_op_sync_t sync;
  182. uint8_t buf[OSD_PACKET_SIZE];
  183. };
  184. union osd_any_reply_t
  185. {
  186. osd_reply_header_t hdr;
  187. osd_reply_secondary_rw_t sec_rw;
  188. osd_reply_secondary_del_t sec_del;
  189. osd_reply_secondary_sync_t sec_sync;
  190. osd_reply_secondary_stabilize_t sec_stab;
  191. osd_reply_secondary_list_t sec_list;
  192. osd_reply_show_config_t show_conf;
  193. osd_reply_rw_t rw;
  194. osd_reply_sync_t sync;
  195. uint8_t buf[OSD_PACKET_SIZE];
  196. };
  197. extern const char* osd_op_names[];