Simplified distributed block storage with strong consistency, like in Ceph
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

157 lines
5.4 KiB

  1. // Copyright (c) Vitaliy Filippov, 2019+
  2. // License: VNPL-1.0 (see README.md for details)
  3. #include "osd.h"
  4. #include "json11/json11.hpp"
  5. void osd_t::secondary_op_callback(osd_op_t *op)
  6. {
  7. if (op->req.hdr.opcode == OSD_OP_SEC_READ ||
  8. op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
  9. op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
  10. {
  11. op->reply.sec_rw.version = op->bs_op->version;
  12. }
  13. else if (op->req.hdr.opcode == OSD_OP_SEC_DELETE)
  14. {
  15. op->reply.sec_del.version = op->bs_op->version;
  16. }
  17. if (op->req.hdr.opcode == OSD_OP_SEC_READ)
  18. {
  19. if (op->bs_op->retval > 0)
  20. {
  21. op->iov.push_back(op->buf, op->bs_op->retval);
  22. }
  23. }
  24. else if (op->req.hdr.opcode == OSD_OP_SEC_LIST)
  25. {
  26. // allocated by blockstore
  27. op->buf = op->bs_op->buf;
  28. if (op->bs_op->retval > 0)
  29. {
  30. op->iov.push_back(op->buf, op->bs_op->retval * sizeof(obj_ver_id));
  31. }
  32. op->reply.sec_list.stable_count = op->bs_op->version;
  33. }
  34. int retval = op->bs_op->retval;
  35. delete op->bs_op;
  36. op->bs_op = NULL;
  37. finish_op(op, retval);
  38. }
  39. void osd_t::exec_secondary(osd_op_t *cur_op)
  40. {
  41. cur_op->bs_op = new blockstore_op_t();
  42. cur_op->bs_op->callback = [this, cur_op](blockstore_op_t* bs_op) { secondary_op_callback(cur_op); };
  43. cur_op->bs_op->opcode = (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ? BS_OP_READ
  44. : (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ? BS_OP_WRITE
  45. : (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE ? BS_OP_WRITE_STABLE
  46. : (cur_op->req.hdr.opcode == OSD_OP_SEC_SYNC ? BS_OP_SYNC
  47. : (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ? BS_OP_STABLE
  48. : (cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ? BS_OP_ROLLBACK
  49. : (cur_op->req.hdr.opcode == OSD_OP_SEC_DELETE ? BS_OP_DELETE
  50. : (cur_op->req.hdr.opcode == OSD_OP_SEC_LIST ? BS_OP_LIST
  51. : -1))))))));
  52. if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ||
  53. cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
  54. cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
  55. {
  56. if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
  57. {
  58. // Allocate memory for the read operation
  59. if (entry_attr_size > sizeof(unsigned))
  60. cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(entry_attr_size);
  61. else
  62. cur_op->bitmap = &cur_op->bmp_data;
  63. if (cur_op->req.sec_rw.len > 0)
  64. cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
  65. }
  66. cur_op->bs_op->oid = cur_op->req.sec_rw.oid;
  67. cur_op->bs_op->version = cur_op->req.sec_rw.version;
  68. cur_op->bs_op->offset = cur_op->req.sec_rw.offset;
  69. cur_op->bs_op->len = cur_op->req.sec_rw.len;
  70. cur_op->bs_op->buf = cur_op->buf;
  71. cur_op->bs_op->bitmap = cur_op->bitmap;
  72. #ifdef OSD_STUB
  73. cur_op->bs_op->retval = cur_op->bs_op->len;
  74. #endif
  75. }
  76. else if (cur_op->req.hdr.opcode == OSD_OP_SEC_DELETE)
  77. {
  78. cur_op->bs_op->oid = cur_op->req.sec_del.oid;
  79. cur_op->bs_op->version = cur_op->req.sec_del.version;
  80. #ifdef OSD_STUB
  81. cur_op->bs_op->retval = 0;
  82. #endif
  83. }
  84. else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
  85. cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
  86. {
  87. cur_op->bs_op->len = cur_op->req.sec_stab.len/sizeof(obj_ver_id);
  88. cur_op->bs_op->buf = cur_op->buf;
  89. #ifdef OSD_STUB
  90. cur_op->bs_op->retval = 0;
  91. #endif
  92. }
  93. else if (cur_op->req.hdr.opcode == OSD_OP_SEC_LIST)
  94. {
  95. if (cur_op->req.sec_list.pg_count < cur_op->req.sec_list.list_pg)
  96. {
  97. // requested pg number is greater than total pg count
  98. printf("Invalid LIST request: pg count %u < pg number %u\n", cur_op->req.sec_list.pg_count, cur_op->req.sec_list.list_pg);
  99. cur_op->bs_op->retval = -EINVAL;
  100. secondary_op_callback(cur_op);
  101. return;
  102. }
  103. cur_op->bs_op->oid.stripe = cur_op->req.sec_list.pg_stripe_size;
  104. cur_op->bs_op->len = cur_op->req.sec_list.pg_count;
  105. cur_op->bs_op->offset = cur_op->req.sec_list.list_pg - 1;
  106. cur_op->bs_op->oid.inode = cur_op->req.sec_list.min_inode;
  107. cur_op->bs_op->version = cur_op->req.sec_list.max_inode;
  108. #ifdef OSD_STUB
  109. cur_op->bs_op->retval = 0;
  110. cur_op->bs_op->buf = NULL;
  111. #endif
  112. }
  113. #ifdef OSD_STUB
  114. secondary_op_callback(cur_op);
  115. #else
  116. bs->enqueue_op(cur_op->bs_op);
  117. #endif
  118. }
  119. void osd_t::exec_show_config(osd_op_t *cur_op)
  120. {
  121. // FIXME: Send the real config, not its source
  122. std::string cfg_str = json11::Json(config).dump();
  123. cur_op->buf = malloc_or_die(cfg_str.size()+1);
  124. memcpy(cur_op->buf, cfg_str.c_str(), cfg_str.size()+1);
  125. cur_op->iov.push_back(cur_op->buf, cfg_str.size()+1);
  126. finish_op(cur_op, cfg_str.size()+1);
  127. }
  128. void osd_t::exec_sync_stab_all(osd_op_t *cur_op)
  129. {
  130. // Sync and stabilize all objects
  131. // This command is only valid for tests
  132. cur_op->bs_op = new blockstore_op_t();
  133. if (!allow_test_ops)
  134. {
  135. cur_op->bs_op->retval = -EINVAL;
  136. secondary_op_callback(cur_op);
  137. return;
  138. }
  139. cur_op->bs_op->opcode = BS_OP_SYNC_STAB_ALL;
  140. cur_op->bs_op->callback = [this, cur_op](blockstore_op_t *bs_op)
  141. {
  142. secondary_op_callback(cur_op);
  143. };
  144. #ifdef OSD_STUB
  145. cur_op->bs_op->retval = 0;
  146. secondary_op_callback(cur_op);
  147. #else
  148. bs->enqueue_op(cur_op->bs_op);
  149. #endif
  150. }