Simplified distributed block storage with strong consistency, similar to Ceph.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

180 lines
5.0 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. #pragma once
  2. #ifndef _LARGEFILE64_SOURCE
  3. #define _LARGEFILE64_SOURCE
  4. #endif
  5. #include <string.h>
  6. #include <assert.h>
  7. #include <liburing.h>
  8. #include <functional>
  9. #include <vector>
// Fill an SQE for any "read/write-shaped" opcode, mirroring liburing's internal
// io_uring_prep_rw(). Every generic field is reset explicitly because SQE slots
// in the submission ring are reused and may hold stale data from a prior op.
//
// NOTE(review): sqe->user_data is deliberately NOT cleared here — it is always
// assigned afterwards via io_uring_sqe_set_data() (see ring_loop_t::get_sqe).
// NOTE(review): the opcode-specific wrappers below overwrite single union
// members (rw_flags aliases msg_flags/fsync_flags/poll_events/etc., and
// __pad2 appears to overlay the buf_index tail in this liburing version —
// confirm against the exact liburing header in use), so they must run AFTER
// this function, never before.
static inline void my_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, const void *addr, unsigned len, off_t offset)
{
    sqe->opcode = op;
    sqe->flags = 0;
    sqe->ioprio = 0;
    sqe->fd = fd;
    sqe->off = offset;
    sqe->addr = (unsigned long) addr; // kernel ABI carries the pointer as an integer field
    sqe->len = len;
    sqe->rw_flags = 0;
    sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0;
}
  22. static inline void my_uring_prep_readv(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, off_t offset)
  23. {
  24. my_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
  25. }
// Queue a read into a pre-registered buffer (IORING_OP_READ_FIXED).
// <buf_index> selects the buffer previously registered with the kernel;
// it must be assigned AFTER my_uring_prep_rw(), which zeroes the SQE tail.
static inline void my_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, void *buf, unsigned nbytes, off_t offset, int buf_index)
{
    my_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
    sqe->buf_index = buf_index;
}
  31. static inline void my_uring_prep_writev(struct io_uring_sqe *sqe, int fd, const struct iovec *iovecs, unsigned nr_vecs, off_t offset)
  32. {
  33. my_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
  34. }
// Queue a write from a pre-registered buffer (IORING_OP_WRITE_FIXED).
// <buf_index> selects the registered buffer; assigned AFTER
// my_uring_prep_rw(), which zeroes the SQE tail.
static inline void my_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, const void *buf, unsigned nbytes, off_t offset, int buf_index)
{
    my_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
    sqe->buf_index = buf_index;
}
// Queue a recvmsg(2)-style receive (IORING_OP_RECVMSG) on socket <fd>.
// The msghdr is passed via the addr field with len=1; msg_flags overwrites
// the rw_flags union member, so it must be set AFTER my_uring_prep_rw().
static inline void my_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd, struct msghdr *msg, unsigned flags)
{
    my_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
    sqe->msg_flags = flags;
}
// Queue a sendmsg(2)-style send (IORING_OP_SENDMSG) on socket <fd>.
// Same layout trick as recvmsg: msghdr in addr, len=1, and msg_flags
// written AFTER prep_rw because it aliases rw_flags.
static inline void my_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd, const struct msghdr *msg, unsigned flags)
{
    my_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
    sqe->msg_flags = flags;
}
// Queue a one-shot poll (IORING_OP_POLL_ADD) on <fd> for the events in
// <poll_mask> (POLLIN/POLLOUT/...). poll_events aliases rw_flags, so it is
// set AFTER my_uring_prep_rw().
static inline void my_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, short poll_mask)
{
    my_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
    sqe->poll_events = poll_mask;
}
  55. static inline void my_uring_prep_poll_remove(struct io_uring_sqe *sqe, void *user_data)
  56. {
  57. my_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, 0, user_data, 0, 0);
  58. }
// Queue an fsync (IORING_OP_FSYNC) on <fd>. <fsync_flags> may carry
// IORING_FSYNC_DATASYNC; it aliases rw_flags and is set AFTER prep_rw.
static inline void my_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, unsigned fsync_flags)
{
    my_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
    sqe->fsync_flags = fsync_flags;
}
// Queue a no-op (IORING_OP_NOP) — completes immediately; useful to wake
// a waiter or test the ring.
static inline void my_uring_prep_nop(struct io_uring_sqe *sqe)
{
    my_uring_prep_rw(IORING_OP_NOP, sqe, 0, NULL, 0, 0);
}
// Queue a timeout (IORING_OP_TIMEOUT) that fires after <ts> or once <count>
// other completions arrive. Note the ABI quirk: <count> travels in the SQE's
// off field (passed here as prep_rw's offset argument), while the timespec
// pointer goes in addr with len=1. timeout_flags aliases rw_flags and is
// set AFTER prep_rw.
static inline void my_uring_prep_timeout(struct io_uring_sqe *sqe, struct __kernel_timespec *ts, unsigned count, unsigned flags)
{
    my_uring_prep_rw(IORING_OP_TIMEOUT, sqe, 0, ts, 1, count);
    sqe->timeout_flags = flags;
}
// Cancel a pending timeout (IORING_OP_TIMEOUT_REMOVE). The target timeout
// is identified by the user_data it was submitted with, smuggled through
// the addr field as a pointer-sized integer.
static inline void my_uring_prep_timeout_remove(struct io_uring_sqe *sqe, __u64 user_data, unsigned flags)
{
    my_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, 0, (void *)user_data, 0, 0);
    sqe->timeout_flags = flags;
}
// Queue an accept4(2)-style accept (IORING_OP_ACCEPT) on listening socket
// <fd>. ABI quirk: the addrlen POINTER is cast to an integer and carried in
// the SQE's off field (prep_rw's offset argument); the sockaddr buffer goes
// in addr. accept_flags (SOCK_NONBLOCK/SOCK_CLOEXEC) aliases rw_flags and
// is set AFTER prep_rw.
static inline void my_uring_prep_accept(struct io_uring_sqe *sqe, int fd, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
    my_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0, (__u64) addrlen);
    sqe->accept_flags = flags;
}
  83. static inline void my_uring_prep_cancel(struct io_uring_sqe *sqe, void *user_data, int flags)
  84. {
  85. my_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, 0, user_data, 0, 0);
  86. sqe->cancel_flags = flags;
  87. }
// Per-SQE completion context. ring_loop_t::get_sqe() stores a pointer to one
// of these in the SQE's user_data; when the matching CQE arrives, the loop
// fills <res> and invokes <callback> with this struct.
struct ring_data_t
{
    struct iovec iov; // convenience iovec for single-entry read/write operations
    bool allow_cancel; // NOTE(review): presumably marks the op as safely cancelable — confirm against ring_loop.cpp
    int res; // operation result from the CQE (negative errno on failure, per io_uring convention — confirm where it is filled)
    std::function<void(ring_data_t*)> callback; // completion handler, receives this context
};
// An event-loop participant: a subsystem that submits its own SQEs.
// ring_loop_t invokes <loop> on every iteration so the consumer can
// make progress.
struct ring_consumer_t
{
    int number; // identifier/slot assigned by register_consumer() — TODO confirm (assigned outside this header)
    std::function<void(void)> loop; // called once per event-loop iteration
};
// Central io_uring event loop. Owns the ring itself, a fixed pool of
// ring_data_t completion contexts (one per in-flight request), and the list
// of registered consumers whose loop() callbacks drive submission.
// Out-of-line methods (loop, wakeup, register_*, ...) live in the
// accompanying .cpp file — their semantics below are inferred and should be
// confirmed there.
class ring_loop_t
{
    std::vector<ring_consumer_t> consumers;
    struct ring_data_t *ring_datas;    // pool of <ring_data_total> completion contexts
    int *free_ring_data;               // stack of free indices into ring_datas
    unsigned free_ring_data_ptr, ring_data_total; // free-stack top / total pool size
    bool loop_again;                   // request another loop() pass — presumably set by wakeup(); confirm
    struct io_uring ring;
    int registered = 0;                // NOTE(review): looks like an io_uring_register-fds state flag — confirm
    std::vector<int> reg_fds;          // table of registered fds (see register_fd/unregister_fd)
    void drain_events(void *completions_ptr);
    void run_completions(void *completions_ptr);
public:
    ring_loop_t(int qd); // qd: queue depth — sizes both the ring and the context pool
    ~ring_loop_t();
    int register_consumer(ring_consumer_t & consumer);
    void unregister_consumer(ring_consumer_t & consumer);
    int register_fd(int fd);
    void unregister_fd(int fd_index);
    // Grab a free SQE together with a completion context.
    // Returns NULL when either the context pool or the SQ ring is exhausted;
    // the caller must submit()/wait() and retry later.
    inline struct io_uring_sqe* get_sqe()
    {
        if (free_ring_data_ptr == 0)
        {
            return NULL;
        }
        struct io_uring_sqe* sqe = io_uring_get_sqe(&ring);
        if (sqe)
        {
            // Pop a context index off the free stack and attach the context
            // to the SQE as its user_data
            ring_data_t *data = ring_datas + free_ring_data[--free_ring_data_ptr];
            io_uring_sqe_set_data(sqe, data);
        }
        return sqe;
    }
    // Push all queued SQEs to the kernel
    inline int submit()
    {
        return io_uring_submit(&ring);
    }
    // Block until at least one CQE is available. The CQE is only peeked at,
    // not consumed here — completions are processed elsewhere (loop()).
    inline int wait()
    {
        struct io_uring_cqe *cqe;
        return io_uring_wait_cqe(&ring, &cqe);
    }
    // Number of free completion contexts — an upper bound on how many more
    // get_sqe() calls can succeed before completions are reaped
    inline unsigned space_left()
    {
        return free_ring_data_ptr;
    }
    inline bool get_loop_again()
    {
        return loop_again;
    }
    void loop();   // process completions and run all consumer loop() callbacks
    void wakeup(); // ask the loop to run another iteration
    unsigned save();                 // snapshot the SQ tail...
    void restore(unsigned sqe_tail); // ...and roll back SQEs queued after it — confirm exact semantics in ring_loop.cpp
};