Simplified distributed block storage with strong consistency, like in Ceph
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

198 lines
5.7 KiB

  1. // Copyright (c) Vitaliy Filippov, 2019+
  2. // License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
  3. /**
  4. * Stub benchmarker
  5. */
  6. #include <sys/types.h>
  7. #include <time.h>
  8. #include <sys/socket.h>
  9. #include <netinet/in.h>
  10. #include <netinet/tcp.h>
  11. #include <arpa/inet.h>
  12. #include <string.h>
  13. #include <stdio.h>
  14. #include <unistd.h>
  15. #include <fcntl.h>
  16. #include <errno.h>
  17. #include <stdlib.h>
  18. #include <signal.h>
  19. #include <stdexcept>
  20. #include "rw_blocking.h"
  21. #include "osd_ops.h"
  22. int connect_stub(const char *server_address, int server_port); // Opens a TCP/IPv4 connection to the stub; returns the socket fd, or -1 on failure
  23. void run_bench(int peer_fd); // Runs the read / write / sync request loop over peer_fd until one operation fails
  24. static uint64_t read_sum = 0, read_count = 0; // Total read latency in microseconds and number of completed reads
  25. static uint64_t write_sum = 0, write_count = 0; // Total write latency in microseconds and number of completed writes
  26. static uint64_t sync_sum = 0, sync_count = 0; // Total sync latency in microseconds and number of completed syncs
  27. void handle_sigint(int sig)
  28. {
  29. printf("4k randread: %lu us avg\n", read_count ? read_sum/read_count : 0);
  30. printf("4k randwrite: %lu us avg\n", write_count ? write_sum/write_count : 0);
  31. printf("sync: %lu us avg\n", sync_count ? sync_sum/sync_count : 0);
  32. exit(0);
  33. }
  34. int main(int narg, char *args[])
  35. {
  36. if (narg < 2)
  37. {
  38. printf("USAGE: %s SERVER_IP [PORT]\n", args[0]);
  39. return 1;
  40. }
  41. int port = 11203;
  42. if (narg >= 3)
  43. {
  44. port = atoi(args[2]);
  45. if (port <= 0 || port >= 65536)
  46. {
  47. printf("Bad port number\n");
  48. return 1;
  49. }
  50. }
  51. signal(SIGINT, handle_sigint);
  52. int peer_fd = connect_stub(args[1], port);
  53. run_bench(peer_fd);
  54. close(peer_fd);
  55. return 0;
  56. }
  57. int connect_stub(const char *server_address, int server_port)
  58. {
  59. struct sockaddr_in addr;
  60. int r;
  61. if ((r = inet_pton(AF_INET, server_address, &addr.sin_addr)) != 1)
  62. {
  63. fprintf(stderr, "server address: %s%s\n", server_address, r == 0 ? " is not valid" : ": no ipv4 support");
  64. return -1;
  65. }
  66. addr.sin_family = AF_INET;
  67. addr.sin_port = htons(server_port);
  68. int connect_fd = socket(AF_INET, SOCK_STREAM, 0);
  69. if (connect_fd < 0)
  70. {
  71. perror("socket");
  72. return -1;
  73. }
  74. if (connect(connect_fd, (sockaddr*)&addr, sizeof(addr)) < 0)
  75. {
  76. perror("connect");
  77. return -1;
  78. }
  79. int one = 1;
  80. setsockopt(connect_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
  81. return connect_fd;
  82. }
  83. bool check_reply(int r, osd_any_op_t & op, osd_any_reply_t & reply, int expected)
  84. {
  85. if (r != OSD_PACKET_SIZE)
  86. {
  87. printf("read failed\n");
  88. return false;
  89. }
  90. if (reply.hdr.magic != SECONDARY_OSD_REPLY_MAGIC ||
  91. reply.hdr.id != op.hdr.id || reply.hdr.opcode != op.hdr.opcode)
  92. {
  93. printf("bad reply: magic, id or opcode does not match request\n");
  94. return false;
  95. }
  96. if (reply.hdr.retval != expected)
  97. {
  98. printf("operation failed, retval=%ld (%s)\n", reply.hdr.retval, strerror(-reply.hdr.retval));
  99. return false;
  100. }
  101. return true;
  102. }
  103. void run_bench(int peer_fd)
  104. {
  105. osd_any_op_t op;
  106. osd_any_reply_t reply;
  107. void *buf = NULL;
  108. int r;
  109. iovec iov[2];
  110. timespec tv_begin, tv_end;
  111. clock_gettime(CLOCK_REALTIME, &tv_begin);
  112. while (1)
  113. {
  114. // read
  115. op.hdr.magic = SECONDARY_OSD_OP_MAGIC;
  116. op.hdr.id = 1;
  117. op.hdr.opcode = OSD_OP_SEC_READ;
  118. op.sec_rw.oid.inode = 3;
  119. op.sec_rw.oid.stripe = (rand() << 17) % (1 << 29); // 512 MB
  120. op.sec_rw.version = 0;
  121. op.sec_rw.len = 4096;
  122. op.sec_rw.offset = (rand() * op.sec_rw.len) % (1 << 17);
  123. r = write_blocking(peer_fd, op.buf, OSD_PACKET_SIZE) == OSD_PACKET_SIZE;
  124. if (!r)
  125. break;
  126. buf = malloc(op.sec_rw.len);
  127. iov[0] = { reply.buf, OSD_PACKET_SIZE };
  128. iov[1] = { buf, op.sec_rw.len };
  129. r = readv_blocking(peer_fd, iov, 2) == (OSD_PACKET_SIZE + op.sec_rw.len);
  130. free(buf);
  131. if (!r || !check_reply(OSD_PACKET_SIZE, op, reply, op.sec_rw.len))
  132. break;
  133. clock_gettime(CLOCK_REALTIME, &tv_end);
  134. read_count++;
  135. read_sum += (
  136. (tv_end.tv_sec - tv_begin.tv_sec)*1000000 +
  137. tv_end.tv_nsec/1000 - tv_begin.tv_nsec/1000
  138. );
  139. tv_begin = tv_end;
  140. // write
  141. op.hdr.magic = SECONDARY_OSD_OP_MAGIC;
  142. op.hdr.id = 1;
  143. op.hdr.opcode = OSD_OP_SEC_WRITE;
  144. op.sec_rw.oid.inode = 3;
  145. op.sec_rw.oid.stripe = (rand() << 17) % (1 << 29); // 512 MB
  146. op.sec_rw.version = 0;
  147. op.sec_rw.len = 4096;
  148. op.sec_rw.offset = (rand() * op.sec_rw.len) % (1 << 17);
  149. buf = malloc(op.sec_rw.len);
  150. memset(buf, rand() % 255, op.sec_rw.len);
  151. iov[0] = { op.buf, OSD_PACKET_SIZE };
  152. iov[1] = { buf, op.sec_rw.len };
  153. r = writev_blocking(peer_fd, iov, 2) == (OSD_PACKET_SIZE + op.sec_rw.len);
  154. free(buf);
  155. if (!r)
  156. break;
  157. r = read_blocking(peer_fd, reply.buf, OSD_PACKET_SIZE);
  158. if (!check_reply(r, op, reply, op.sec_rw.len))
  159. break;
  160. clock_gettime(CLOCK_REALTIME, &tv_end);
  161. write_count++;
  162. write_sum += (
  163. (tv_end.tv_sec - tv_begin.tv_sec)*1000000 +
  164. tv_end.tv_nsec/1000 - tv_begin.tv_nsec/1000
  165. );
  166. tv_begin = tv_end;
  167. // sync/stab
  168. op.hdr.magic = SECONDARY_OSD_OP_MAGIC;
  169. op.hdr.id = 1;
  170. op.hdr.opcode = OSD_OP_TEST_SYNC_STAB_ALL;
  171. r = write_blocking(peer_fd, op.buf, OSD_PACKET_SIZE) == OSD_PACKET_SIZE;
  172. if (!r)
  173. break;
  174. r = read_blocking(peer_fd, reply.buf, OSD_PACKET_SIZE);
  175. if (!check_reply(r, op, reply, 0))
  176. break;
  177. clock_gettime(CLOCK_REALTIME, &tv_end);
  178. sync_count++;
  179. sync_sum += (
  180. (tv_end.tv_sec - tv_begin.tv_sec)*1000000 +
  181. tv_end.tv_nsec/1000 - tv_begin.tv_nsec/1000
  182. );
  183. tv_begin = tv_end;
  184. }
  185. }