From 660c3f7b0de591c2ef2f48e3bfd751830c0f8fd8 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sun, 21 Nov 2021 11:39:02 +0300 Subject: [PATCH] Change default RDMA settings to 128x 129K buffers The buffer size is 129K to leave extra space for the header. The default rdma_max_send is also lowered from 32 to 1. The problem with 8x 1M buffers is that the following happens with, for example, 2 OSDs and a 4M T1Q1 write: - Server posts 8 receives - Client posts 8 sends - WRs are processed by the RDMA stack, but the OSD doesn't have time to handle them and doesn't refill buffers - Client posts 1 more send - RNR retransmission happens and performance drops to zero Overall it seems that RDMA support should be reworked to use real 'RDMA' operations, i.e. operations writing into remote memory. This has an additional advantage of avoiding a copy at the receive side of the OSD. --- src/messenger.cpp | 6 +++--- src/messenger.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/messenger.cpp b/src/messenger.cpp index dad170f4..28ea48c6 100644 --- a/src/messenger.cpp +++ b/src/messenger.cpp @@ -147,13 +147,13 @@ void osd_messenger_t::parse_config(const json11::Json & config) this->rdma_max_sge = 128; this->rdma_max_send = config["rdma_max_send"].uint64_value(); if (!this->rdma_max_send) - this->rdma_max_send = 32; + this->rdma_max_send = 1; this->rdma_max_recv = config["rdma_max_recv"].uint64_value(); if (!this->rdma_max_recv) - this->rdma_max_recv = 8; + this->rdma_max_recv = 128; this->rdma_max_msg = config["rdma_max_msg"].uint64_value(); if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024) - this->rdma_max_msg = 1024*1024; + this->rdma_max_msg = 129*1024; #endif this->receive_buffer_size = (uint32_t)config["tcp_header_buffer_size"].uint64_value(); if (!this->receive_buffer_size || this->receive_buffer_size > 1024*1024*1024) diff --git a/src/messenger.h b/src/messenger.h index 09d1e0f7..fcee30c9 100644 --- a/src/messenger.h +++ b/src/messenger.h @@ -133,7 +133,7 @@ protected: std::string rdma_device; uint64_t 
rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0; msgr_rdma_context_t *rdma_context = NULL; - uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 8; + uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0; uint64_t rdma_max_msg = 0; #endif