Simplified distributed block storage with strong consistency, like in Ceph
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

162 lines
4.3 KiB

// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
#pragma once
#include <sys/types.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <set>
#include <map>
#include <deque>
#include <vector>
#include "malloc_or_die.h"
#include "json11/json11.hpp"
#include "msgr_op.h"
#include "timerfd_manager.h"
#include <ringloop.h>
#define CL_READ_HDR 1
#define CL_READ_DATA 2
#define CL_READ_REPLY_DATA 3
#define CL_WRITE_READY 1
#define CL_WRITE_REPLY 2
#define PEER_CONNECTING 1
#define PEER_CONNECTED 2
#define PEER_STOPPED 3
#define DEFAULT_PEER_CONNECT_INTERVAL 5
#define DEFAULT_PEER_CONNECT_TIMEOUT 5
#define DEFAULT_OSD_PING_TIMEOUT 5
struct osd_client_t
{
int refs = 0;
sockaddr_in peer_addr;
int peer_port;
int peer_fd;
int peer_state;
int connect_timeout_id = -1;
int ping_time_remaining = 0;
int idle_time_remaining = 0;
osd_num_t osd_num = 0;
void *in_buf = NULL;
// Read state
int read_ready = 0;
osd_op_t *read_op = NULL;
iovec read_iov = { 0 };
msghdr read_msg = { 0 };
int read_remaining = 0;
int read_state = 0;
osd_op_buf_list_t recv_list;
// Incoming operations
std::vector<osd_op_t*> received_ops;
// Outbound operations
std::map<uint64_t, osd_op_t*> sent_ops;
// PGs dirtied by this client's primary-writes
std::set<pool_pg_num_t> dirty_pgs;
// Write state
msghdr write_msg = { 0 };
int write_state = 0;
std::vector<iovec> send_list, next_send_list;
std::vector<osd_op_t*> outbox, next_outbox;
~osd_client_t()
{
free(in_buf);
in_buf = NULL;
}
};
struct osd_wanted_peer_t
{
json11::Json address_list;
int port;
time_t last_connect_attempt;
bool connecting, address_changed;
int address_index;
std::string cur_addr;
int cur_port;
};
struct osd_op_stats_t
{
uint64_t op_stat_sum[OSD_OP_MAX+1] = { 0 };
uint64_t op_stat_count[OSD_OP_MAX+1] = { 0 };
uint64_t op_stat_bytes[OSD_OP_MAX+1] = { 0 };
uint64_t subop_stat_sum[OSD_OP_MAX+1] = { 0 };
uint64_t subop_stat_count[OSD_OP_MAX+1] = { 0 };
};
struct osd_messenger_t
{
protected:
int keepalive_timer_id = -1;
// FIXME: make receive_buffer_size configurable
int receive_buffer_size = 64*1024;
int peer_connect_interval = DEFAULT_PEER_CONNECT_INTERVAL;
int peer_connect_timeout = DEFAULT_PEER_CONNECT_TIMEOUT;
int osd_idle_timeout = DEFAULT_OSD_PING_TIMEOUT;
int osd_ping_timeout = DEFAULT_OSD_PING_TIMEOUT;
int log_level = 0;
bool use_sync_send_recv = false;
std::vector<int> read_ready_clients;
std::vector<int> write_ready_clients;
std::vector<std::function<void()>> set_immediate;
public:
timerfd_manager_t *tfd;
ring_loop_t *ringloop;
// osd_num_t is only for logging and asserts
osd_num_t osd_num;
uint64_t next_subop_id = 1;
std::map<int, osd_client_t*> clients;
std::map<osd_num_t, osd_wanted_peer_t> wanted_peers;
std::map<uint64_t, int> osd_peer_fds;
// op statistics
osd_op_stats_t stats;
void init();
void parse_config(const json11::Json & config);
void connect_peer(uint64_t osd_num, json11::Json peer_state);
void stop_client(int peer_fd, bool force = false);
void outbox_push(osd_op_t *cur_op);
std::function<void(osd_op_t*)> exec_op;
std::function<void(osd_num_t)> repeer_pgs;
void read_requests();
void send_replies();
void accept_connections(int listen_fd);
~osd_messenger_t();
protected:
void try_connect_peer(uint64_t osd_num);
void try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port);
void handle_peer_epoll(int peer_fd, int epoll_events);
void handle_connect_epoll(int peer_fd);
void on_connect_peer(osd_num_t peer_osd, int peer_fd);
void check_peer_config(osd_client_t *cl);
void cancel_osd_ops(osd_client_t *cl);
void cancel_op(osd_op_t *op);
bool try_send(osd_client_t *cl);
void measure_exec(osd_op_t *cur_op);
void handle_send(int result, osd_client_t *cl);
bool handle_read(int result, osd_client_t *cl);
bool handle_finished_read(osd_client_t *cl);
void handle_op_hdr(osd_client_t *cl);
bool handle_reply_hdr(osd_client_t *cl);
void handle_reply_ready(osd_op_t *op);
};