Make OSDs stick to the last successful etcd address

Previously OSDs were selecting a new random etcd from the cluster
on every request so they were failing randomly when part of etcds was down
test-assert
Vitaliy Filippov 2021-11-27 22:35:22 +03:00
parent 8398ad0117
commit ce5b6253ab
3 changed files with 63 additions and 20 deletions

View File

@ -50,19 +50,13 @@ void etcd_state_client_t::etcd_txn(json11::Json txn, int timeout, std::function<
void etcd_state_client_t::etcd_call(std::string api, json11::Json payload, int timeout, std::function<void(std::string, json11::Json)> callback)
{
if (!etcd_addresses.size())
if (!etcd_addresses.size() && !etcd_local.size())
{
fprintf(stderr, "etcd_address is missing in Vitastor configuration\n");
exit(1);
}
std::string etcd_address;
// Always prefer local etcd
// FIXME: Handle partial etcd failures. Now OSDs select etcd instance
// randomly so all OSDs may crash if any of etcds crashes.
if (etcd_local.size() > 0)
etcd_address = etcd_local[rand() % etcd_local.size()];
else
etcd_address = etcd_addresses[rand() % etcd_addresses.size()];
pick_next_etcd();
std::string etcd_address = selected_etcd_address;
std::string etcd_api_path;
int pos = etcd_address.find('/');
if (pos >= 0)
@ -77,7 +71,12 @@ void etcd_state_client_t::etcd_call(std::string api, json11::Json payload, int t
"Content-Length: "+std::to_string(req.size())+"\r\n"
"Connection: close\r\n"
"\r\n"+req;
http_request_json(tfd, etcd_address, req, timeout, callback);
http_request_json(tfd, etcd_address, req, timeout, [this, cur_addr = selected_etcd_address, callback](std::string err, json11::Json data)
{
if (err != "" && cur_addr == selected_etcd_address)
selected_etcd_address = "";
callback(err, data);
});
}
void etcd_state_client_t::add_etcd_url(std::string addr)
@ -104,10 +103,17 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
check_addr = addr;
if (pos == std::string::npos)
addr += "/v3";
this->etcd_addresses.push_back(addr);
for (int i = 0; i < local_ips.size(); i++)
int i;
for (i = 0; i < local_ips.size(); i++)
{
if (local_ips[i] == check_addr)
{
this->etcd_local.push_back(addr);
break;
}
}
if (i >= local_ips.size())
this->etcd_addresses.push_back(addr);
}
}
@ -146,14 +152,38 @@ void etcd_state_client_t::parse_config(const json11::Json & config)
this->log_level = config["log_level"].int64_value();
}
void etcd_state_client_t::pick_next_etcd()
{
if (selected_etcd_address != "")
return;
if (addresses_to_try.size() == 0)
{
// Prefer local etcd, if any
for (int i = 0; i < etcd_local.size(); i++)
addresses_to_try.push_back(etcd_local[i]);
std::vector<int> ns;
for (int i = 0; i < etcd_addresses.size(); i++)
ns.push_back(i);
while (ns.size())
{
int i = rand() % ns.size();
addresses_to_try.push_back(etcd_addresses[ns[i]]);
ns.erase(ns.begin()+i, ns.begin()+i+1);
}
}
selected_etcd_address = addresses_to_try[0];
addresses_to_try.erase(addresses_to_try.begin(), addresses_to_try.begin()+1);
}
void etcd_state_client_t::start_etcd_watcher()
{
if (!etcd_addresses.size())
if (!etcd_addresses.size() && !etcd_local.size())
{
fprintf(stderr, "etcd_address is missing in Vitastor configuration\n");
exit(1);
}
std::string etcd_address = etcd_addresses[rand() % etcd_addresses.size()];
pick_next_etcd();
std::string etcd_address = selected_etcd_address;
std::string etcd_api_path;
int pos = etcd_address.find('/');
if (pos >= 0)
@ -162,7 +192,8 @@ void etcd_state_client_t::start_etcd_watcher()
etcd_address = etcd_address.substr(0, pos);
}
etcd_watches_initialised = 0;
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", ETCD_SLOW_TIMEOUT, [this](const http_response_t *msg)
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", ETCD_SLOW_TIMEOUT,
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
{
if (msg->body.length())
{
@ -181,6 +212,7 @@ void etcd_state_client_t::start_etcd_watcher()
if (etcd_watches_initialised == 4)
{
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
addresses_to_try.clear();
}
// First gather all changes into a hash to remove multiple overwrites
std::map<std::string, etcd_kv_t> changes;
@ -209,11 +241,13 @@ void etcd_state_client_t::start_etcd_watcher()
}
if (msg->eof)
{
if (cur_addr == selected_etcd_address)
selected_etcd_address = "";
etcd_watch_ws = NULL;
if (etcd_watches_initialised == 0)
{
// Connection not established, retry in <ETCD_SLOW_TIMEOUT>
tfd->set_timer(ETCD_SLOW_TIMEOUT, false, [this](int)
// Connection not established, retry in <ETCD_QUICK_TIMEOUT>
tfd->set_timer(ETCD_QUICK_TIMEOUT, false, [this](int)
{
start_etcd_watcher();
});
@ -805,3 +839,8 @@ json11::Json::object etcd_state_client_t::serialize_inode_cfg(inode_config_t *cf
}
return new_cfg;
}
int etcd_state_client_t::address_count()
{
return etcd_addresses.size() + etcd_local.size();
}

View File

@ -76,13 +76,16 @@ struct etcd_state_client_t
{
protected:
std::vector<std::string> local_ips;
std::vector<std::string> etcd_addresses;
std::vector<std::string> etcd_local;
std::string selected_etcd_address;
std::vector<std::string> addresses_to_try;
std::vector<inode_watch_t*> watches;
websocket_t *etcd_watch_ws = NULL;
uint64_t bs_block_size = DEFAULT_BLOCK_SIZE;
void add_etcd_url(std::string);
void pick_next_etcd();
public:
std::vector<std::string> etcd_addresses;
std::string etcd_prefix;
int log_level = 0;
timerfd_manager_t *tfd = NULL;
@ -112,5 +115,6 @@ public:
void parse_config(const json11::Json & config);
inode_watch_t* watch_inode(std::string name);
void close_watch(inode_watch_t* watch);
int address_count();
~etcd_state_client_t();
};

View File

@ -15,7 +15,7 @@
// Peer connection is lost -> Reload connection data -> Try to reconnect
void osd_t::init_cluster()
{
if (!st_cli.etcd_addresses.size())
if (!st_cli.address_count())
{
if (run_primary)
{
@ -711,7 +711,7 @@ struct reporting_pg_t
void osd_t::report_pg_states()
{
if (etcd_reporting_pg_state || !this->pg_state_dirty.size() || !st_cli.etcd_addresses.size())
if (etcd_reporting_pg_state || !this->pg_state_dirty.size() || !st_cli.address_count())
{
return;
}