Add up_wait_retry_interval to config and fix it so it actually works
parent
44973e7f27
commit
73e26dbbea
|
@ -101,15 +101,21 @@ void cluster_client_t::stop()
|
|||
}
|
||||
}
|
||||
|
||||
void cluster_client_t::continue_ops()
|
||||
// Resumes the operations queued in cur_ops.
// Any armed retry timer is cancelled first. Operations flagged with up_wait are
// resumed only when up_retry is true (their flag is cleared before resuming);
// with up_retry == false they are skipped and stay flagged. Operations without
// the flag are always passed to continue_rw().
void cluster_client_t::continue_ops(bool up_retry)
|
||||
{
|
||||
if (retry_timeout_id)
|
||||
{
|
||||
tfd->clear_timer(retry_timeout_id); // cancel the pending retry timer
|
||||
retry_timeout_id = 0; // 0 is used as "no timer armed" by the check above
|
||||
}
|
||||
for (auto op_it = cur_ops.begin(); op_it != cur_ops.end(); )
|
||||
{
|
||||
if ((*op_it)->up_wait)
|
||||
{
|
||||
if (up_retry)
|
||||
{
|
||||
(*op_it)->up_wait = false;
|
||||
// The iterator is post-incremented, so it already points at the next element
|
||||
// when continue_rw() runs. NOTE(review): presumably because continue_rw() may
|
||||
// remove the operation from cur_ops - confirm against continue_rw().
|
||||
continue_rw(*op_it++);
|
||||
}
|
||||
else
|
||||
op_it++; // still waiting: leave the operation flagged and move on
|
||||
}
|
||||
else
|
||||
continue_rw(*op_it++);
|
||||
}
|
||||
}
|
||||
|
@ -173,6 +179,15 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
|||
{
|
||||
client_dirty_limit = DEFAULT_CLIENT_DIRTY_LIMIT;
|
||||
}
|
||||
up_wait_retry_interval = config["up_wait_retry_interval"].uint64_value();
|
||||
if (!up_wait_retry_interval)
|
||||
{
|
||||
up_wait_retry_interval = 500;
|
||||
}
|
||||
else if (up_wait_retry_interval < 50)
|
||||
{
|
||||
up_wait_retry_interval = 50;
|
||||
}
|
||||
msgr.peer_connect_interval = config["peer_connect_interval"].uint64_value();
|
||||
if (!msgr.peer_connect_interval)
|
||||
{
|
||||
|
@ -696,9 +711,17 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
|||
part->osd_num, part->op.reply.hdr.retval, expected
|
||||
);
|
||||
msgr.stop_client(part->op.peer_fd);
|
||||
if (part->op.reply.hdr.retval && !retry_timeout_id)
|
||||
if (part->op.reply.hdr.retval == -EPIPE)
|
||||
{
|
||||
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int) { retry_timeout_id = 0; continue_ops(); });
|
||||
op->up_wait = true;
|
||||
if (!retry_timeout_id)
|
||||
{
|
||||
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
|
||||
{
|
||||
retry_timeout_id = 0;
|
||||
continue_ops(true);
|
||||
});
|
||||
}
|
||||
}
|
||||
if (!op->retval || op->retval == -EPIPE)
|
||||
{
|
||||
|
|
|
@ -40,6 +40,7 @@ protected:
|
|||
cluster_op_t *orig_op = NULL;
|
||||
bool is_internal = false;
|
||||
bool needs_reslice = false;
|
||||
bool up_wait = false;
|
||||
int sent_count = 0, done_count = 0;
|
||||
std::vector<cluster_op_part_t> parts;
|
||||
friend class cluster_client_t;
|
||||
|
@ -59,7 +60,6 @@ class cluster_client_t
|
|||
// FIXME: Implement inmemory_commit mode. Note that it requires to return overlapping reads from memory.
|
||||
uint64_t client_dirty_limit = 0;
|
||||
int log_level;
|
||||
// FIXME: Put up_wait_retry_interval into config and fix it so it could actually work
|
||||
int up_wait_retry_interval = 500; // ms
|
||||
|
||||
uint64_t op_id = 1;
|
||||
|
@ -85,7 +85,7 @@ public:
|
|||
void stop();
|
||||
|
||||
protected:
|
||||
void continue_ops();
|
||||
void continue_ops(bool up_retry = false);
|
||||
void on_load_config_hook(json11::Json::object & config);
|
||||
void on_load_pgs_hook(bool success);
|
||||
void on_change_hook(json11::Json::object & changes);
|
||||
|
|
13
lp/mon.js
13
lp/mon.js
|
@ -30,11 +30,11 @@ class Mon
|
|||
/* global: {
|
||||
// mon
|
||||
etcd_mon_ttl: 30, // min: 10
|
||||
etcd_mon_timeout: 1000, // min: 0
|
||||
etcd_mon_timeout: 1000, // ms. min: 0
|
||||
etcd_mon_retries: 5, // min: 0
|
||||
mon_change_timeout: 1000, // min: 100
|
||||
mon_stats_timeout: 1000, // min: 100
|
||||
osd_out_time: 1800, // min: 0
|
||||
mon_change_timeout: 1000, // ms. min: 100
|
||||
mon_stats_timeout: 1000, // ms. min: 100
|
||||
osd_out_time: 1800, // seconds. min: 0
|
||||
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
|
||||
// client and osd
|
||||
use_sync_send_recv: false,
|
||||
|
@ -45,8 +45,9 @@ class Mon
|
|||
pg_stripe_size: 4194304,
|
||||
immediate_commit: false, // 'all' or 'small'
|
||||
client_dirty_limit: 33554432,
|
||||
peer_connect_interval: 5,
|
||||
peer_connect_timeout: 5,
|
||||
peer_connect_interval: 5, // seconds. min: 1
|
||||
peer_connect_timeout: 5, // seconds. min: 1
|
||||
up_wait_retry_interval: 500, // ms. min: 50
|
||||
// osd
|
||||
etcd_report_interval: 30, // min: 10
|
||||
run_primary: true,
|
||||
|
|
Loading…
Reference in New Issue