forked from vitalif/vitastor
Autosync based on number of unstable ops to prevent journal stalls
parent
24b9b19066
commit
cfe8de9b84
|
@ -87,6 +87,7 @@ const etcd_tree = {
|
||||||
bind_address: "0.0.0.0",
|
bind_address: "0.0.0.0",
|
||||||
bind_port: 0,
|
bind_port: 0,
|
||||||
autosync_interval: 5,
|
autosync_interval: 5,
|
||||||
|
autosync_writes: 128,
|
||||||
client_queue_depth: 128, // unused
|
client_queue_depth: 128, // unused
|
||||||
recovery_queue_depth: 4,
|
recovery_queue_depth: 4,
|
||||||
recovery_sync_batch: 16,
|
recovery_sync_batch: 16,
|
||||||
|
|
|
@ -68,6 +68,11 @@ uint64_t blockstore_t::get_free_block_count()
|
||||||
return impl->get_free_block_count();
|
return impl->get_free_block_count();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t blockstore_t::get_journal_size()
|
||||||
|
{
|
||||||
|
return impl->get_journal_size();
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t blockstore_t::get_bitmap_granularity()
|
uint32_t blockstore_t::get_bitmap_granularity()
|
||||||
{
|
{
|
||||||
return impl->get_bitmap_granularity();
|
return impl->get_bitmap_granularity();
|
||||||
|
|
|
@ -194,5 +194,7 @@ public:
|
||||||
uint64_t get_block_count();
|
uint64_t get_block_count();
|
||||||
uint64_t get_free_block_count();
|
uint64_t get_free_block_count();
|
||||||
|
|
||||||
|
uint64_t get_journal_size();
|
||||||
|
|
||||||
uint32_t get_bitmap_granularity();
|
uint32_t get_bitmap_granularity();
|
||||||
};
|
};
|
||||||
|
|
|
@ -368,4 +368,5 @@ public:
|
||||||
inline uint64_t get_block_count() { return block_count; }
|
inline uint64_t get_block_count() { return block_count; }
|
||||||
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
|
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
|
||||||
inline uint32_t get_bitmap_granularity() { return disk_alignment; }
|
inline uint32_t get_bitmap_granularity() { return disk_alignment; }
|
||||||
|
inline uint64_t get_journal_size() { return journal.len; }
|
||||||
};
|
};
|
||||||
|
|
11
src/osd.cpp
11
src/osd.cpp
|
@ -45,6 +45,12 @@ osd_t::osd_t(const json11::Json & config, ring_loop_t *ringloop)
|
||||||
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
|
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
|
||||||
auto bs_cfg = json_to_bs(this->config);
|
auto bs_cfg = json_to_bs(this->config);
|
||||||
this->bs = new blockstore_t(bs_cfg, ringloop, tfd);
|
this->bs = new blockstore_t(bs_cfg, ringloop, tfd);
|
||||||
|
{
|
||||||
|
// Autosync based on the number of unstable writes to prevent stalls due to insufficient journal space
|
||||||
|
uint64_t max_autosync = bs->get_journal_size() / bs->get_block_size() / 2;
|
||||||
|
if (autosync_writes > max_autosync)
|
||||||
|
autosync_writes = max_autosync;
|
||||||
|
}
|
||||||
|
|
||||||
this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id)
|
this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id)
|
||||||
{
|
{
|
||||||
|
@ -123,6 +129,11 @@ void osd_t::parse_config(const json11::Json & config)
|
||||||
if (autosync_interval > MAX_AUTOSYNC_INTERVAL)
|
if (autosync_interval > MAX_AUTOSYNC_INTERVAL)
|
||||||
autosync_interval = DEFAULT_AUTOSYNC_INTERVAL;
|
autosync_interval = DEFAULT_AUTOSYNC_INTERVAL;
|
||||||
}
|
}
|
||||||
|
if (!config["autosync_writes"].is_null())
|
||||||
|
{
|
||||||
|
// Allow to set it to 0
|
||||||
|
autosync_writes = config["autosync_writes"].uint64_value();
|
||||||
|
}
|
||||||
if (!config["client_queue_depth"].is_null())
|
if (!config["client_queue_depth"].is_null())
|
||||||
{
|
{
|
||||||
client_queue_depth = config["client_queue_depth"].uint64_value();
|
client_queue_depth = config["client_queue_depth"].uint64_value();
|
||||||
|
|
|
@ -35,6 +35,7 @@
|
||||||
|
|
||||||
#define MAX_AUTOSYNC_INTERVAL 3600
|
#define MAX_AUTOSYNC_INTERVAL 3600
|
||||||
#define DEFAULT_AUTOSYNC_INTERVAL 5
|
#define DEFAULT_AUTOSYNC_INTERVAL 5
|
||||||
|
#define DEFAULT_AUTOSYNC_WRITES 128
|
||||||
#define MAX_RECOVERY_QUEUE 2048
|
#define MAX_RECOVERY_QUEUE 2048
|
||||||
#define DEFAULT_RECOVERY_QUEUE 4
|
#define DEFAULT_RECOVERY_QUEUE 4
|
||||||
#define DEFAULT_RECOVERY_BATCH 16
|
#define DEFAULT_RECOVERY_BATCH 16
|
||||||
|
@ -108,7 +109,8 @@ class osd_t
|
||||||
int print_stats_interval = 3;
|
int print_stats_interval = 3;
|
||||||
int slow_log_interval = 10;
|
int slow_log_interval = 10;
|
||||||
int immediate_commit = IMMEDIATE_NONE;
|
int immediate_commit = IMMEDIATE_NONE;
|
||||||
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
|
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // "emergency" sync every 5 seconds
|
||||||
|
int autosync_writes = DEFAULT_AUTOSYNC_WRITES;
|
||||||
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
|
||||||
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
||||||
int log_level = 0;
|
int log_level = 0;
|
||||||
|
@ -140,6 +142,7 @@ class osd_t
|
||||||
osd_op_t *autosync_op = NULL;
|
osd_op_t *autosync_op = NULL;
|
||||||
|
|
||||||
// Unstable writes
|
// Unstable writes
|
||||||
|
uint64_t unstable_write_count = 0;
|
||||||
std::map<osd_object_id_t, uint64_t> unstable_writes;
|
std::map<osd_object_id_t, uint64_t> unstable_writes;
|
||||||
std::deque<osd_op_t*> syncs_in_progress;
|
std::deque<osd_op_t*> syncs_in_progress;
|
||||||
|
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
|
|
||||||
void osd_t::autosync()
|
void osd_t::autosync()
|
||||||
{
|
{
|
||||||
// FIXME Autosync based on the number of unstable writes to prevent
|
|
||||||
// "journal_sector_buffer_count is too low for this batch" errors
|
|
||||||
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
|
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
|
||||||
{
|
{
|
||||||
autosync_op = new osd_op_t();
|
autosync_op = new osd_op_t();
|
||||||
|
|
|
@ -274,6 +274,11 @@ continue_others:
|
||||||
}
|
}
|
||||||
// finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :)
|
// finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :)
|
||||||
finish_op(cur_op, cur_op->reply.hdr.retval);
|
finish_op(cur_op, cur_op->reply.hdr.retval);
|
||||||
|
if (unstable_write_count >= autosync_writes)
|
||||||
|
{
|
||||||
|
unstable_write_count = 0;
|
||||||
|
autosync();
|
||||||
|
}
|
||||||
if (next_op)
|
if (next_op)
|
||||||
{
|
{
|
||||||
// Continue next write to the same object
|
// Continue next write to the same object
|
||||||
|
@ -353,6 +358,7 @@ resume_7:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
lazy:
|
lazy:
|
||||||
|
unstable_write_count++;
|
||||||
if (op_data->scheme != POOL_SCHEME_REPLICATED)
|
if (op_data->scheme != POOL_SCHEME_REPLICATED)
|
||||||
{
|
{
|
||||||
// Remember version as unstable for EC/XOR
|
// Remember version as unstable for EC/XOR
|
||||||
|
|
|
@ -5,6 +5,12 @@
|
||||||
#LD_PRELOAD=libasan.so.5 \
|
#LD_PRELOAD=libasan.so.5 \
|
||||||
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
|
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
|
||||||
|
|
||||||
|
# Random writes without immediate_commit were stalling OSDs
|
||||||
|
|
||||||
|
LD_PRELOAD=libasan.so.5 \
|
||||||
|
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=124k -direct=1 -numjobs=16 -iodepth=4 \
|
||||||
|
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
|
||||||
|
|
||||||
# A lot of parallel syncs was crashing the primary OSD at some point
|
# A lot of parallel syncs was crashing the primary OSD at some point
|
||||||
|
|
||||||
LD_PRELOAD=libasan.so.5 \
|
LD_PRELOAD=libasan.so.5 \
|
||||||
|
|
Loading…
Reference in New Issue