Browse Source

Autosync based on number of unstable ops to prevent journal stalls

nbd-vmsplice
Vitaliy Filippov 10 months ago
parent
commit
cfe8de9b84
  1. 1
      mon/mon.js
  2. 5
      src/blockstore.cpp
  3. 2
      src/blockstore.h
  4. 1
      src/blockstore_impl.h
  5. 11
      src/osd.cpp
  6. 5
      src/osd.h
  7. 2
      src/osd_primary_subops.cpp
  8. 6
      src/osd_primary_write.cpp
  9. 6
      tests/test_write.sh

1
mon/mon.js

@ -87,6 +87,7 @@ const etcd_tree = {
bind_address: "0.0.0.0",
bind_port: 0,
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 4,
recovery_sync_batch: 16,

5
src/blockstore.cpp

@ -68,6 +68,11 @@ uint64_t blockstore_t::get_free_block_count()
return impl->get_free_block_count();
}
// Forwards to the implementation object and returns the journal size it reports
uint64_t blockstore_t::get_journal_size()
{
    const uint64_t journal_size = impl->get_journal_size();
    return journal_size;
}
uint32_t blockstore_t::get_bitmap_granularity()
{
return impl->get_bitmap_granularity();

2
src/blockstore.h

@ -194,5 +194,7 @@ public:
uint64_t get_block_count();
uint64_t get_free_block_count();
uint64_t get_journal_size();
uint32_t get_bitmap_granularity();
};

1
src/blockstore_impl.h

@ -368,4 +368,5 @@ public:
// Returns the value of the block_count member
inline uint64_t get_block_count()
{
    return block_count;
}
// Returns the free count reported by the data_alloc allocator
inline uint64_t get_free_block_count()
{
    return data_alloc->get_free_count();
}
// The bitmap granularity is reported as the value of disk_alignment
inline uint32_t get_bitmap_granularity()
{
    return disk_alignment;
}
// Returns the len field of the journal member
inline uint64_t get_journal_size()
{
    return journal.len;
}
};

11
src/osd.cpp

@ -45,6 +45,12 @@ osd_t::osd_t(const json11::Json & config, ring_loop_t *ringloop)
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
auto bs_cfg = json_to_bs(this->config);
this->bs = new blockstore_t(bs_cfg, ringloop, tfd);
{
// Autosync based on the number of unstable writes to prevent stalls due to insufficient journal space
// The threshold is capped at half of the journal size divided by the blockstore block size
// NOTE(review): autosync_writes is declared as int while max_autosync is uint64_t, so the
// comparison and the assignment below mix signedness and width - confirm this is intended
uint64_t max_autosync = bs->get_journal_size() / bs->get_block_size() / 2;
if (autosync_writes > max_autosync)
autosync_writes = max_autosync;
}
this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id)
{
@ -123,6 +129,11 @@ void osd_t::parse_config(const json11::Json & config)
if (autosync_interval > MAX_AUTOSYNC_INTERVAL)
autosync_interval = DEFAULT_AUTOSYNC_INTERVAL;
}
if (!config["autosync_writes"].is_null())
{
// Zero is an accepted value: the setting is taken as-is, without any range check here
autosync_writes = config["autosync_writes"].uint64_value();
}
if (!config["client_queue_depth"].is_null())
{
client_queue_depth = config["client_queue_depth"].uint64_value();

5
src/osd.h

@ -35,6 +35,7 @@
#define MAX_AUTOSYNC_INTERVAL 3600
#define DEFAULT_AUTOSYNC_INTERVAL 5
#define DEFAULT_AUTOSYNC_WRITES 128
#define MAX_RECOVERY_QUEUE 2048
#define DEFAULT_RECOVERY_QUEUE 4
#define DEFAULT_RECOVERY_BATCH 16
@ -108,7 +109,8 @@ class osd_t
int print_stats_interval = 3;
int slow_log_interval = 10;
int immediate_commit = IMMEDIATE_NONE;
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // "emergency" sync every 5 seconds
int autosync_writes = DEFAULT_AUTOSYNC_WRITES;
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
int log_level = 0;
@ -140,6 +142,7 @@ class osd_t
osd_op_t *autosync_op = NULL;
// Unstable writes
uint64_t unstable_write_count = 0;
std::map<osd_object_id_t, uint64_t> unstable_writes;
std::deque<osd_op_t*> syncs_in_progress;

2
src/osd_primary_subops.cpp

@ -5,8 +5,6 @@
void osd_t::autosync()
{
// FIXME Autosync based on the number of unstable writes to prevent
// "journal_sector_buffer_count is too low for this batch" errors
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
{
autosync_op = new osd_op_t();

6
src/osd_primary_write.cpp

@ -274,6 +274,11 @@ continue_others:
}
// finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :)
finish_op(cur_op, cur_op->reply.hdr.retval);
// Request a sync once the unstable write counter reaches the autosync_writes threshold;
// the counter is reset before autosync() is called
// NOTE(review): the counter is reset even if autosync() does not start a new sync
// (it appears to be guarded by immediate_commit and a pending autosync_op) - confirm this is acceptable
if (unstable_write_count >= autosync_writes)
{
unstable_write_count = 0;
autosync();
}
if (next_op)
{
// Continue next write to the same object
@ -353,6 +358,7 @@ resume_7:
else
{
lazy:
unstable_write_count++;
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remember version as unstable for EC/XOR

6
tests/test_write.sh

@ -5,6 +5,12 @@
#LD_PRELOAD=libasan.so.5 \
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
# Random writes without immediate_commit were stalling OSDs
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=124k -direct=1 -numjobs=16 -iodepth=4 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# A lot of parallel syncs were crashing the primary OSD at some point
LD_PRELOAD=libasan.so.5 \

Loading…
Cancel
Save