Implement per-pool PG calculation, fix some lint warnings

Vitaliy Filippov 2020-09-01 18:50:23 +03:00
parent fe0d78bf8e
commit a8b3cbd6af
2 changed files with 185 additions and 108 deletions

View File

@ -55,7 +55,7 @@ async function optimize_initial({ osd_tree, pg_count, pg_size = 3, pg_minsize =
} }
const all_weights = Object.assign({}, ...Object.values(osd_tree)); const all_weights = Object.assign({}, ...Object.values(osd_tree));
const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0); const total_weight = Object.values(all_weights).reduce((a, c) => Number(a) + Number(c), 0);
all_pgs = Object.values(random_combinations(osd_tree, pg_size, max_combinations)); const all_pgs = Object.values(random_combinations(osd_tree, pg_size, max_combinations));
const pg_per_osd = {}; const pg_per_osd = {};
for (const pg of all_pgs) for (const pg of all_pgs)
{ {
@ -246,7 +246,7 @@ async function optimize_change({ prev_pgs: prev_int_pgs, osd_tree, pg_size = 3,
} }
} }
// Get all combinations // Get all combinations
all_pgs = random_combinations(osd_tree, pg_size, max_combinations); let all_pgs = random_combinations(osd_tree, pg_size, max_combinations);
add_valid_previous(osd_tree, prev_weights, all_pgs); add_valid_previous(osd_tree, prev_weights, all_pgs);
all_pgs = Object.values(all_pgs); all_pgs = Object.values(all_pgs);
const pg_per_osd = {}; const pg_per_osd = {};

289
lp/mon.js
View File

@ -1,4 +1,5 @@
const http = require('http'); const http = require('http');
const crypto = require('crypto');
const os = require('os'); const os = require('os');
const WebSocket = require('ws'); const WebSocket = require('ws');
const LPOptimizer = require('./lp-optimizer.js'); const LPOptimizer = require('./lp-optimizer.js');
@ -13,14 +14,15 @@ class Mon
'config/global', 'config/global',
'config/node_placement', 'config/node_placement',
'config/pools', 'config/pools',
'config/osd/\d+', 'config/osd/[1-9]\d*',
'config/pgs', 'config/pgs',
'osd/state/\d+', 'osd/state/[1-9]\d*',
'osd/stats/\d+', 'osd/stats/[1-9]\d*',
'mon/master', 'mon/master',
'pg/state/\d+/\d+', 'pg/state/[1-9]\d*/[1-9]\d*',
'pg/stats/\d+/\d+', 'pg/stats/[1-9]\d*/[1-9]\d*',
'pg/history/\d+/\d+', 'pg/history/[1-9]\d*/[1-9]\d*',
'stats',
].join('$|^')+'$') ].join('$|^')+'$')
static etcd_tree = { static etcd_tree = {
@ -33,7 +35,6 @@ class Mon
mon_change_timeout: 1000, // min: 100 mon_change_timeout: 1000, // min: 100
mon_stats_timeout: 1000, // min: 100 mon_stats_timeout: 1000, // min: 100
osd_out_time: 1800, // min: 0 osd_out_time: 1800, // min: 0
max_osd_combinations: 10000, // min: 100
placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... }, placement_levels: { datacenter: 1, rack: 2, host: 3, osd: 4, ... },
// client and osd // client and osd
use_sync_send_recv: false, use_sync_send_recv: false,
@ -64,10 +65,15 @@ class Mon
}, */ }, */
node_placement: {}, node_placement: {},
/* pools: { /* pools: {
<name>: { <id>: {
id: 1, name: 'testpool',
scheme: 'xor',
pg_size: 3,
pg_minsize: 2,
pg_count: 100, pg_count: 100,
failure_domain: 'host', failure_domain: 'host',
max_osd_combinations: 10000,
// FIXME add device classes/tags
}, },
... ...
}, */ }, */
@ -76,11 +82,14 @@ class Mon
/* <id>: { reweight: 1 }, ... */ /* <id>: { reweight: 1 }, ... */
}, },
/* pgs: { /* pgs: {
<pool_id>: { hash: string,
<pg_id>: { items: {
osd_set: [ 1, 2, 3 ], <pool_id>: {
primary: 1, <pg_id>: {
pause: false, osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
} }
} }
}, */ }, */
@ -118,7 +127,9 @@ class Mon
}, },
}, },
mon: { mon: {
master: {}, master: {
/* ip: [ string ], */
},
}, },
pg: { pg: {
state: { state: {
@ -132,11 +143,11 @@ class Mon
stats: { stats: {
/* <pool_id>: { /* <pool_id>: {
<pg_id>: { <pg_id>: {
object_count: int, object_count: uint64_t,
clean_count: int, clean_count: uint64_t,
misplaced_count: int, misplaced_count: uint64_t,
degraded_count: int, degraded_count: uint64_t,
incomplete_count: int, incomplete_count: uint64_t,
write_osd_set: osd_num_t[], write_osd_set: osd_num_t[],
}, },
}, */ }, */
@ -146,11 +157,30 @@ class Mon
<pg_id>: { <pg_id>: {
osd_sets: osd_num_t[][], osd_sets: osd_num_t[][],
all_peers: osd_num_t[], all_peers: osd_num_t[],
epoch: int, epoch: uint32_t,
}, },
}, */ }, */
}, },
}, },
stats: {
/* op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t },
},
object_counts: {
object: uint64_t,
clean: uint64_t,
misplaced: uint64_t,
degraded: uint64_t,
incomplete: uint64_t,
}, */
},
} }
constructor(config) constructor(config)
@ -223,11 +253,6 @@ class Mon
{ {
this.config.osd_out_time = 30*60; // 30 minutes by default this.config.osd_out_time = 30*60; // 30 minutes by default
} }
this.config.max_osd_combinations = Number(this.config.max_osd_combinations) || 10000;
if (this.config.max_osd_combinations < 100)
{
this.config.max_osd_combinations = 100;
}
} }
async start_watcher(retries) async start_watcher(retries)
@ -246,7 +271,7 @@ class Mon
{ {
this.ws.close(); this.ws.close();
ok(false); ok(false);
}, timeout); }, this.config.etcd_mon_timeout);
this.ws = new WebSocket(base+'/watch'); this.ws = new WebSocket(base+'/watch');
this.ws.on('open', () => this.ws.on('open', () =>
{ {
@ -329,7 +354,7 @@ class Mon
{ {
this.die('Lease expired'); this.die('Lease expired');
} }
}, config.etcd_mon_timeout); }, this.config.etcd_mon_timeout);
} }
async become_master() async become_master()
@ -421,28 +446,29 @@ class Mon
return LPOptimizer.flatten_tree(tree[''].children, levels, this.config.failure_domain, 'osd'); return LPOptimizer.flatten_tree(tree[''].children, levels, this.config.failure_domain, 'osd');
} }
async stop_all_pgs() async stop_all_pgs(pool_id)
{ {
let has_online = false, paused = true; let has_online = false, paused = true;
for (const pg in this.state.config.pgs.items||{}) for (const pg in this.state.config.pgs.items[pool_id]||{})
{ {
const cur_state = ((this.state.pg.state[pg]||{}).state||[]).join(','); // FIXME: Change all (||{}) to ?. (optional chaining) at some point
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
if (cur_state != '' && cur_state != 'offline') if (cur_state != '' && cur_state != 'offline')
{ {
has_online = true; has_online = true;
} }
if (!this.state.config.pgs.items[pg].pause) if (!this.state.config.pgs.items[pool_id][pg].pause)
{ {
paused = false; paused = false;
} }
} }
if (!paused) if (!paused)
{ {
console.log('Stopping all PGs before changing PG count'); console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs)); const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
for (const pg in new_cfg.items) for (const pg in new_cfg.items[pool_id])
{ {
new_cfg.items[pg].pause = true; new_cfg.items[pool_id][pg].pause = true;
} }
// Check that no OSDs change their state before we pause PGs // Check that no OSDs change their state before we pause PGs
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction" // Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
@ -472,9 +498,8 @@ class Mon
return !has_online; return !has_online;
} }
async save_new_pgs(prev_pgs, new_pgs, pg_history, tree_hash) save_new_pgs_txn(request, pool_id, prev_pgs, new_pgs, pg_history)
{ {
const txn = [], checks = [];
const pg_items = {}; const pg_items = {};
new_pgs.map((osd_set, i) => new_pgs.map((osd_set, i) =>
{ {
@ -493,109 +518,158 @@ class Mon
}); });
for (let i = 0; i < new_pgs.length || i < prev_pgs.length; i++) for (let i = 0; i < new_pgs.length || i < prev_pgs.length; i++)
{ {
checks.push({ request.compare.push({
key: b64(this.etcd_prefix+'/pg/history/'+(i+1)), key: b64(this.etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
target: 'MOD', target: 'MOD',
mod_revision: ''+this.etcd_watch_revision, mod_revision: ''+this.etcd_watch_revision,
result: 'LESS', result: 'LESS',
}); });
if (pg_history[i]) if (pg_history[i])
{ {
txn.push({ request.success.push({
requestPut: { requestPut: {
key: b64(this.etcd_prefix+'/pg/history/'+(i+1)), key: b64(this.etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
value: b64(JSON.stringify(pg_history[i])), value: b64(JSON.stringify(pg_history[i])),
}, },
}); });
} }
else else
{ {
txn.push({ request.success.push({
requestDeleteRange: { requestDeleteRange: {
key: b64(this.etcd_prefix+'/pg/history/'+(i+1)), key: b64(this.etcd_prefix+'/pg/history/'+pool_id+'/'+(i+1)),
}, },
}); });
} }
} }
this.state.config.pgs = { this.state.config.pgs.items[pool_id] = pg_items;
hash: tree_hash, }
items: pg_items,
}; validate_pool_cfg(pool_id, pool_cfg)
const res = await this.etcd_call('/txn', { {
compare: [ pool_cfg.pg_size = Math.floor(pool_cfg.pg_size);
{ key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id }, pool_cfg.pg_minsize = Math.floor(pool_cfg.pg_minsize);
{ key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' }, pool_cfg.pg_count = Math.floor(pool_cfg.pg_count);
...checks, pool_cfg.failure_domain = pool_cfg.failure_domain || 'host';
], pool_cfg.max_osd_combinations = Math.floor(pool_cfg.max_osd_combinations) || 10000;
success: [ if (!/^[1-9]\d*$/.exec(''+pool_id))
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(this.state.config.pgs)) } }, {
...txn, console.log('Pool ID '+pool_id+' is invalid');
], return false;
}, this.config.etcd_mon_timeout, 0); }
return res.succeeded; if (!pool_cfg.pg_size || pool_cfg.pg_size < 1)
{
console.log('Pool '+pool_id+' has invalid pg_size');
return false;
}
if (!pool_cfg.pg_minsize || pool_cfg.pg_minsize < 1 || pool_cfg.pg_minsize > pool_cfg.pg_size)
{
console.log('Pool '+pool_id+' has invalid pg_minsize');
return false;
}
if (!pool_cfg.pg_count || pool_cfg.pg_count < 1)
{
console.log('Pool '+pool_id+' has invalid pg_count');
return false;
}
if (!pool_cfg.name)
{
console.log('Pool '+pool_id+' has invalid pg_count');
return false;
}
if (pool_cfg.max_osd_combinations < 100)
{
console.log('Pool '+pool_id+' has invalid max_osd_combinations');
return false;
}
return true;
} }
async recheck_pgs() async recheck_pgs()
{ {
// Take configuration and state, check it against the stored configuration hash // Take configuration and state, check it against the stored configuration hash
// Recalculate PGs and save them to etcd if the configuration is changed // Recalculate PGs and save them to etcd if the configuration is changed
// FIXME: Also do not change anything if the distribution is good enough and no PGs are degraded
const tree_cfg = { const tree_cfg = {
osd_tree: this.get_osd_tree(), osd_tree: this.get_osd_tree(),
pg_count: this.config.pg_count || Object.keys(this.state.config.pgs.items||{}).length || 128, pools: this.state.config.pools,
max_osd_combinations: this.config.max_osd_combinations,
}; };
const tree_hash = sha1hex(stableStringify(tree_cfg)); const tree_hash = sha1hex(stableStringify(tree_cfg));
if (this.state.config.pgs.hash != tree_hash) if (this.state.config.pgs.hash != tree_hash)
{ {
// Something has changed // Something has changed
const prev_pgs = []; const etcd_request = { compare: [], success: [] };
for (const pg in this.state.config.pgs.items||{}) for (const pool_id in this.state.config.pools)
{ {
prev_pgs[pg-1] = this.state.config.pgs.items[pg].osd_set; const pool_cfg = this.state.config.pools[pool_id];
} if (!this.validate_pool_cfg(pool_id, pool_cfg))
const pg_history = [];
const old_pg_count = prev_pgs.length;
let optimize_result;
if (old_pg_count > 0)
{
if (old_pg_count != tree_cfg.pg_count)
{ {
// PG count changed. Need to bring all PGs down. return;
if (!await this.stop_all_pgs())
{
this.schedule_recheck();
return;
}
PGUtil.scale_pg_count(prev_pgs, this.state.pg.history, pg_history, new_pg_count);
} }
optimize_result = await LPOptimizer.optimize_change({ const prev_pgs = [];
prev_pgs, for (const pg in (this.state.config.pgs.items[pool_id]||{}).items||{})
osd_tree: tree_cfg.osd_tree, {
pg_size: 3, prev_pgs[pg-1] = this.state.config.pgs.items[pool_id][pg].osd_set;
max_combinations: tree_cfg.max_osd_combinations, }
}); const pg_history = [];
const old_pg_count = prev_pgs.length;
let optimize_result;
if (old_pg_count > 0)
{
if (old_pg_count != pool_cfg.pg_count)
{
// PG count changed. Need to bring all PGs down.
if (!await this.stop_all_pgs(pool_id))
{
this.schedule_recheck();
return;
}
PGUtil.scale_pg_count(prev_pgs, this.state.pg.history[pool_id]||{}, pg_history, pool_cfg.pg_count);
}
optimize_result = await LPOptimizer.optimize_change({
prev_pgs,
osd_tree: tree_cfg.osd_tree,
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
max_combinations: pool_cfg.max_osd_combinations,
});
}
else
{
optimize_result = await LPOptimizer.optimize_initial({
osd_tree: tree_cfg.osd_tree,
pg_count: pool_cfg.pg_count,
pg_size: pool_cfg.pg_size,
pg_minsize: pool_cfg.pg_minsize,
max_combinations: pool_cfg.max_osd_combinations,
});
}
if (old_pg_count != optimize_result.int_pgs.length)
{
console.log(
`PG count for pool ${pool_id} (${pool_cfg.name || 'unnamed'}) `+
`changed from: ${old_pg_count} to ${optimize_result.int_pgs.length}`
);
}
LPOptimizer.print_change_stats(optimize_result);
this.save_new_pgs_txn(etcd_request, pool_id, prev_pgs, optimize_result.int_pgs, pg_history);
} }
else this.state.config.pgs.hash = tree_hash;
{ etcd_request.compare.push(
optimize_result = await LPOptimizer.optimize_initial({ { key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
osd_tree: tree_cfg.osd_tree, { key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
pg_size: 3, );
pg_count: tree_cfg.pg_count, etcd_request.success.push(
max_combinations: tree_cfg.max_osd_combinations, { requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(this.state.config.pgs)) } },
}); );
} const res = await this.etcd_call('/txn', etcd_request, this.config.etcd_mon_timeout, 0);
if (!await this.save_new_pgs(prev_pgs, optimize_result.int_pgs, pg_history, tree_hash)) if (!res.succeeded)
{ {
console.log('Someone changed PG configuration while we also tried to change it. Retrying in '+this.config.mon_change_timeout+' ms'); console.log('Someone changed PG configuration while we also tried to change it. Retrying in '+this.config.mon_change_timeout+' ms');
this.schedule_recheck(); this.schedule_recheck();
return; return;
} }
console.log('PG configuration successfully changed'); console.log('PG configuration successfully changed');
if (old_pg_count != optimize_result.int_pgs.length)
{
console.log(`PG count changed from: ${old_pg_count} to ${optimize_result.int_pgs.length}`);
}
LPOptimizer.print_change_stats(optimize_result);
} }
} }
@ -688,14 +762,17 @@ class Mon
} }
} }
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n }; const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
for (const pg_num in this.state.pg.stats) for (const pool_id in this.state.pg.stats)
{ {
const st = this.state.pg.stats[pg_num]; for (const pg_num in this.state.pg.stats[pool_id])
for (const k in object_counts)
{ {
if (st[k+'_count']) const st = this.state.pg.stats[pool_id][pg_num];
for (const k in object_counts)
{ {
object_counts[k] += BigInt(st[k+'_count']); if (st[k+'_count'])
{
object_counts[k] += BigInt(st[k+'_count']);
}
} }
} }
} }
@ -761,7 +838,7 @@ class Mon
return; return;
} }
key = key.split('/'); key = key.split('/');
const cur = this.state; let cur = this.state;
for (let i = 0; i < key.length-1; i++) for (let i = 0; i < key.length-1; i++)
{ {
cur = (cur[key[i]] = cur[key[i]] || {}); cur = (cur[key[i]] = cur[key[i]] || {});
@ -854,7 +931,7 @@ function POST(url, body, timeout)
clearTimeout(timer_id); clearTimeout(timer_id);
let res_body = ''; let res_body = '';
res.setEncoding('utf8'); res.setEncoding('utf8');
res.on('data', chunk => { res_body += chunk }); res.on('data', chunk => { res_body += chunk; });
res.on('end', () => res.on('end', () =>
{ {
if (res.statusCode != 200) if (res.statusCode != 200)