Report inode I/O statistics, aggregate them in the monitor

rdma-zerocopy
Vitaliy Filippov 2021-01-21 00:30:18 +03:00
parent 4ae1b84c67
commit ffe1cd4c79
7 changed files with 183 additions and 95 deletions

View File

@ -34,12 +34,12 @@ breaking changes in the future. However, the following is implemented:
- NBD proxy for kernel mounts - NBD proxy for kernel mounts
- Inode removal tool (vitastor-rm) - Inode removal tool (vitastor-rm)
- Packaging for Debian and CentOS - Packaging for Debian and CentOS
- Per-inode I/O and space usage statistics
## Roadmap ## Roadmap
- OSD creation tool (OSDs currently have to be created by hand) - OSD creation tool (OSDs currently have to be created by hand)
- Other administrative tools - Other administrative tools
- Per-inode I/O and space usage statistics
- Proxmox and OpenNebula plugins - Proxmox and OpenNebula plugins
- iSCSI proxy - iSCSI proxy
- Inode metadata storage in etcd - Inode metadata storage in etcd

View File

@ -26,13 +26,14 @@ const etcd_allow = new RegExp('^'+[
'config/pgs', 'config/pgs',
'osd/state/[1-9]\\d*', 'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*', 'osd/stats/[1-9]\\d*',
'osd/inodestats/[1-9]\\d*',
'osd/space/[1-9]\\d*', 'osd/space/[1-9]\\d*',
'mon/master', 'mon/master',
'pg/state/[1-9]\\d*/[1-9]\\d*', 'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*', 'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*', 'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs', 'history/last_clean_pgs',
'inode/space/[1-9]\\d*', 'inode/stats/[1-9]\\d*',
'stats', 'stats',
].join('$|^')+'$'); ].join('$|^')+'$');
@ -174,6 +175,13 @@ const etcd_tree = {
}, },
}, */ }, */
}, },
inodestats: {
/* <inode_t>: {
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
}, */
},
space: { space: {
/* <osd_num_t>: { /* <osd_num_t>: {
<inode_t>: uint64_t, // bytes <inode_t>: uint64_t, // bytes
@ -219,9 +227,12 @@ const etcd_tree = {
}, },
}, },
inode: { inode: {
space: { stats: {
/* <inode_t>: { /* <inode_t>: {
raw: uint64_t, // raw bytes on OSDs raw_used: uint64_t, // raw used bytes on OSDs
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
}, */ }, */
}, },
}, },
@ -409,7 +420,7 @@ class Mon
{ {
this.parse_kv(e.kv); this.parse_kv(e.kv);
const key = e.kv.key.substr(this.etcd_prefix.length); const key = e.kv.key.substr(this.etcd_prefix.length);
if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/') if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
{ {
stats_changed = true; stats_changed = true;
} }
@ -417,7 +428,7 @@ class Mon
{ {
pg_states_changed = true; pg_states_changed = true;
} }
else if (key != '/stats' && key.substr(0, 13) != '/inode/space/') else if (key != '/stats' && key.substr(0, 13) != '/inode/stats/')
{ {
changed = true; changed = true;
} }
@ -1093,8 +1104,6 @@ class Mon
sum_stats() sum_stats()
{ {
let overflow = false;
this.prev_stats = this.prev_stats || { op_stats: {}, subop_stats: {}, recovery_stats: {} };
const op_stats = {}, subop_stats = {}, recovery_stats = {}; const op_stats = {}, subop_stats = {}, recovery_stats = {};
for (const osd in this.state.osd.stats) for (const osd in this.state.osd.stats)
{ {
@ -1119,52 +1128,11 @@ class Mon
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0); recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
} }
} }
for (const op in op_stats) return { op_stats, subop_stats, recovery_stats };
{ }
if (op_stats[op].count >= 0x10000000000000000n)
{ sum_object_counts()
if (!this.prev_stats.op_stats[op]) {
{
overflow = true;
}
else
{
op_stats[op].count -= this.prev_stats.op_stats[op].count;
op_stats[op].usec -= this.prev_stats.op_stats[op].usec;
op_stats[op].bytes -= this.prev_stats.op_stats[op].bytes;
}
}
}
for (const op in subop_stats)
{
if (subop_stats[op].count >= 0x10000000000000000n)
{
if (!this.prev_stats.subop_stats[op])
{
overflow = true;
}
else
{
subop_stats[op].count -= this.prev_stats.subop_stats[op].count;
subop_stats[op].usec -= this.prev_stats.subop_stats[op].usec;
}
}
}
for (const op in recovery_stats)
{
if (recovery_stats[op].count >= 0x10000000000000000n)
{
if (!this.prev_stats.recovery_stats[op])
{
overflow = true;
}
else
{
recovery_stats[op].count -= this.prev_stats.recovery_stats[op].count;
recovery_stats[op].bytes -= this.prev_stats.recovery_stats[op].bytes;
}
}
}
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n }; const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
for (const pool_id in this.state.pg.stats) for (const pool_id in this.state.pg.stats)
{ {
@ -1183,49 +1151,107 @@ class Mon
} }
} }
} }
return (this.prev_stats = { overflow, op_stats, subop_stats, recovery_stats, object_counts }); return object_counts;
}
sum_inode_stats()
{
const inode_stats = {};
const inode_stub = () => ({
raw_used: 0n,
read: { count: 0n, usec: 0n, bytes: 0n },
write: { count: 0n, usec: 0n, bytes: 0n },
delete: { count: 0n, usec: 0n, bytes: 0n },
});
for (const osd_num in this.state.osd.space)
{
for (const inode_num in this.state.osd.space[osd_num])
{
inode_stats[inode_num] = inode_stats[inode_num] || inode_stub();
inode_stats[inode_num].raw_used += BigInt(this.state.osd.space[osd_num][inode_num]||0);
}
}
for (const osd_num in this.state.osd.inodestats)
{
const ist = this.state.osd.inodestats[osd_num];
for (const inode_num in ist)
{
inode_stats[inode_num] = inode_stats[inode_num] || inode_stub();
for (const op of [ 'read', 'write', 'delete' ])
{
inode_stats[inode_num][op].count += BigInt(ist[inode_num][op].count||0);
inode_stats[inode_num][op].usec += BigInt(ist[inode_num][op].usec||0);
inode_stats[inode_num][op].bytes += BigInt(ist[inode_num][op].bytes||0);
}
}
}
return inode_stats;
}
// Adjust statistics values in <obj> that have reached 2^64, using <scratch> as state kept by the caller.
// Walks <obj> recursively; <scratch> mirrors its nesting (missing nested objects are created as {}).
// Both <obj> and <scratch> are modified in place, nothing is returned.
// Presumably this copes with sums of uint64_t counters exceeding 64 bits - TODO confirm.
fix_stat_overflows(obj, scratch)
{
for (const k in obj)
{
if (typeof obj[k] == 'bigint')
{
// 0x10000000000000000n is 2^64
if (obj[k] >= 0x10000000000000000n)
{
if (scratch[k])
{
// A non-zero value was saved for this key earlier: subtract every saved
// entry from the same-named entry of <obj>, then reset the saved entries.
// NOTE(review): this loops over ALL keys of <scratch>, including nested objects
// created by the recursion below (e.g. read/write/delete next to raw_used in
// inode stats) - `-=` on such entries would not be a BigInt subtraction; confirm.
for (const k2 in scratch)
{
obj[k2] -= scratch[k2];
scratch[k2] = 0n;
}
}
else
{
// Nothing (or 0n) saved yet: remember the current values, <obj> is left unadjusted.
// NOTE(review): this branch is also taken on the call following a subtraction
// (scratch was reset to 0n) if the value is still >= 2^64 - confirm this is intended.
for (const k2 in obj)
{
scratch[k2] = obj[k2];
}
}
}
}
else if (typeof obj[k] == 'object')
{
// Descend into nested objects with the matching (possibly new) scratch sub-object
this.fix_stat_overflows(obj[k], scratch[k] = (scratch[k] || {}));
}
}
}
// Recursively replace every BigInt value inside <obj> with its decimal string, in place.
// Values of other non-object types are left untouched. Returns nothing.
serialize_bigints(obj)
{
for (const k in obj)
{
if (typeof obj[k] == 'bigint')
{
// ''+bigint yields the decimal representation
obj[k] = ''+obj[k];
}
else if (typeof obj[k] == 'object')
{
// Nested object: convert its BigInts too
this.serialize_bigints(obj[k]);
}
}
} }
async update_total_stats() async update_total_stats()
{ {
const txn = []; const txn = [];
const stats = this.sum_stats(); const stats = this.sum_stats();
if (!stats.overflow) const object_counts = this.sum_object_counts();
{ const inode_stats = this.sum_inode_stats();
// Convert to strings, serialize and save this.fix_stat_overflows(stats, (this.prev_stats = this.prev_stats || {}));
const ser = {}; this.fix_stat_overflows(inode_stats, (this.prev_inode_stats = this.prev_inode_stats || {}));
for (const st of [ 'op_stats', 'subop_stats', 'recovery_stats' ]) stats.object_counts = object_counts;
{ this.serialize_bigints(stats);
ser[st] = {}; this.serialize_bigints(inode_stats);
for (const op in stats[st]) txn.push({ requestPut: { key: b64(this.etcd_prefix+'/stats'), value: b64(JSON.stringify(stats)) } });
{ for (const inode_num in inode_stats)
ser[st][op] = {};
for (const k in stats[st][op])
{
ser[st][op][k] = ''+stats[st][op][k];
}
}
}
ser.object_counts = {};
for (const k in stats.object_counts)
{
ser.object_counts[k] = ''+stats.object_counts[k];
}
txn.push({ requestPut: { key: b64(this.etcd_prefix+'/stats'), value: b64(JSON.stringify(ser)) } });
}
const space_stats = {};
for (const osd_num in this.state.osd.space)
{
for (const inode_num in this.state.osd.space[osd_num])
{
space_stats[inode_num] = (space_stats[inode_num] || BigInt(0)) + BigInt(this.state.osd.space[osd_num][inode_num]||0);
}
}
for (const inode_num in space_stats)
{ {
txn.push({ requestPut: { txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/inode/space/'+inode_num), key: b64(this.etcd_prefix+'/inode/stats/'+inode_num),
value: b64(JSON.stringify({ raw: ''+space_stats[inode_num] })), value: b64(JSON.stringify(inode_stats[inode_num])),
} }); } });
} }
if (txn.length) if (txn.length)

View File

@ -154,7 +154,7 @@ struct osd_primary_op_data_t;
struct osd_op_t struct osd_op_t
{ {
timespec tv_begin; timespec tv_begin = { 0 }, tv_end = { 0 };
uint64_t op_type = OSD_OP_IN; uint64_t op_type = OSD_OP_IN;
int peer_fd; int peer_fd;
osd_any_op_t req; osd_any_op_t req;

View File

@ -109,8 +109,10 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
{ {
return; return;
} }
timespec tv_end; if (!cur_op->tv_end.tv_sec)
clock_gettime(CLOCK_REALTIME, &tv_end); {
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
}
stats.op_stat_count[cur_op->req.hdr.opcode]++; stats.op_stat_count[cur_op->req.hdr.opcode]++;
if (!stats.op_stat_count[cur_op->req.hdr.opcode]) if (!stats.op_stat_count[cur_op->req.hdr.opcode])
{ {
@ -119,8 +121,8 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
stats.op_stat_bytes[cur_op->req.hdr.opcode] = 0; stats.op_stat_bytes[cur_op->req.hdr.opcode] = 0;
} }
stats.op_stat_sum[cur_op->req.hdr.opcode] += ( stats.op_stat_sum[cur_op->req.hdr.opcode] += (
(tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 + (cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
(tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000 (cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
); );
if (cur_op->req.hdr.opcode == OSD_OP_READ || if (cur_op->req.hdr.opcode == OSD_OP_READ ||
cur_op->req.hdr.opcode == OSD_OP_WRITE) cur_op->req.hdr.opcode == OSD_OP_WRITE)

View File

@ -55,6 +55,17 @@ struct osd_recovery_op_t
osd_op_t *osd_op = NULL; osd_op_t *osd_op = NULL;
}; };
// Per-inode operation statistics.
// Posted as /osd/inodestats/$osd, then accumulated by the monitor

// Indexes into the inode_stats_t arrays, one per tracked operation type
#define INODE_STATS_READ 0
#define INODE_STATS_WRITE 1
#define INODE_STATS_DELETE 2
// Number of tracked operation types; must be greater than every index above
#define INODE_STATS_OP_COUNT 3

struct inode_stats_t
{
    // Accumulated execution time of operations, in microseconds (reported as "usec")
    uint64_t op_sum[INODE_STATS_OP_COUNT] = {};
    // Number of finished operations (reported as "count")
    uint64_t op_count[INODE_STATS_OP_COUNT] = {};
    // Accumulated size of operations, in bytes (reported as "bytes")
    uint64_t op_bytes[INODE_STATS_OP_COUNT] = {};
};
class osd_t class osd_t
{ {
// config // config
@ -126,6 +137,7 @@ class osd_t
// op statistics // op statistics
osd_op_stats_t prev_stats; osd_op_stats_t prev_stats;
std::map<uint64_t, inode_stats_t> inode_stats;
const char* recovery_stat_names[2] = { "degraded", "misplaced" }; const char* recovery_stat_names[2] = { "degraded", "misplaced" };
uint64_t recovery_stat_count[2][2] = { 0 }; uint64_t recovery_stat_count[2][2] = { 0 };
uint64_t recovery_stat_bytes[2][2] = { 0 }; uint64_t recovery_stat_bytes[2][2] = { 0 };

View File

@ -187,6 +187,27 @@ void osd_t::report_statistics()
{ {
inode_space[std::to_string(kv.first)] = kv.second; inode_space[std::to_string(kv.first)] = kv.second;
} }
// Per-inode operation statistics of this OSD, keyed by the decimal inode number:
// { <inode>: { read: {count, usec, bytes}, write: {...}, delete: {...} } }
json11::Json::object inode_ops;
// Iterate by reference: copying each map entry per iteration is unnecessary
for (const auto & kv: inode_stats)
{
    const inode_stats_t & ist = kv.second;
    inode_ops[std::to_string(kv.first)] = json11::Json::object {
        { "read", json11::Json::object {
            { "count", ist.op_count[INODE_STATS_READ] },
            { "usec", ist.op_sum[INODE_STATS_READ] },
            { "bytes", ist.op_bytes[INODE_STATS_READ] },
        } },
        { "write", json11::Json::object {
            { "count", ist.op_count[INODE_STATS_WRITE] },
            { "usec", ist.op_sum[INODE_STATS_WRITE] },
            { "bytes", ist.op_bytes[INODE_STATS_WRITE] },
        } },
        { "delete", json11::Json::object {
            { "count", ist.op_count[INODE_STATS_DELETE] },
            { "usec", ist.op_sum[INODE_STATS_DELETE] },
            { "bytes", ist.op_bytes[INODE_STATS_DELETE] },
        } },
    };
}
json11::Json::array txn = { json11::Json::object { json11::Json::array txn = { json11::Json::object {
{ "request_put", json11::Json::object { { "request_put", json11::Json::object {
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/stats/"+std::to_string(osd_num)) }, { "key", base64_encode(st_cli.etcd_prefix+"/osd/stats/"+std::to_string(osd_num)) },
@ -196,6 +217,10 @@ void osd_t::report_statistics()
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/space/"+std::to_string(osd_num)) }, { "key", base64_encode(st_cli.etcd_prefix+"/osd/space/"+std::to_string(osd_num)) },
{ "value", base64_encode(json11::Json(inode_space).dump()) }, { "value", base64_encode(json11::Json(inode_space).dump()) },
} }, } },
{ "request_put", json11::Json::object {
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/inodestats/"+std::to_string(osd_num)) },
{ "value", base64_encode(json11::Json(inode_ops).dump()) },
} },
} }; } };
for (auto & p: pgs) for (auto & p: pgs)
{ {

View File

@ -36,6 +36,29 @@ void osd_t::autosync()
void osd_t::finish_op(osd_op_t *cur_op, int retval) void osd_t::finish_op(osd_op_t *cur_op, int retval)
{ {
inflight_ops--; inflight_ops--;
if (cur_op->req.hdr.opcode == OSD_OP_READ ||
    cur_op->req.hdr.opcode == OSD_OP_WRITE ||
    cur_op->req.hdr.opcode == OSD_OP_DELETE)
{
    // Track inode statistics: count, execution time and size of the finished operation
    if (!cur_op->tv_end.tv_sec)
    {
        // End time is not taken yet - take it now and keep it in the operation
        clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
    }
    uint64_t usec = (
        (cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
        (cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
    );
    int inode_st_op = cur_op->req.hdr.opcode == OSD_OP_DELETE
        ? INODE_STATS_DELETE
        : (cur_op->req.hdr.opcode == OSD_OP_READ ? INODE_STATS_READ : INODE_STATS_WRITE);
    // Look the inode up once (the entry is created zero-initialized if missing)
    inode_stats_t & ist = inode_stats[cur_op->req.rw.inode];
    ist.op_count[inode_st_op]++;
    ist.op_sum[inode_st_op] += usec;
    if (cur_op->req.hdr.opcode == OSD_OP_DELETE)
    {
        // Deletes are accounted as a whole PG data stripe.
        // op_data may be NULL here (finish_op itself checks it right below),
        // so it must not be dereferenced unconditionally; such deletes add no bytes.
        if (cur_op->op_data)
        {
            ist.op_bytes[inode_st_op] += cur_op->op_data->pg_data_size * bs_block_size;
        }
    }
    else
    {
        ist.op_bytes[inode_st_op] += cur_op->req.rw.len;
    }
}
if (cur_op->op_data) if (cur_op->op_data)
{ {
if (cur_op->op_data->pg_num > 0) if (cur_op->op_data->pg_num > 0)
@ -66,7 +89,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
} }
else else
{ {
// FIXME add separate magic number // FIXME add separate magic number for primary ops
auto cl_it = c_cli.clients.find(cur_op->peer_fd); auto cl_it = c_cli.clients.find(cur_op->peer_fd);
if (cl_it != c_cli.clients.end()) if (cl_it != c_cli.clients.end())
{ {