forked from vitalif/vitastor
Report inode I/O statistics, aggregate them in the monitor
parent
4ae1b84c67
commit
ffe1cd4c79
|
@ -34,12 +34,12 @@ breaking changes in the future. However, the following is implemented:
|
|||
- NBD proxy for kernel mounts
|
||||
- Inode removal tool (vitastor-rm)
|
||||
- Packaging for Debian and CentOS
|
||||
- Per-inode I/O and space usage statistics
|
||||
|
||||
## Roadmap
|
||||
|
||||
- OSD creation tool (OSDs currently have to be created by hand)
|
||||
- Other administrative tools
|
||||
- Per-inode I/O and space usage statistics
|
||||
- Proxmox and OpenNebula plugins
|
||||
- iSCSI proxy
|
||||
- Inode metadata storage in etcd
|
||||
|
|
202
mon/mon.js
202
mon/mon.js
|
@ -26,13 +26,14 @@ const etcd_allow = new RegExp('^'+[
|
|||
'config/pgs',
|
||||
'osd/state/[1-9]\\d*',
|
||||
'osd/stats/[1-9]\\d*',
|
||||
'osd/inodestats/[1-9]\\d*',
|
||||
'osd/space/[1-9]\\d*',
|
||||
'mon/master',
|
||||
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
||||
'history/last_clean_pgs',
|
||||
'inode/space/[1-9]\\d*',
|
||||
'inode/stats/[1-9]\\d*',
|
||||
'stats',
|
||||
].join('$|^')+'$');
|
||||
|
||||
|
@ -174,6 +175,13 @@ const etcd_tree = {
|
|||
},
|
||||
}, */
|
||||
},
|
||||
inodestats: {
|
||||
/* <inode_t>: {
|
||||
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
}, */
|
||||
},
|
||||
space: {
|
||||
/* <osd_num_t>: {
|
||||
<inode_t>: uint64_t, // bytes
|
||||
|
@ -219,9 +227,12 @@ const etcd_tree = {
|
|||
},
|
||||
},
|
||||
inode: {
|
||||
space: {
|
||||
stats: {
|
||||
/* <inode_t>: {
|
||||
raw: uint64_t, // raw bytes on OSDs
|
||||
raw_used: uint64_t, // raw used bytes on OSDs
|
||||
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
|
||||
}, */
|
||||
},
|
||||
},
|
||||
|
@ -409,7 +420,7 @@ class Mon
|
|||
{
|
||||
this.parse_kv(e.kv);
|
||||
const key = e.kv.key.substr(this.etcd_prefix.length);
|
||||
if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/')
|
||||
if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
{
|
||||
stats_changed = true;
|
||||
}
|
||||
|
@ -417,7 +428,7 @@ class Mon
|
|||
{
|
||||
pg_states_changed = true;
|
||||
}
|
||||
else if (key != '/stats' && key.substr(0, 13) != '/inode/space/')
|
||||
else if (key != '/stats' && key.substr(0, 13) != '/inode/stats/')
|
||||
{
|
||||
changed = true;
|
||||
}
|
||||
|
@ -1093,8 +1104,6 @@ class Mon
|
|||
|
||||
sum_stats()
|
||||
{
|
||||
let overflow = false;
|
||||
this.prev_stats = this.prev_stats || { op_stats: {}, subop_stats: {}, recovery_stats: {} };
|
||||
const op_stats = {}, subop_stats = {}, recovery_stats = {};
|
||||
for (const osd in this.state.osd.stats)
|
||||
{
|
||||
|
@ -1119,52 +1128,11 @@ class Mon
|
|||
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
|
||||
}
|
||||
}
|
||||
for (const op in op_stats)
|
||||
{
|
||||
if (op_stats[op].count >= 0x10000000000000000n)
|
||||
{
|
||||
if (!this.prev_stats.op_stats[op])
|
||||
{
|
||||
overflow = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
op_stats[op].count -= this.prev_stats.op_stats[op].count;
|
||||
op_stats[op].usec -= this.prev_stats.op_stats[op].usec;
|
||||
op_stats[op].bytes -= this.prev_stats.op_stats[op].bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const op in subop_stats)
|
||||
{
|
||||
if (subop_stats[op].count >= 0x10000000000000000n)
|
||||
{
|
||||
if (!this.prev_stats.subop_stats[op])
|
||||
{
|
||||
overflow = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
subop_stats[op].count -= this.prev_stats.subop_stats[op].count;
|
||||
subop_stats[op].usec -= this.prev_stats.subop_stats[op].usec;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const op in recovery_stats)
|
||||
{
|
||||
if (recovery_stats[op].count >= 0x10000000000000000n)
|
||||
{
|
||||
if (!this.prev_stats.recovery_stats[op])
|
||||
{
|
||||
overflow = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
recovery_stats[op].count -= this.prev_stats.recovery_stats[op].count;
|
||||
recovery_stats[op].bytes -= this.prev_stats.recovery_stats[op].bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
return { op_stats, subop_stats, recovery_stats };
|
||||
}
|
||||
|
||||
sum_object_counts()
|
||||
{
|
||||
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
for (const pool_id in this.state.pg.stats)
|
||||
{
|
||||
|
@ -1183,49 +1151,107 @@ class Mon
|
|||
}
|
||||
}
|
||||
}
|
||||
return (this.prev_stats = { overflow, op_stats, subop_stats, recovery_stats, object_counts });
|
||||
return object_counts;
|
||||
}
|
||||
|
||||
sum_inode_stats()
|
||||
{
|
||||
const inode_stats = {};
|
||||
const inode_stub = () => ({
|
||||
raw_used: 0n,
|
||||
read: { count: 0n, usec: 0n, bytes: 0n },
|
||||
write: { count: 0n, usec: 0n, bytes: 0n },
|
||||
delete: { count: 0n, usec: 0n, bytes: 0n },
|
||||
});
|
||||
for (const osd_num in this.state.osd.space)
|
||||
{
|
||||
for (const inode_num in this.state.osd.space[osd_num])
|
||||
{
|
||||
inode_stats[inode_num] = inode_stats[inode_num] || inode_stub();
|
||||
inode_stats[inode_num].raw_used += BigInt(this.state.osd.space[osd_num][inode_num]||0);
|
||||
}
|
||||
}
|
||||
for (const osd_num in this.state.osd.inodestats)
|
||||
{
|
||||
const ist = this.state.osd.inodestats[osd_num];
|
||||
for (const inode_num in ist)
|
||||
{
|
||||
inode_stats[inode_num] = inode_stats[inode_num] || inode_stub();
|
||||
for (const op of [ 'read', 'write', 'delete' ])
|
||||
{
|
||||
inode_stats[inode_num][op].count += BigInt(ist[inode_num][op].count||0);
|
||||
inode_stats[inode_num][op].usec += BigInt(ist[inode_num][op].usec||0);
|
||||
inode_stats[inode_num][op].bytes += BigInt(ist[inode_num][op].bytes||0);
|
||||
}
|
||||
}
|
||||
}
|
||||
return inode_stats;
|
||||
}
|
||||
|
||||
fix_stat_overflows(obj, scratch)
|
||||
{
|
||||
for (const k in obj)
|
||||
{
|
||||
if (typeof obj[k] == 'bigint')
|
||||
{
|
||||
if (obj[k] >= 0x10000000000000000n)
|
||||
{
|
||||
if (scratch[k])
|
||||
{
|
||||
for (const k2 in scratch)
|
||||
{
|
||||
obj[k2] -= scratch[k2];
|
||||
scratch[k2] = 0n;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const k2 in obj)
|
||||
{
|
||||
scratch[k2] = obj[k2];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (typeof obj[k] == 'object')
|
||||
{
|
||||
this.fix_stat_overflows(obj[k], scratch[k] = (scratch[k] || {}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
serialize_bigints(obj)
|
||||
{
|
||||
for (const k in obj)
|
||||
{
|
||||
if (typeof obj[k] == 'bigint')
|
||||
{
|
||||
obj[k] = ''+obj[k];
|
||||
}
|
||||
else if (typeof obj[k] == 'object')
|
||||
{
|
||||
this.serialize_bigints(obj[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async update_total_stats()
|
||||
{
|
||||
const txn = [];
|
||||
const stats = this.sum_stats();
|
||||
if (!stats.overflow)
|
||||
{
|
||||
// Convert to strings, serialize and save
|
||||
const ser = {};
|
||||
for (const st of [ 'op_stats', 'subop_stats', 'recovery_stats' ])
|
||||
{
|
||||
ser[st] = {};
|
||||
for (const op in stats[st])
|
||||
{
|
||||
ser[st][op] = {};
|
||||
for (const k in stats[st][op])
|
||||
{
|
||||
ser[st][op][k] = ''+stats[st][op][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
ser.object_counts = {};
|
||||
for (const k in stats.object_counts)
|
||||
{
|
||||
ser.object_counts[k] = ''+stats.object_counts[k];
|
||||
}
|
||||
txn.push({ requestPut: { key: b64(this.etcd_prefix+'/stats'), value: b64(JSON.stringify(ser)) } });
|
||||
}
|
||||
const space_stats = {};
|
||||
for (const osd_num in this.state.osd.space)
|
||||
{
|
||||
for (const inode_num in this.state.osd.space[osd_num])
|
||||
{
|
||||
space_stats[inode_num] = (space_stats[inode_num] || BigInt(0)) + BigInt(this.state.osd.space[osd_num][inode_num]||0);
|
||||
}
|
||||
}
|
||||
for (const inode_num in space_stats)
|
||||
const object_counts = this.sum_object_counts();
|
||||
const inode_stats = this.sum_inode_stats();
|
||||
this.fix_stat_overflows(stats, (this.prev_stats = this.prev_stats || {}));
|
||||
this.fix_stat_overflows(inode_stats, (this.prev_inode_stats = this.prev_inode_stats || {}));
|
||||
stats.object_counts = object_counts;
|
||||
this.serialize_bigints(stats);
|
||||
this.serialize_bigints(inode_stats);
|
||||
txn.push({ requestPut: { key: b64(this.etcd_prefix+'/stats'), value: b64(JSON.stringify(stats)) } });
|
||||
for (const inode_num in inode_stats)
|
||||
{
|
||||
txn.push({ requestPut: {
|
||||
key: b64(this.etcd_prefix+'/inode/space/'+inode_num),
|
||||
value: b64(JSON.stringify({ raw: ''+space_stats[inode_num] })),
|
||||
key: b64(this.etcd_prefix+'/inode/stats/'+inode_num),
|
||||
value: b64(JSON.stringify(inode_stats[inode_num])),
|
||||
} });
|
||||
}
|
||||
if (txn.length)
|
||||
|
|
|
@ -154,7 +154,7 @@ struct osd_primary_op_data_t;
|
|||
|
||||
struct osd_op_t
|
||||
{
|
||||
timespec tv_begin;
|
||||
timespec tv_begin = { 0 }, tv_end = { 0 };
|
||||
uint64_t op_type = OSD_OP_IN;
|
||||
int peer_fd;
|
||||
osd_any_op_t req;
|
||||
|
|
|
@ -109,8 +109,10 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
|
|||
{
|
||||
return;
|
||||
}
|
||||
timespec tv_end;
|
||||
clock_gettime(CLOCK_REALTIME, &tv_end);
|
||||
if (!cur_op->tv_end.tv_sec)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
|
||||
}
|
||||
stats.op_stat_count[cur_op->req.hdr.opcode]++;
|
||||
if (!stats.op_stat_count[cur_op->req.hdr.opcode])
|
||||
{
|
||||
|
@ -119,8 +121,8 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
|
|||
stats.op_stat_bytes[cur_op->req.hdr.opcode] = 0;
|
||||
}
|
||||
stats.op_stat_sum[cur_op->req.hdr.opcode] += (
|
||||
(tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
|
||||
(tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
|
||||
(cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
|
||||
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
|
||||
);
|
||||
if (cur_op->req.hdr.opcode == OSD_OP_READ ||
|
||||
cur_op->req.hdr.opcode == OSD_OP_WRITE)
|
||||
|
|
12
src/osd.h
12
src/osd.h
|
@ -55,6 +55,17 @@ struct osd_recovery_op_t
|
|||
osd_op_t *osd_op = NULL;
|
||||
};
|
||||
|
||||
// Posted as /osd/inodestats/$osd, then accumulated by the monitor
|
||||
#define INODE_STATS_READ 0
|
||||
#define INODE_STATS_WRITE 1
|
||||
#define INODE_STATS_DELETE 2
|
||||
// Per-inode operation counters kept by the OSD.
// Every array is indexed by INODE_STATS_READ, INODE_STATS_WRITE or INODE_STATS_DELETE.
struct inode_stats_t
{
    // Accumulated operation time; reported under the "usec" key
    uint64_t op_sum[3] = {};
    // Number of finished operations
    uint64_t op_count[3] = {};
    // Accumulated operation size in bytes
    uint64_t op_bytes[3] = {};
};
|
||||
|
||||
class osd_t
|
||||
{
|
||||
// config
|
||||
|
@ -126,6 +137,7 @@ class osd_t
|
|||
|
||||
// op statistics
|
||||
osd_op_stats_t prev_stats;
|
||||
std::map<uint64_t, inode_stats_t> inode_stats;
|
||||
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
|
||||
uint64_t recovery_stat_count[2][2] = { 0 };
|
||||
uint64_t recovery_stat_bytes[2][2] = { 0 };
|
||||
|
|
|
@ -187,6 +187,27 @@ void osd_t::report_statistics()
|
|||
{
|
||||
inode_space[std::to_string(kv.first)] = kv.second;
|
||||
}
|
||||
json11::Json::object inode_ops;
|
||||
for (auto kv: inode_stats)
|
||||
{
|
||||
inode_ops[std::to_string(kv.first)] = json11::Json::object {
|
||||
{ "read", json11::Json::object {
|
||||
{ "count", kv.second.op_count[INODE_STATS_READ] },
|
||||
{ "usec", kv.second.op_sum[INODE_STATS_READ] },
|
||||
{ "bytes", kv.second.op_bytes[INODE_STATS_READ] },
|
||||
} },
|
||||
{ "write", json11::Json::object {
|
||||
{ "count", kv.second.op_count[INODE_STATS_WRITE] },
|
||||
{ "usec", kv.second.op_sum[INODE_STATS_WRITE] },
|
||||
{ "bytes", kv.second.op_bytes[INODE_STATS_WRITE] },
|
||||
} },
|
||||
{ "delete", json11::Json::object {
|
||||
{ "count", kv.second.op_count[INODE_STATS_DELETE] },
|
||||
{ "usec", kv.second.op_sum[INODE_STATS_DELETE] },
|
||||
{ "bytes", kv.second.op_bytes[INODE_STATS_DELETE] },
|
||||
} },
|
||||
};
|
||||
}
|
||||
json11::Json::array txn = { json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/stats/"+std::to_string(osd_num)) },
|
||||
|
@ -196,6 +217,10 @@ void osd_t::report_statistics()
|
|||
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/space/"+std::to_string(osd_num)) },
|
||||
{ "value", base64_encode(json11::Json(inode_space).dump()) },
|
||||
} },
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", base64_encode(st_cli.etcd_prefix+"/osd/inodestats/"+std::to_string(osd_num)) },
|
||||
{ "value", base64_encode(json11::Json(inode_ops).dump()) },
|
||||
} },
|
||||
} };
|
||||
for (auto & p: pgs)
|
||||
{
|
||||
|
|
|
@ -36,6 +36,29 @@ void osd_t::autosync()
|
|||
void osd_t::finish_op(osd_op_t *cur_op, int retval)
|
||||
{
|
||||
inflight_ops--;
|
||||
if (cur_op->req.hdr.opcode == OSD_OP_READ ||
|
||||
cur_op->req.hdr.opcode == OSD_OP_WRITE ||
|
||||
cur_op->req.hdr.opcode == OSD_OP_DELETE)
|
||||
{
|
||||
// Track inode statistics
|
||||
if (!cur_op->tv_end.tv_sec)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
|
||||
}
|
||||
uint64_t usec = (
|
||||
(cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
|
||||
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
|
||||
);
|
||||
int inode_st_op = cur_op->req.hdr.opcode == OSD_OP_DELETE
|
||||
? INODE_STATS_DELETE
|
||||
: (cur_op->req.hdr.opcode == OSD_OP_READ ? INODE_STATS_READ : INODE_STATS_WRITE);
|
||||
inode_stats[cur_op->req.rw.inode].op_count[inode_st_op]++;
|
||||
inode_stats[cur_op->req.rw.inode].op_sum[inode_st_op] += usec;
|
||||
if (cur_op->req.hdr.opcode == OSD_OP_DELETE)
|
||||
inode_stats[cur_op->req.rw.inode].op_bytes[inode_st_op] += cur_op->op_data->pg_data_size * bs_block_size;
|
||||
else
|
||||
inode_stats[cur_op->req.rw.inode].op_bytes[inode_st_op] += cur_op->req.rw.len;
|
||||
}
|
||||
if (cur_op->op_data)
|
||||
{
|
||||
if (cur_op->op_data->pg_num > 0)
|
||||
|
@ -66,7 +89,7 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
|
|||
}
|
||||
else
|
||||
{
|
||||
// FIXME add separate magic number
|
||||
// FIXME add separate magic number for primary ops
|
||||
auto cl_it = c_cli.clients.find(cur_op->peer_fd);
|
||||
if (cl_it != c_cli.clients.end())
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue