Measure & report recovery op count and bandwidth
parent
47b6f64106
commit
5084ff7c6c
15
osd.cpp
15
osd.cpp
|
@ -546,6 +546,21 @@ void osd_t::print_stats()
|
||||||
subop_stat_sum[1][i] = subop_stat_sum[0][i];
|
subop_stat_sum[1][i] = subop_stat_sum[0][i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
if (recovery_stat_count[0][i] != recovery_stat_count[1][i])
|
||||||
|
{
|
||||||
|
uint64_t bw = (recovery_stat_bytes[0][i] - recovery_stat_bytes[1][i]) / print_stats_interval;
|
||||||
|
printf(
|
||||||
|
"%s recovery: %.1f op/s, B/W: %.2f %s\n", recovery_stat_names[i],
|
||||||
|
(recovery_stat_count[0][i] - recovery_stat_count[1][i]) * 1.0 / print_stats_interval,
|
||||||
|
(bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)),
|
||||||
|
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s"))
|
||||||
|
);
|
||||||
|
recovery_stat_count[1][i] = recovery_stat_count[0][i];
|
||||||
|
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
if (incomplete_objects > 0)
|
if (incomplete_objects > 0)
|
||||||
{
|
{
|
||||||
printf("%lu object(s) incomplete\n", incomplete_objects);
|
printf("%lu object(s) incomplete\n", incomplete_objects);
|
||||||
|
|
3
osd.h
3
osd.h
|
@ -288,6 +288,9 @@ class osd_t
|
||||||
uint64_t op_stat_bytes[2][OSD_OP_MAX+1] = { 0 };
|
uint64_t op_stat_bytes[2][OSD_OP_MAX+1] = { 0 };
|
||||||
uint64_t subop_stat_sum[2][OSD_OP_MAX+1] = { 0 };
|
uint64_t subop_stat_sum[2][OSD_OP_MAX+1] = { 0 };
|
||||||
uint64_t subop_stat_count[2][OSD_OP_MAX+1] = { 0 };
|
uint64_t subop_stat_count[2][OSD_OP_MAX+1] = { 0 };
|
||||||
|
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
|
||||||
|
uint64_t recovery_stat_count[2][2] = { 0 };
|
||||||
|
uint64_t recovery_stat_bytes[2][2] = { 0 };
|
||||||
|
|
||||||
// cluster connection
|
// cluster connection
|
||||||
void http_request(const std::string & host, const std::string & request,
|
void http_request(const std::string & host, const std::string & request,
|
||||||
|
|
|
@ -145,21 +145,22 @@ json11::Json osd_t::get_statistics()
|
||||||
json11::Json::object st;
|
json11::Json::object st;
|
||||||
timespec ts;
|
timespec ts;
|
||||||
clock_gettime(CLOCK_REALTIME, &ts);
|
clock_gettime(CLOCK_REALTIME, &ts);
|
||||||
st["time"] = std::to_string(ts.tv_sec)+"."+std::to_string(ts.tv_nsec/1000000);
|
char time_str[50] = { 0 };
|
||||||
|
sprintf(time_str, "%ld.%03ld", ts.tv_sec, ts.tv_nsec/1000000);
|
||||||
|
st["time"] = time_str;
|
||||||
st["blockstore_ready"] = bs->is_started();
|
st["blockstore_ready"] = bs->is_started();
|
||||||
if (bs)
|
if (bs)
|
||||||
{
|
{
|
||||||
st["size"] = bs->get_block_count() * bs->get_block_size();
|
st["size"] = bs->get_block_count() * bs->get_block_size();
|
||||||
st["free"] = bs->get_free_block_count() * bs->get_block_size();
|
st["free"] = bs->get_free_block_count() * bs->get_block_size();
|
||||||
}
|
}
|
||||||
// FIXME: report recovery ops and bandwidth
|
|
||||||
// FIXME: handle integer overflow
|
// FIXME: handle integer overflow
|
||||||
json11::Json::object op_stats, subop_stats;
|
json11::Json::object op_stats, subop_stats;
|
||||||
for (int i = 0; i <= OSD_OP_MAX; i++)
|
for (int i = 0; i <= OSD_OP_MAX; i++)
|
||||||
{
|
{
|
||||||
op_stats[osd_op_names[i]] = json11::Json::object {
|
op_stats[osd_op_names[i]] = json11::Json::object {
|
||||||
{ "count", op_stat_count[0][i] },
|
{ "count", op_stat_count[0][i] },
|
||||||
{ "sum", op_stat_sum[0][i] },
|
{ "usec", op_stat_sum[0][i] },
|
||||||
{ "bytes", op_stat_bytes[0][i] },
|
{ "bytes", op_stat_bytes[0][i] },
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -167,11 +168,21 @@ json11::Json osd_t::get_statistics()
|
||||||
{
|
{
|
||||||
subop_stats[osd_op_names[i]] = json11::Json::object {
|
subop_stats[osd_op_names[i]] = json11::Json::object {
|
||||||
{ "count", subop_stat_count[0][i] },
|
{ "count", subop_stat_count[0][i] },
|
||||||
{ "sum", subop_stat_sum[0][i] },
|
{ "usec", subop_stat_sum[0][i] },
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
st["op_latency"] = op_stats;
|
st["op_stats"] = op_stats;
|
||||||
st["subop_latency"] = subop_stats;
|
st["subop_stats"] = subop_stats;
|
||||||
|
st["recovery_stats"] = json11::Json::object {
|
||||||
|
{ recovery_stat_names[0], json11::Json::object {
|
||||||
|
{ "count", recovery_stat_count[0][0] },
|
||||||
|
{ "bytes", recovery_stat_bytes[0][0] },
|
||||||
|
} },
|
||||||
|
{ recovery_stat_names[1], json11::Json::object {
|
||||||
|
{ "count", recovery_stat_count[0][1] },
|
||||||
|
{ "bytes", recovery_stat_bytes[0][1] },
|
||||||
|
} },
|
||||||
|
};
|
||||||
return st;
|
return st;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -244,6 +244,14 @@ resume_5:
|
||||||
}
|
}
|
||||||
if (op_data->object_state)
|
if (op_data->object_state)
|
||||||
{
|
{
|
||||||
|
{
|
||||||
|
int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1;
|
||||||
|
recovery_stat_count[0][recovery_type]++;
|
||||||
|
for (int role = 0; role < pg.pg_size; role++)
|
||||||
|
{
|
||||||
|
recovery_stat_bytes[0][recovery_type] += op_data->stripes[role].write_end - op_data->stripes[role].write_start;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (op_data->object_state->state & OBJ_MISPLACED)
|
if (op_data->object_state->state & OBJ_MISPLACED)
|
||||||
{
|
{
|
||||||
// Remove extra chunks
|
// Remove extra chunks
|
||||||
|
|
Loading…
Reference in New Issue