From 6852f299ae377a336900e115489949f0ca42655d Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 24 Sep 2020 23:14:22 +0300 Subject: [PATCH] Add functions to calculate AFR for a cluster --- README.md | 2 +- mon/afr.js | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 mon/afr.js diff --git a/README.md b/README.md index ec109c0e..a91e893b 100644 --- a/README.md +++ b/README.md @@ -200,7 +200,7 @@ Hardware configuration: 4 nodes, each with: - 6x SATA SSD Intel D3-4510 3.84 TB - 2x Xeon Gold 6242 (16 cores @ 2.8 GHz) - 384 GB RAM -- 1x 25 GbE network interface (Mellanox ConnectX-4 LX) +- 1x 25 GbE network interface (Mellanox ConnectX-4 LX), connected to a Juniper QFX5200 switch CPU powersaving was disabled. Both Vitastor and Ceph were configured with 2 OSDs per 1 SSD. diff --git a/mon/afr.js b/mon/afr.js new file mode 100644 index 00000000..bf8551e4 --- /dev/null +++ b/mon/afr.js @@ -0,0 +1,57 @@ +// Functions to calculate Annualized Failure Rate of your cluster +// if you know AFR of your drives, number of drives, expected rebalance time +// and replication factor +// License: VNPL-1.0 (see README.md for details) + +module.exports = { + cluster_afr, + failure_rate, + c_n_k, +}; + +console.log(100*cluster_afr(100, 0.03, 10, 3), '%'); +console.log(100*cluster_afr(1000, 0.03, 1, 3), '%'); +console.log(100*cluster_afr(5, 0.1, 1, 2), '%'); +console.log(100*cluster_afr(14, 0.01, 1, 2), '%'); +console.log(100*cluster_afr(100, 0.03, 1, 2), '%'); + +// Estimate AFR of the cluster (not taking failure domains into account) +// n - number of drives +// afr - annualized failure rate of a single drive +// l - expected rebalance time in days after a single drive failure +// k - replication factor / number of drives that must fail at the same time for the cluster to fail +function cluster_afr(n, afr, l, k) +{ + let p = 0; + for (let i = 0; i < n-(k-1); i++) + { + p += afr * (1-afr)**i * failure_rate(n-i-1, afr*l/365, k-1); + } + return p; +} + +// Probability of at least failures in a cluster with drives with AFR= +function failure_rate(n, a, f) +{ + if (f <= 0) + { + return (1-a)**n; + } + let p = 1; + for (let i = 0; i < f; i++) + { + p -= c_n_k(n, i) * (1-a)**(n-i) * a**i; + } + return p; +} + +// Combination count +function c_n_k(n, k) +{ + let r = 1; + for (let i = 0; i < k; i++) + { + r *= (n-i) / (i+1); + } + return r; +}