From 7a0b5212feab2fccc58162fb70521967e89cc528 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sun, 28 Nov 2021 01:43:31 +0300 Subject: [PATCH] Exit if unable to restart watches FIXME: It's probably not OK for the client to exit in this case --- mon/mon.js | 25 +++++++++++++++++++------ src/etcd_state_client.cpp | 14 ++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/mon/mon.js b/mon/mon.js index cbc9c7da..95bee44a 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -341,7 +341,7 @@ class Mon this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000; this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree)); this.signals_set = false; - this.on_stop_cb = () => this.on_stop().catch(console.error); + this.on_stop_cb = () => this.on_stop(0).catch(console.error); } parse_etcd_addresses(addrs) @@ -530,12 +530,25 @@ class Mon catch (e) { } - if (!data || !data.result || !data.result.events) + if (!data || !data.result) { - if (!data || !data.result || !data.result.watch_id) + console.error('Unknown message received from watch websocket: '+msg); + } + else if (data.result.canceled) + { + // etcd watch canceled + if (data.result.compact_revision) { - console.error('Garbage received from watch websocket: '+msg); + // we may miss events if we proceed + console.error('Revisions before '+data.result.compact_revision+' were compacted by etcd, exiting'); + this.on_stop(1); } + console.error('Watch canceled by etcd, reason: '+data.result.cancel_reason+', exiting'); + this.on_stop(1); + } + else if (data.result.created) + { + // etcd watch created } else { @@ -639,11 +652,11 @@ class Mon } } - async on_stop() + async on_stop(status) { clearInterval(this.lease_timer); await this.etcd_call('/lease/revoke', { ID: this.etcd_lease_id }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries); - process.exit(0); + process.exit(status); } async become_master() diff --git a/src/etcd_state_client.cpp b/src/etcd_state_client.cpp index 3a26eb38..b16ff97b 100644 --- a/src/etcd_state_client.cpp +++ b/src/etcd_state_client.cpp @@ -209,6 +209,20 @@ void etcd_state_client_t::start_etcd_watcher() { etcd_watches_initialised++; } + if (data["result"]["canceled"].bool_value()) + { + // etcd watch canceled, maybe because the revision was compacted + if (data["result"]["compact_revision"].uint64_value()) + { + // we may miss events if we proceed + // FIXME: reload state and continue when used inside cluster_client + fprintf(stderr, "Revisions before %lu were compacted by etcd, exiting\n", + data["result"]["compact_revision"].uint64_value()); + exit(1); + } + fprintf(stderr, "Watch canceled by etcd, reason: %s, exiting\n", data["result"]["cancel_reason"].string_value().c_str()); + exit(1); + } if (etcd_watches_initialised == 4) { etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();