diff options
author | 2020-02-04 05:19:48 -0800 | |
---|---|---|
committer | 2020-02-04 14:19:48 +0100 | |
commit | efbe4ac5e80f7c59528d878f910b5edaf8cd17e1 (patch) | |
tree | 5986dfb53525110207338625e2aa8d2150e2807c /plugin | |
parent | 22cd28a7987afc24161b110b550d3e62347d1626 (diff) | |
download | coredns-efbe4ac5e80f7c59528d878f910b5edaf8cd17e1.tar.gz coredns-efbe4ac5e80f7c59528d878f910b5edaf8cd17e1.tar.zst coredns-efbe4ac5e80f7c59528d878f910b5edaf8cd17e1.zip |
Add exponential backoff to healthcheck (#3643)
Move exponential backoff initialization to Start()
Signed-off-by: RickyRajinder <singh.sangh@gmail.com>
Move comment
Increase max interval and update README
Remove trailing whitespace
Change Start() param name back to interval
Diffstat (limited to 'plugin')
-rw-r--r-- | plugin/forward/README.md | 6 | ||||
-rw-r--r-- | plugin/pkg/up/up.go | 41 |
2 files changed, 26 insertions, 21 deletions
diff --git a/plugin/forward/README.md b/plugin/forward/README.md index cc1845377..b4307d8dd 100644 --- a/plugin/forward/README.md +++ b/plugin/forward/README.md @@ -9,8 +9,10 @@ The *forward* plugin re-uses already opened sockets to the upstreams. It supports UDP, TCP and DNS-over-TLS and uses in band health checking. -When it detects an error a health check is performed. This checks runs in a loop, every *0.5s*, for -as long as the upstream reports unhealthy. Once healthy we stop health checking (until the next +When it detects an error a health check is performed. This checks runs in a loop, +starting with a *0.5s* interval and exponentially backing off with randomized intervals +up to *60s* for as long as the upstream reports unhealthy. The exponential backoff +will reset to *0.5s* after 15 minutes. Once healthy we stop health checking (until the next error). The health checks use a recursive DNS query (`. IN NS`) to get upstream health. Any response that is not a network error (REFUSED, NOTIMPL, SERVFAIL, etc) is taken as a healthy upstream. The health check uses the same protocol as specified in **TO**. If `max_fails` is set to 0, no checking diff --git a/plugin/pkg/up/up.go b/plugin/pkg/up/up.go index 8f866311b..71c128234 100644 --- a/plugin/pkg/up/up.go +++ b/plugin/pkg/up/up.go @@ -5,6 +5,8 @@ package up import ( "sync" "time" + + "github.com/cenkalti/backoff/v4" ) // Probe is used to run a single Func until it returns true (indicating a target is healthy). If an Func @@ -13,8 +15,7 @@ import ( type Probe struct { sync.Mutex inprogress int - interval time.Duration - max time.Duration + expBackoff backoff.ExponentialBackOff } // Func is used to determine if a target is alive. If so this function must return nil. @@ -31,7 +32,13 @@ func (p *Probe) Do(f Func) { return } p.inprogress = active - interval := p.interval + interval := p.expBackoff.NextBackOff() + // If exponential backoff has reached the maximum elapsed time (15 minutes), + // reset it and try again + if interval == -1 { + p.expBackoff.Reset() + interval = p.expBackoff.NextBackOff() + } p.Unlock() // Passed the lock. Now run f for as long it returns false. If a true is returned // we return from the goroutine and we can accept another Func to run. @@ -42,9 +49,6 @@ func (p *Probe) Do(f Func) { break } time.Sleep(interval) - if i%2 == 0 && i < 4 { // 4 is 2 doubles, so no need to increase anymore - this is *also* checked in double() - p.double() - } p.Lock() if p.inprogress == stop { p.Unlock() @@ -60,15 +64,6 @@ func (p *Probe) Do(f Func) { }() } -func (p *Probe) double() { - p.Lock() - p.interval *= 2 - if p.interval > p.max { - p.interval = p.max - } - p.Unlock() -} - // Stop stops the probing. func (p *Probe) Stop() { p.Lock() @@ -77,10 +72,20 @@ func (p *Probe) Stop() { } // Start will initialize the probe manager, after which probes can be initiated with Do. +// Initializes exponential backoff using the given interval duration func (p *Probe) Start(interval time.Duration) { p.Lock() - p.interval = interval - p.max = interval * multiplier + eB := &backoff.ExponentialBackOff{ + InitialInterval: interval, + RandomizationFactor: backoff.DefaultRandomizationFactor, + Multiplier: backoff.DefaultMultiplier, + MaxInterval: backoff.DefaultMaxInterval, + MaxElapsedTime: backoff.DefaultMaxElapsedTime, + Stop: backoff.Stop, + Clock: backoff.SystemClock, + } + p.expBackoff = *eB + p.expBackoff.Reset() p.Unlock() } @@ -88,6 +93,4 @@ const ( idle = iota active stop - - multiplier = 4 ) |