diff options
Diffstat (limited to 'plugin/pkg/healthcheck/healthcheck.go')
-rw-r--r-- | plugin/pkg/healthcheck/healthcheck.go | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/plugin/pkg/healthcheck/healthcheck.go b/plugin/pkg/healthcheck/healthcheck.go new file mode 100644 index 000000000..18f09087c --- /dev/null +++ b/plugin/pkg/healthcheck/healthcheck.go @@ -0,0 +1,243 @@ +package healthcheck + +import ( + "io" + "io/ioutil" + "log" + "net" + "net/http" + "net/url" + "sync" + "sync/atomic" + "time" +) + +// UpstreamHostDownFunc can be used to customize how Down behaves. +type UpstreamHostDownFunc func(*UpstreamHost) bool + +// UpstreamHost represents a single proxy upstream +type UpstreamHost struct { + Conns int64 // must be first field to be 64-bit aligned on 32-bit systems + Name string // IP address (and port) of this upstream host + Network string // Network (tcp, unix, etc) of the host, default "" is "tcp" + Fails int32 + FailTimeout time.Duration + OkUntil time.Time + CheckDown UpstreamHostDownFunc + CheckURL string + WithoutPathPrefix string + Checking bool + CheckMu sync.Mutex +} + +// Down checks whether the upstream host is down or not. +// Down will try to use uh.CheckDown first, and will fall +// back to some default criteria if necessary. +func (uh *UpstreamHost) Down() bool { + if uh.CheckDown == nil { + // Default settings + fails := atomic.LoadInt32(&uh.Fails) + after := false + + uh.CheckMu.Lock() + until := uh.OkUntil + uh.CheckMu.Unlock() + + if !until.IsZero() && time.Now().After(until) { + after = true + } + + return after || fails > 0 + } + return uh.CheckDown(uh) +} + +// HostPool is a collection of UpstreamHosts. +type HostPool []*UpstreamHost + +// HealthCheck is used for performing healthcheck +// on a collection of upstream hosts and select +// one based on the policy. +type HealthCheck struct { + wg sync.WaitGroup // Used to wait for running goroutines to stop. + stop chan struct{} // Signals running goroutines to stop. + Hosts HostPool + Policy Policy + Spray Policy + FailTimeout time.Duration + MaxFails int32 + Future time.Duration + Path string + Port string + Interval time.Duration +} + +// Start starts the healthcheck +func (u *HealthCheck) Start() { + u.stop = make(chan struct{}) + if u.Path != "" { + u.wg.Add(1) + go func() { + defer u.wg.Done() + u.healthCheckWorker(u.stop) + }() + } +} + +// Stop sends a signal to all goroutines started by this staticUpstream to exit +// and waits for them to finish before returning. +func (u *HealthCheck) Stop() error { + close(u.stop) + u.wg.Wait() + return nil +} + +// This was moved into a thread so that each host could throw a health +// check at the same time. The reason for this is that if we are checking +// 3 hosts, and the first one is gone, and we spend minutes timing out to +// fail it, we would not have been doing any other health checks in that +// time. So we now have a per-host lock and a threaded health check. +// +// We use the Checking bool to avoid concurrent checks against the same +// host; if one is taking a long time, the next one will find a check in +// progress and simply return before trying. +// +// We are carefully avoiding having the mutex locked while we check, +// otherwise checks will back up, potentially a lot of them if a host is +// absent for a long time. This arrangement makes checks quickly see if +// they are the only one running and abort otherwise. +func healthCheckURL(nextTs time.Time, host *UpstreamHost) { + + // lock for our bool check. We don't just defer the unlock because + // we don't want the lock held while http.Get runs + host.CheckMu.Lock() + + // are we mid check? Don't run another one + if host.Checking { + host.CheckMu.Unlock() + return + } + + host.Checking = true + host.CheckMu.Unlock() + + //log.Printf("[DEBUG] Healthchecking %s, nextTs is %s\n", url, nextTs.Local()) + + // fetch that url. This has been moved into a go func because + // when the remote host is not merely not serving, but actually + // absent, then tcp syn timeouts can be very long, and so one + // fetch could last several check intervals + if r, err := http.Get(host.CheckURL); err == nil { + io.Copy(ioutil.Discard, r.Body) + r.Body.Close() + + if r.StatusCode < 200 || r.StatusCode >= 400 { + log.Printf("[WARNING] Host %s health check returned HTTP code %d\n", + host.Name, r.StatusCode) + nextTs = time.Unix(0, 0) + } + } else { + log.Printf("[WARNING] Host %s health check probe failed: %v\n", host.Name, err) + nextTs = time.Unix(0, 0) + } + + host.CheckMu.Lock() + host.Checking = false + host.OkUntil = nextTs + host.CheckMu.Unlock() +} + +func (u *HealthCheck) healthCheck() { + for _, host := range u.Hosts { + + if host.CheckURL == "" { + var hostName, checkPort string + + // The DNS server might be an HTTP server. If so, extract its name. + ret, err := url.Parse(host.Name) + if err == nil && len(ret.Host) > 0 { + hostName = ret.Host + } else { + hostName = host.Name + } + + // Extract the port number from the parsed server name. + checkHostName, checkPort, err := net.SplitHostPort(hostName) + if err != nil { + checkHostName = hostName + } + + if u.Port != "" { + checkPort = u.Port + } + + host.CheckURL = "http://" + net.JoinHostPort(checkHostName, checkPort) + u.Path + } + + // calculate this before the get + nextTs := time.Now().Add(u.Future) + + // locks/bools should prevent requests backing up + go healthCheckURL(nextTs, host) + } +} + +func (u *HealthCheck) healthCheckWorker(stop chan struct{}) { + ticker := time.NewTicker(u.Interval) + u.healthCheck() + for { + select { + case <-ticker.C: + u.healthCheck() + case <-stop: + ticker.Stop() + return + } + } +} + +// Select selects an upstream host based on the policy +// and the healthcheck result. +func (u *HealthCheck) Select() *UpstreamHost { + pool := u.Hosts + if len(pool) == 1 { + if pool[0].Down() && u.Spray == nil { + return nil + } + return pool[0] + } + allDown := true + for _, host := range pool { + if !host.Down() { + allDown = false + break + } + } + if allDown { + if u.Spray == nil { + return nil + } + return u.Spray.Select(pool) + } + + if u.Policy == nil { + h := (&Random{}).Select(pool) + if h != nil { + return h + } + if h == nil && u.Spray == nil { + return nil + } + return u.Spray.Select(pool) + } + + h := u.Policy.Select(pool) + if h != nil { + return h + } + + if u.Spray == nil { + return nil + } + return u.Spray.Select(pool) +} |