diff options
author | 2022-06-17 15:49:53 -0400 | |
---|---|---|
committer | 2022-06-17 15:49:53 -0400 | |
commit | 037e4920c29534ba4fd53806ad92bb6446d4b6ed (patch) | |
tree | 189ddc26e0d3129b43b4e8bba0ce46c5cc9bfe51 /plugin | |
parent | dded10420b8a477ebd86cd2ceed9207a42c226cc (diff) | |
download | coredns-037e4920c29534ba4fd53806ad92bb6446d4b6ed.tar.gz coredns-037e4920c29534ba4fd53806ad92bb6446d4b6ed.tar.zst coredns-037e4920c29534ba4fd53806ad92bb6446d4b6ed.zip |
plugin/health: Bypass proxy in self health check (#5401)
* add detail to docs; bypass proxy in self health check
Signed-off-by: Chris O'Haver <cohaver@infoblox.com>
Diffstat (limited to 'plugin')
-rw-r--r-- | plugin/health/README.md | 10 | ||||
-rw-r--r-- | plugin/health/overloaded.go | 16 |
2 files changed, 20 insertions, 6 deletions
diff --git a/plugin/health/README.md b/plugin/health/README.md
index c8fda61a4..b18d2ec3c 100644
--- a/plugin/health/README.md
+++ b/plugin/health/README.md
@@ -48,13 +48,13 @@ Doing this is supported but both endpoints ":8080" and ":8081" will export the e
 
 ## Metrics
 
-If monitoring is enabled (via the *prometheus* plugin) then the following metric is exported:
+If monitoring is enabled (via the *prometheus* plugin) then the following metrics are exported:
 
- * `coredns_health_request_duration_seconds{}` - duration to process a HTTP query to the local
-   `/health` endpoint. As this a local operation it should be fast. A (large) increase in this
+ * `coredns_health_request_duration_seconds{}` - The *health* plugin performs a self health check
+   once per second on the `/health` endpoint. This metric is the duration to process that request.
+   As this is a local operation it should be fast. A (large) increase in this
    duration indicates the CoreDNS process is having trouble keeping up with its query load.
- * `coredns_health_request_failures_total{}` - The number of times the internal health check loop
-   failed to query `/health`.
+ * `coredns_health_request_failures_total{}` - The number of times the self health check failed.
 
 Note that these metrics *do not* have a `server` label, because being overloaded is a symptom of
 the running process, *not* a specific server.
diff --git a/plugin/health/overloaded.go b/plugin/health/overloaded.go
index 482b8a286..57b9ca2d0 100644
--- a/plugin/health/overloaded.go
+++ b/plugin/health/overloaded.go
@@ -2,6 +2,7 @@ package health
 
 import (
 	"context"
+	"net"
 	"net/http"
 	"time"
 
@@ -13,9 +14,22 @@ import (
 
 // overloaded queries the health end point and updates a metrics showing how long it took.
 func (h *health) overloaded(ctx context.Context) {
+	bypassProxy := &http.Transport{
+		Proxy: nil,
+		DialContext: (&net.Dialer{
+			Timeout:   30 * time.Second,
+			KeepAlive: 30 * time.Second,
+		}).DialContext,
+		ForceAttemptHTTP2:     true,
+		MaxIdleConns:          100,
+		IdleConnTimeout:       90 * time.Second,
+		TLSHandshakeTimeout:   10 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+	}
 	timeout := 3 * time.Second
 	client := http.Client{
-		Timeout: timeout,
+		Timeout:   timeout,
+		Transport: bypassProxy,
 	}
 
 	url := "http://" + h.Addr + "/health"