diff options
author | 2016-10-26 10:01:52 +0100 | |
---|---|---|
committer | 2016-10-26 10:01:52 +0100 | |
commit | 219bfd0493124fc2f0170772833a094c3eb9b627 (patch) | |
tree | cb5d1b3bbd7f1c9cac8c519fee68129dd4d09bd0 | |
parent | 6d9d60081d7a9eef14ad90d62533de89f88ea434 (diff) | |
download | coredns-219bfd0493124fc2f0170772833a094c3eb9b627.tar.gz coredns-219bfd0493124fc2f0170772833a094c3eb9b627.tar.zst coredns-219bfd0493124fc2f0170772833a094c3eb9b627.zip |
middleware/metrics: cleanup (#355)
* middleware/metrics: add more metrics
middleware/cache:
Add metrics for number of elements in the cache. Also export the total
size. Update README to detail the new metrics.
middleware/metrics
Move metrics into subpackage called "vars". This breaks the import
cycle and is cleaner. This allows vars.Report to be used in the
dnsserver to log refused queries.
middleware/metrics: tests
Add tests to the metrics framework. The metrics/test subpackage allows
scraping of the local server. Do a few test scrapes of the metrics that
are defined in the metrics middleware.
This also allows metrics integration tests to check if the caching and
dnssec middleware export their metrics correctly.
* update README
* typos
* fix tests
39 files changed, 828 insertions, 259 deletions
diff --git a/core/dnsserver/config.go b/core/dnsserver/config.go index 20ff8389a..123ab70ce 100644 --- a/core/dnsserver/config.go +++ b/core/dnsserver/config.go @@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config { ctx.saveConfig(c.Key, &Config{}) return GetConfig(c) } + +// GetMiddleware returns the middleware handler that has been added to the config under name. +// This is useful to inspect if a certain middleware is active in this server. +// Note that this is order dependent and the order is defined in directives.go, i.e. if your middleware +// comes before the middleware you are checking; it will not be there (yet). +func GetMiddleware(c *caddy.Controller, name string) middleware.Handler { + // TODO(miek): calling the handler h(nil) should be a noop... + conf := GetConfig(c) + for _, h := range conf.Middleware { + x := h(nil) + if name == x.Name() { + return x + } + } + return nil +} diff --git a/core/dnsserver/server.go b/core/dnsserver/server.go index aa94dcd8c..cc078d0d8 100644 --- a/core/dnsserver/server.go +++ b/core/dnsserver/server.go @@ -10,7 +10,9 @@ import ( "time" "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/edns" + "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/request" "github.com/miekg/dns" @@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() { } // DefaultErrorFunc responds to an DNS request with an error. 
-func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) { +func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) { state := request.Request{W: w, Req: r} answer := new(dns.Msg) - answer.SetRcode(r, rcode) + answer.SetRcode(r, rc) state.SizeAndDo(answer) + vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + w.WriteMsg(answer) } diff --git a/middleware.md b/middleware.md index acd69f497..98a6adf89 100644 --- a/middleware.md +++ b/middleware.md @@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware. ## Metrics When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the -*Subsystem* should be the name of the middleware. +*Subsystem* should be the name of the middleware. The README.md for the middleware should then +also contain a *Metrics* section detailing the metrics. ## Documentation diff --git a/middleware/auto/auto.go b/middleware/auto/auto.go index 7721c194e..65e784459 100644 --- a/middleware/auto/auto.go +++ b/middleware/auto/auto.go @@ -8,6 +8,7 @@ import ( "github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware/file" + "github.com/miekg/coredns/middleware/metrics" "github.com/miekg/coredns/request" "github.com/miekg/dns" @@ -20,6 +21,7 @@ type ( Next middleware.Handler *Zones + metrics *metrics.Metrics loader } @@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i w.WriteMsg(m) return dns.RcodeSuccess, nil } + +func (a Auto) Name() string { return "auto" } diff --git a/middleware/auto/setup.go b/middleware/auto/setup.go index 8c56f90a0..a5e11186a 100644 --- a/middleware/auto/setup.go +++ b/middleware/auto/setup.go @@ -11,6 +11,7 @@ import ( "github.com/miekg/coredns/core/dnsserver" "github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware/file" + "github.com/miekg/coredns/middleware/metrics" "github.com/mholt/caddy" ) @@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error { return 
middleware.Error("auto", err) } + // If we have enabled prometheus we should add newly discovered zones to it. + met := dnsserver.GetMiddleware(c, "prometheus") + if met != nil { + a.metrics = met.(*metrics.Metrics) + } + walkChan := make(chan bool) c.OnStartup(func() error { - err := a.Zones.Walk(a.loader) + err := a.Walk() if err != nil { return err } @@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error { case <-walkChan: return case <-ticker.C: - a.Zones.Walk(a.loader) + a.Walk() } } }() diff --git a/middleware/auto/walk.go b/middleware/auto/walk.go index 4259d7f17..8c3370e16 100644 --- a/middleware/auto/walk.go +++ b/middleware/auto/walk.go @@ -13,26 +13,26 @@ import ( ) // Walk will recursively walk of the file under l.directory and adds the one that match l.re. -func (z *Zones) Walk(l loader) error { +func (a Auto) Walk() error { // TODO(miek): should add something so that we don't stomp on each other. toDelete := make(map[string]bool) - for _, n := range z.Names() { + for _, n := range a.Zones.Names() { toDelete[n] = true } - filepath.Walk(l.directory, func(path string, info os.FileInfo, err error) error { + filepath.Walk(a.loader.directory, func(path string, info os.FileInfo, err error) error { if info.IsDir() { return nil } - match, origin := matches(l.re, info.Name(), l.template) + match, origin := matches(a.loader.re, info.Name(), a.loader.template) if !match { return nil } - if _, ok := z.Z[origin]; ok { + if _, ok := a.Zones.Z[origin]; ok { // we already have this zone toDelete[origin] = false return nil @@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error { return nil } - zo.NoReload = l.noReload - zo.TransferTo = l.transferTo + zo.NoReload = a.loader.noReload + zo.TransferTo = a.loader.transferTo - z.Insert(zo, origin) + a.Zones.Add(zo, origin) + + if a.metrics != nil { + a.metrics.AddZone(origin) + } zo.Notify() @@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error { if !ok { continue } - z.Delete(origin) + + if a.metrics != nil { + 
a.metrics.RemoveZone(origin) + } + + a.Zones.Remove(origin) + log.Printf("[INFO] Deleting zone `%s'", origin) } diff --git a/middleware/auto/walk_test.go b/middleware/auto/walk_test.go index cc420d5b6..f15548dba 100644 --- a/middleware/auto/walk_test.go +++ b/middleware/auto/walk_test.go @@ -37,13 +37,16 @@ func TestWalk(t *testing.T) { template: `${1}`, } - z := &Zones{} + a := Auto{ + loader: ldr, + Zones: &Zones{}, + } - z.Walk(ldr) + a.Walk() // db.example.org and db.example.com should be here (created in createFiles) for _, name := range []string{"example.com.", "example.org."} { - if _, ok := z.Z[name]; !ok { + if _, ok := a.Zones.Z[name]; !ok { t.Errorf("%s should have been added", name) } } diff --git a/middleware/auto/watcher_test.go b/middleware/auto/watcher_test.go index 751c78c0d..a8f219fa1 100644 --- a/middleware/auto/watcher_test.go +++ b/middleware/auto/watcher_test.go @@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) { template: `${1}`, } - z := &Zones{} + a := Auto{ + loader: ldr, + Zones: &Zones{}, + } - z.Walk(ldr) + a.Walk() // example.org and example.com should exist - if x := len(z.Z["example.org."].All()); x != 4 { + if x := len(a.Zones.Z["example.org."].All()); x != 4 { t.Fatalf("expected 4 RRs, got %d", x) } - if x := len(z.Z["example.com."].All()); x != 4 { + if x := len(a.Zones.Z["example.com."].All()); x != 4 { t.Fatalf("expected 4 RRs, got %d", x) } @@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) { t.Fatal(err) } - z.Walk(ldr) + a.Walk() + // TODO(miek): check } diff --git a/middleware/auto/zone.go b/middleware/auto/zone.go index 4c950b908..f825871f7 100644 --- a/middleware/auto/zone.go +++ b/middleware/auto/zone.go @@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone { return zo } -// Insert inserts a new zone into z. If zo.NoReload is false, the +// Add adds a new zone into z. If zo.NoReload is false, the // reload goroutine is started. 
-func (z *Zones) Insert(zo *file.Zone, name string) { +func (z *Zones) Add(zo *file.Zone, name string) { z.Lock() if z.Z == nil { @@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) { z.Z[name] = zo z.names = append(z.names, name) - zo.Reload() z.Unlock() } -// Delete removes the zone named name from z. It also stop the the zone's reload goroutine. -func (z *Zones) Delete(name string) { +// Remove removes the zone named name from z. It also stop the the zone's reload goroutine. +func (z *Zones) Remove(name string) { z.Lock() if zo, ok := z.Z[name]; ok && !zo.NoReload { @@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) { delete(z.Z, name) - // just regenerate Names (might be bad if you have a lot of zones...) + // TODO(miek): just regenerate Names (might be bad if you have a lot of zones...) z.names = []string{} for n := range z.Z { z.names = append(z.names, n) } + z.Unlock() } diff --git a/middleware/cache/README.md b/middleware/cache/README.md index 55ad8848a..3a4dea43e 100644 --- a/middleware/cache/README.md +++ b/middleware/cache/README.md @@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached. The minimum TTL allowed on resource records is 5 seconds. -If monitoring is enabled (via the *prometheus* directive) then the following extra metrics are added: +## Metrics -* coredns_cache_hit_count_total, and -* coredns_cache_miss_count_total +If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: -They both work on a per-zone basis and just count the hit and miss counts for each query. +* coredns_cache_size_guage{type} - total elements in the case, type is either "denial" or "success". +* coredns_cache_capacity_guage{type} - total capacity of the cache, type is either "denial" or "success". 
## Examples +Enable caching for all zones, but cap everything to a TTL of 10 seconds: + ~~~ cache 10 ~~~ -Enable caching for all zones, but cap everything to a TTL of 10 seconds. +Proxy to Google Public DNS and only cache responses for example.org (or below). ~~~ proxy . 8.8.8.8:53 cache example.org ~~~ - -Proxy to Google Public DNS and only cache responses for example.org (or below). diff --git a/middleware/cache/cache.go b/middleware/cache/cache.go index 6254349c3..d17253c38 100644 --- a/middleware/cache/cache.go +++ b/middleware/cache/cache.go @@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error { if key != "" { c.set(res, key, mt, duration) + + cacheSize.WithLabelValues(Success).Set(float64(c.pcache.Len())) + cacheSize.WithLabelValues(Denial).Set(float64(c.ncache.Len())) } setMsgTTL(res, uint32(duration.Seconds())) @@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration case response.OtherError: // don't cache these - // TODO(miek): what do we do with these? default: log.Printf("[WARNING] Caching called with unknown classification: %d", mt) } @@ -122,4 +124,9 @@ const ( minTTL = 5 * time.Second defaultCap = 10000 // default capacity of the cache. + + // Success is the class for caching postive caching. + Success = "success" + // Denial is the class defined for negative caching. 
+ Denial = "denial" ) diff --git a/middleware/cache/handler.go b/middleware/cache/handler.go index e307b0b79..95b51970e 100644 --- a/middleware/cache/handler.go +++ b/middleware/cache/handler.go @@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) state.SizeAndDo(resp) w.WriteMsg(resp) - cacheHitCount.WithLabelValues(zone).Inc() - return dns.RcodeSuccess, nil } - cacheMissCount.WithLabelValues(zone).Inc() - crr := &ResponseWriter{w, c} return c.Next.ServeDNS(ctx, crr, r) } +func (c *Cache) Name() string { return "cache" } + func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { k := rawKey(qname, qtype, do) @@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { } var ( - cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "hit_count_total", - Help: "Counter of DNS requests that were found in the cache.", - }, []string{"zone"}) + Name: "size_guage", + Help: "Gauge of number of elements in the cache.", + }, []string{"type"}) - cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "miss_count_total", - Help: "Counter of DNS requests that were not found in the cache.", - }, []string{"zone"}) + Name: "capacity_gauge", + Help: "Gauge of cache's capacity.", + }, []string{"type"}) ) const subsystem = "cache" func init() { - prometheus.MustRegister(cacheHitCount) - prometheus.MustRegister(cacheMissCount) + prometheus.MustRegister(cacheSize) + prometheus.MustRegister(cacheCapacity) } diff --git a/middleware/cache/setup.go b/middleware/cache/setup.go index 08c8fefdb..11a35ddc4 100644 --- a/middleware/cache/setup.go +++ b/middleware/cache/setup.go @@ -28,6 +28,10 @@ func setup(c 
*caddy.Controller) error { return ca }) + // Export the capacity for the metrics. This only happens once, because this is a re-load change only. + cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap)) + cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap)) + return nil } @@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { for c.NextBlock() { switch c.Val() { // first number is cap, second is an new ttl - case "success": + case Success: args := c.RemainingArgs() if len(args) == 0 { return nil, c.ArgErr() @@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { } ca.pttl = time.Duration(pttl) * time.Second } - case "denial": + case Denial: args := c.RemainingArgs() if len(args) == 0 { return nil, c.ArgErr() diff --git a/middleware/chaos/chaos.go b/middleware/chaos/chaos.go index 6d83f5dda..39ebe7f49 100644 --- a/middleware/chaos/chaos.go +++ b/middleware/chaos/chaos.go @@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return 0, nil } +func (c Chaos) Name() string { return "chaos" } + func trim(s string) string { if len(s) < 256 { return s diff --git a/middleware/dnssec/README.md b/middleware/dnssec/README.md index b87b25b1c..5daebc7f6 100644 --- a/middleware/dnssec/README.md +++ b/middleware/dnssec/README.md @@ -34,9 +34,14 @@ dnssec [ZONES... ] { will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*. - * `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage objects and the default capacity is 10000. +## Metrics + +If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: + +* coredns_dnssec_size_guage{type} - total elements in the cache, type is "signature". +* coredns_dnssec_capacity_guage{type} - total capacity of the cache, type is "signature". 
## Examples diff --git a/middleware/dnssec/handler.go b/middleware/dnssec/handler.go index aaed62bdf..a99588397 100644 --- a/middleware/dnssec/handler.go +++ b/middleware/dnssec/handler.go @@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) } var ( - cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "hit_count_total", - Help: "Counter of signatures that were found in the cache.", - }, []string{"zone"}) + Name: "size_guage", + Help: "Gauge of number of elements in the cache.", + }, []string{"type"}) - cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "miss_count_total", - Help: "Counter of signatures that were not found in the cache.", - }, []string{"zone"}) + Name: "capacity_gauge", + Help: "Gauge of cache's capacity.", + }, []string{"type"}) ) +func (d Dnssec) Name() string { return "dnssec" } + const subsystem = "dnssec" func init() { - prometheus.MustRegister(cacheHitCount) - prometheus.MustRegister(cacheMissCount) + prometheus.MustRegister(cacheSize) + prometheus.MustRegister(cacheCapacity) } diff --git a/middleware/dnssec/responsewriter.go b/middleware/dnssec/responsewriter.go index 547561fb9..e7af62a9e 100644 --- a/middleware/dnssec/responsewriter.go +++ b/middleware/dnssec/responsewriter.go @@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error { if state.Do() { res = d.d.Sign(state, zone, time.Now().UTC()) + + cacheSize.WithLabelValues("signature").Set(float64(d.d.cache.Len())) } state.SizeAndDo(res) diff --git a/middleware/dnssec/setup.go b/middleware/dnssec/setup.go index 19a68a853..639303ea9 100644 --- a/middleware/dnssec/setup.go +++ b/middleware/dnssec/setup.go @@ -32,6 +32,9 @@ func setup(c *caddy.Controller) 
error { return New(zones, keys, next, cache) }) + // Export the capacity for the metrics. This only happens once, because this is a re-load change only. + cacheCapacity.WithLabelValues("signature").Set(float64(capacity)) + return nil } diff --git a/middleware/errors/errors.go b/middleware/errors/errors.go index afaa8cf4d..aca05b54a 100644 --- a/middleware/errors/errors.go +++ b/middleware/errors/errors.go @@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns return rcode, err } +func (h errorHandler) Name() string { return "errors" } + func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) { rec := recover() if rec == nil { diff --git a/middleware/etcd/handler.go b/middleware/etcd/handler.go index 75aa582bd..7c856bd8b 100644 --- a/middleware/etcd/handler.go +++ b/middleware/etcd/handler.go @@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return dns.RcodeSuccess, nil } +func (e *Etcd) Name() string { return "etcd" } + // Err write an error response to the client. func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) { m := new(dns.Msg) diff --git a/middleware/file/file.go b/middleware/file/file.go index 90d15af79..7c6a65ced 100644 --- a/middleware/file/file.go +++ b/middleware/file/file.go @@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i return dns.RcodeSuccess, nil } +func (f File) Name() string { return "file" } + // Parse parses the zone in filename and returns a new Zone or an error. 
func Parse(f io.Reader, origin, fileName string) (*Zone, error) { tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName) diff --git a/middleware/kubernetes/handler.go b/middleware/kubernetes/handler.go index 1b1abf44b..cc59a305f 100644 --- a/middleware/kubernetes/handler.go +++ b/middleware/kubernetes/handler.go @@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M return dns.RcodeSuccess, nil } +func (k Kubernetes) Name() string { return "kubernetes" } + // Err writes an error response back to the client. func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) { m := new(dns.Msg) diff --git a/middleware/loadbalance/handler.go b/middleware/loadbalance/handler.go index 151eb57d0..8cba67e6f 100644 --- a/middleware/loadbalance/handler.go +++ b/middleware/loadbalance/handler.go @@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. wrr := &RoundRobinResponseWriter{w} return rr.Next.ServeDNS(ctx, wrr, r) } + +func (rr RoundRobin) Name() string { return "loadbalance" } diff --git a/middleware/log/log.go b/middleware/log/log.go index dba76341e..0a3cca130 100644 --- a/middleware/log/log.go +++ b/middleware/log/log.go @@ -6,7 +6,7 @@ import ( "time" "github.com/miekg/coredns/middleware" - "github.com/miekg/coredns/middleware/metrics" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/middleware/pkg/replacer" @@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) answer.SetRcode(r, rc) state.SizeAndDo(answer) - metrics.Report(state, metrics.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) w.WriteMsg(answer) } @@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r 
*dns.Msg) return l.Next.ServeDNS(ctx, w, r) } +func (l Logger) Name() string { return "log" } + // Rule configures the logging middleware. type Rule struct { NameScope string diff --git a/middleware/metrics/README.md b/middleware/metrics/README.md index fb0e6d3aa..46dd34296 100644 --- a/middleware/metrics/README.md +++ b/middleware/metrics/README.md @@ -7,12 +7,10 @@ The following metrics are exported: * coredns_dns_request_count_total{zone, proto, family} * coredns_dns_request_duration_milliseconds{zone} -* coredns_dns_request_size_bytes{zone,, proto} -* coredns_dns_request_transfer_size_bytes{zone,, proto} +* coredns_dns_request_size_bytes{zone, proto} * coredns_dns_request_do_count_total{zone} * coredns_dns_request_type_count_total{zone, type} * coredns_dns_response_size_bytes{zone, proto} -* coredns_dns_response_transfer_size_bytes{zone, proto} * coredns_dns_response_rcode_count_total{zone, rcode} Each counter has a label `zone` which is the zonename used for the request/response. @@ -27,10 +25,7 @@ Extra labels used are: * The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake -domain "dropped" (without a closing dot). - -Restarting CoreDNS will stop the monitoring. This is a bug. Also [this upstream -Caddy bug](https://github.com/mholt/caddy/issues/675). +name "dropped" (without a closing dot - this is never a valid domain name). ## Syntax @@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default is `localhost:9153`. The metrics path is fixed to `/metrics`. 
## Examples + +Use an alternative address: + +~~~ +prometheus localhost:9253 +~~~ diff --git a/middleware/metrics/handler.go b/middleware/metrics/handler.go index a0247c517..4c235ab6a 100644 --- a/middleware/metrics/handler.go +++ b/middleware/metrics/handler.go @@ -1,9 +1,8 @@ package metrics import ( - "time" - "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/request" @@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg state := request.Request{W: w, Req: r} qname := state.QName() - zone := middleware.Zones(m.ZoneNames).Matches(qname) + zone := middleware.Zones(m.ZoneNames()).Matches(qname) if zone == "" { zone = "." } @@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg rw := dnsrecorder.New(w) status, err := m.Next.ServeDNS(ctx, rw, r) - Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start) + vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start) return status, err } -// Report is a plain reporting function that the server can use for REFUSED and other -// queries that are turned down because they don't match any middleware. 
-func Report(req request.Request, zone, rcode string, size int, start time.Time) { - if requestCount == nil { - // no metrics are enabled - return - } - - // Proto and Family - net := req.Proto() - fam := "1" - if req.Family() == 2 { - fam = "2" - } - - typ := req.QType() - - requestCount.WithLabelValues(zone, net, fam).Inc() - requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond)) - - if req.Do() { - requestDo.WithLabelValues(zone).Inc() - } - - if _, known := monitorType[typ]; known { - requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() - } else { - requestType.WithLabelValues(zone, other).Inc() - } - - if typ == dns.TypeIXFR || typ == dns.TypeAXFR { - responseTransferSize.WithLabelValues(zone, net).Observe(float64(size)) - requestTransferSize.WithLabelValues(zone, net).Observe(float64(req.Size())) - } else { - responseSize.WithLabelValues(zone, net).Observe(float64(size)) - requestSize.WithLabelValues(zone, net).Observe(float64(req.Size())) - } - - responseRcode.WithLabelValues(zone, rcode).Inc() -} - -var monitorType = map[uint16]bool{ - dns.TypeAAAA: true, - dns.TypeA: true, - dns.TypeCNAME: true, - dns.TypeDNSKEY: true, - dns.TypeDS: true, - dns.TypeMX: true, - dns.TypeNSEC3: true, - dns.TypeNSEC: true, - dns.TypeNS: true, - dns.TypePTR: true, - dns.TypeRRSIG: true, - dns.TypeSOA: true, - dns.TypeSRV: true, - dns.TypeTXT: true, - // Meta Qtypes - dns.TypeIXFR: true, - dns.TypeAXFR: true, - dns.TypeANY: true, -} - -const other = "other" +func (m *Metrics) Name() string { return "prometheus" } diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go index b82a2be86..5f93ec2f5 100644 --- a/middleware/metrics/metrics.go +++ b/middleware/metrics/metrics.go @@ -1,5 +1,4 @@ -// Package metrics implement a handler and middleware that provides Prometheus -// metrics. +// Package metrics implement a handler and middleware that provides Prometheus metrics. 
package metrics import ( @@ -9,37 +8,51 @@ import ( "sync" "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/prometheus/client_golang/prometheus" ) -var ( - requestCount *prometheus.CounterVec - requestDuration *prometheus.HistogramVec - requestSize *prometheus.HistogramVec - requestTransferSize *prometheus.HistogramVec - requestDo *prometheus.CounterVec - requestType *prometheus.CounterVec - - responseSize *prometheus.HistogramVec - responseTransferSize *prometheus.HistogramVec - responseRcode *prometheus.CounterVec -) - // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics type Metrics struct { - Next middleware.Handler - Addr string - ln net.Listener - mux *http.ServeMux - Once sync.Once - ZoneNames []string + Next middleware.Handler + Addr string + ln net.Listener + mux *http.ServeMux + Once sync.Once + + zoneNames []string + zoneMap map[string]bool + zoneMu sync.RWMutex +} + +// AddZone adds zone z to m. +func (m *Metrics) AddZone(z string) { + m.zoneMu.Lock() + m.zoneMap[z] = true + m.zoneNames = keys(m.zoneMap) + m.zoneMu.Unlock() +} + +// RemoveZone remove zone z from m. +func (m *Metrics) RemoveZone(z string) { + m.zoneMu.Lock() + delete(m.zoneMap, z) + m.zoneNames = keys(m.zoneMap) + m.zoneMu.Unlock() +} + +// ZoneNames returns the zones of m. +func (m *Metrics) ZoneNames() []string { + m.zoneMu.RLock() + s := m.zoneNames + m.zoneMu.RUnlock() + return s } // OnStartup sets up the metrics on startup. 
func (m *Metrics) OnStartup() error { m.Once.Do(func() { - define() ln, err := net.Listen("tcp", m.Addr) if err != nil { @@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error { m.mux = http.NewServeMux() - prometheus.MustRegister(requestCount) - prometheus.MustRegister(requestDuration) - prometheus.MustRegister(requestSize) - prometheus.MustRegister(requestTransferSize) - prometheus.MustRegister(requestDo) - prometheus.MustRegister(requestType) + prometheus.MustRegister(vars.RequestCount) + prometheus.MustRegister(vars.RequestDuration) + prometheus.MustRegister(vars.RequestSize) + prometheus.MustRegister(vars.RequestDo) + prometheus.MustRegister(vars.RequestType) - prometheus.MustRegister(responseSize) - prometheus.MustRegister(responseTransferSize) - prometheus.MustRegister(responseRcode) + prometheus.MustRegister(vars.ResponseSize) + prometheus.MustRegister(vars.ResponseRcode) - m.mux.Handle(path, prometheus.Handler()) + m.mux.Handle("/metrics", prometheus.Handler()) go func() { http.Serve(m.ln, m.mux) @@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error { return nil } -func define() { - requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_count_total", - Help: "Counter of DNS requests made per zone, protocol and family.", - }, []string{"zone", "proto", "family"}) - - requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_duration_milliseconds", - Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...), - Help: "Histogram of the time (in milliseconds) each request took.", - }, []string{"zone"}) - - requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_size_bytes", - Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", - Buckets: 
[]float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_transfer_size_bytes", - Help: "Size of the incoming zone transfer in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_do_count_total", - Help: "Counter of DNS requests with DO bit set per zone.", - }, []string{"zone"}) - - requestType = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_type_count_total", - Help: "Counter of DNS requests per type, per zone.", - }, []string{"zone", "type"}) - - responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_size_bytes", - Help: "Size of the returned response in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_transfer_size_bytes", - Help: "Size of the returned zone transfer in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_rcode_count_total", - Help: "Counter of response status codes.", - }, []string{"zone", "rcode"}) +func keys(m map[string]bool) []string { + sx := []string{} + for k := range 
m { + sx = append(sx, k) + } + return sx } - -const ( - // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone. - Dropped = "dropped" - subsystem = "dns" - path = "/metrics" -) diff --git a/middleware/metrics/metrics_test.go b/middleware/metrics/metrics_test.go new file mode 100644 index 000000000..4272350f4 --- /dev/null +++ b/middleware/metrics/metrics_test.go @@ -0,0 +1,83 @@ +package metrics + +import ( + "testing" + + "github.com/miekg/coredns/middleware" + mtest "github.com/miekg/coredns/middleware/metrics/test" + "github.com/miekg/coredns/middleware/pkg/dnsrecorder" + "github.com/miekg/coredns/middleware/test" + + "github.com/miekg/dns" + "golang.org/x/net/context" +) + +func TestMetrics(t *testing.T) { + met := &Metrics{Addr: Addr, zoneMap: make(map[string]bool)} + if err := met.OnStartup(); err != nil { + t.Fatalf("Failed to start metrics handler: %s", err) + } + defer met.OnShutdown() + + met.AddZone("example.org.") + + tests := []struct { + next middleware.Handler + qname string + qtype uint16 + metric string + expectedValue string + }{ + // This all works because 1 bucket (1 zone, 1 type) + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_count_total", + expectedValue: "1", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_count_total", + expectedValue: "2", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_type_count_total", + expectedValue: "3", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_response_rcode_count_total", + expectedValue: "4", + }, + } + + ctx := context.TODO() + + for i, tc := range tests { + req := new(dns.Msg) + if tc.qtype == 0 { + tc.qtype = dns.TypeA + } + req.SetQuestion(dns.Fqdn(tc.qname), tc.qtype) + met.Next = tc.next + + rec := 
dnsrecorder.New(&test.ResponseWriter{}) + _, err := met.ServeDNS(ctx, rec, req) + if err != nil { + t.Fatalf("Test %d: Expected no error, but got %s", i, err) + } + + result := mtest.Scrape(t, "http://"+Addr+"/metrics") + + if tc.expectedValue != "" { + got, _ := mtest.MetricValue(tc.metric, result) + if got != tc.expectedValue { + t.Errorf("Test %d: Expected value %s for metrics %s, but got %s", i, tc.expectedValue, tc.metric, got) + } + } + } +} diff --git a/middleware/metrics/setup.go b/middleware/metrics/setup.go index 8c8dd1a75..93a6bf50a 100644 --- a/middleware/metrics/setup.go +++ b/middleware/metrics/setup.go @@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error { func prometheusParse(c *caddy.Controller) (*Metrics, error) { var ( - met = &Metrics{Addr: addr} + met = &Metrics{Addr: Addr, zoneMap: make(map[string]bool)} err error ) for c.Next() { - if len(met.ZoneNames) > 0 { - return met, c.Err("metrics: can only have one metrics module per server") + if len(met.ZoneNames()) > 0 { + return met, c.Err("can only have one metrics module per server") } - met.ZoneNames = make([]string, len(c.ServerBlockKeys)) - copy(met.ZoneNames, c.ServerBlockKeys) - for i := range met.ZoneNames { - met.ZoneNames[i] = middleware.Host(met.ZoneNames[i]).Normalize() + + for _, z := range c.ServerBlockKeys { + met.AddZone(middleware.Host(z).Normalize()) } args := c.RemainingArgs() @@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { return met, e } default: - return met, c.Errf("metrics: unknown item: %s", c.Val()) + return met, c.Errf("unknown item: %s", c.Val()) } } @@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { var metricsOnce sync.Once -const addr = "localhost:9153" +const Addr = "localhost:9153" diff --git a/middleware/metrics/test/scrape.go b/middleware/metrics/test/scrape.go new file mode 100644 index 000000000..d64bef96f --- /dev/null +++ b/middleware/metrics/test/scrape.go @@ -0,0 +1,225 @@ +// Adapted by Miek 
Gieben for CoreDNS testing. +// +// License from prom2json +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package test will scrape a target and you can inspect the variables. +// Basic usage (from inside a test that has a *testing.T named t): +// +// result := Scrape(t, "http://localhost:9153/metrics") +// v, _ := MetricValue("coredns_cache_capacity_gauge", result) +// +package test + +import ( + "fmt" + "io" + "mime" + "net/http" + "testing" + + "github.com/matttproud/golang_protobuf_extensions/pbutil" + "github.com/prometheus/common/expfmt" + + dto "github.com/prometheus/client_model/go" +) + +type ( + // MetricFamily holds a prometheus metric. + MetricFamily struct { + Name string `json:"name"` + Help string `json:"help"` + Type string `json:"type"` + Metrics []interface{} `json:"metrics,omitempty"` // Either metric, summary or histogram. + } + + // metric is for all "single value" metrics. + metric struct { + Labels map[string]string `json:"labels,omitempty"` + Value string `json:"value"` + } + + summary struct { + Labels map[string]string `json:"labels,omitempty"` + Quantiles map[string]string `json:"quantiles,omitempty"` + Count string `json:"count"` + Sum string `json:"sum"` + } + + histogram struct { + Labels map[string]string `json:"labels,omitempty"` + Buckets map[string]string `json:"buckets,omitempty"` + Count string `json:"count"` + Sum string `json:"sum"` + } +) + +// Scrape fetches url and returns all the metric families found there as a []*MetricFamily. 
+func Scrape(t *testing.T, url string) []*MetricFamily { + mfChan := make(chan *dto.MetricFamily, 1024) + + go fetchMetricFamilies(t, url, mfChan) + + result := []*MetricFamily{} + for mf := range mfChan { + result = append(result, newMetricFamily(mf)) + } + return result +} + +// MetricValue returns the value associated with name as a string as well as the labels. +// It only returns the first metrics of the slice. +func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) { + for _, mf := range mfs { + if mf.Name == name { + // Only works with Gauge and Counter... + return mf.Metrics[0].(metric).Value, mf.Metrics[0].(metric).Labels + } + } + return "", nil +} + +// MetricValueLabel returns the value for name *and* label *value*. +func MetricValueLabel(name, label string, mfs []*MetricFamily) (string, map[string]string) { + // bit hacky is this really handy...? + for _, mf := range mfs { + if mf.Name == name { + for _, m := range mf.Metrics { + for _, v := range m.(metric).Labels { + if v == label { + return m.(metric).Value, m.(metric).Labels + } + } + + } + } + } + return "", nil +} + +func newMetricFamily(dtoMF *dto.MetricFamily) *MetricFamily { + mf := &MetricFamily{ + Name: dtoMF.GetName(), + Help: dtoMF.GetHelp(), + Type: dtoMF.GetType().String(), + Metrics: make([]interface{}, len(dtoMF.Metric)), + } + for i, m := range dtoMF.Metric { + if dtoMF.GetType() == dto.MetricType_SUMMARY { + mf.Metrics[i] = summary{ + Labels: makeLabels(m), + Quantiles: makeQuantiles(m), + Count: fmt.Sprint(m.GetSummary().GetSampleCount()), + Sum: fmt.Sprint(m.GetSummary().GetSampleSum()), + } + } else if dtoMF.GetType() == dto.MetricType_HISTOGRAM { + mf.Metrics[i] = histogram{ + Labels: makeLabels(m), + Buckets: makeBuckets(m), + Count: fmt.Sprint(m.GetHistogram().GetSampleCount()), + Sum: fmt.Sprint(m.GetSummary().GetSampleSum()), + } + } else { + mf.Metrics[i] = metric{ + Labels: makeLabels(m), + Value: fmt.Sprint(value(m)), + } + } + } + return mf +} + 
+func value(m *dto.Metric) float64 { + if m.Gauge != nil { + return m.GetGauge().GetValue() + } + if m.Counter != nil { + return m.GetCounter().GetValue() + } + if m.Untyped != nil { + return m.GetUntyped().GetValue() + } + return 0. +} + +func makeLabels(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, lp := range m.Label { + result[lp.GetName()] = lp.GetValue() + } + return result +} + +func makeQuantiles(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, q := range m.GetSummary().Quantile { + result[fmt.Sprint(q.GetQuantile())] = fmt.Sprint(q.GetValue()) + } + return result +} + +func makeBuckets(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, b := range m.GetHistogram().Bucket { + result[fmt.Sprint(b.GetUpperBound())] = fmt.Sprint(b.GetCumulativeCount()) + } + return result +} + +func fetchMetricFamilies(t *testing.T, url string, ch chan<- *dto.MetricFamily) { + defer close(ch) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + t.Fatalf("creating GET request for URL %q failed: %s", url, err) + } + req.Header.Add("Accept", acceptHeader) + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("executing GET request for URL %q failed: %s", url, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("GET request for URL %q returned HTTP status %s", url, resp.Status) + } + + mediatype, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type")) + if err == nil && mediatype == "application/vnd.google.protobuf" && + params["encoding"] == "delimited" && + params["proto"] == "io.prometheus.client.MetricFamily" { + for { + mf := &dto.MetricFamily{} + if _, err = pbutil.ReadDelimited(resp.Body, mf); err != nil { + if err == io.EOF { + break + } + t.Fatalf("reading metric family protocol buffer failed: %s", err) + } + ch <- mf + } + } else { + // We could do further content-type checks here, but the + // fallback for now will 
anyway be the text format + // version 0.0.4, so just go for it and see if it works. + var parser expfmt.TextParser + metricFamilies, err := parser.TextToMetricFamilies(resp.Body) + if err != nil { + t.Fatal("reading text format failed:", err) + } + for _, mf := range metricFamilies { + ch <- mf + } + } +} + +const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3` diff --git a/middleware/metrics/vars/report.go b/middleware/metrics/vars/report.go new file mode 100644 index 000000000..996826599 --- /dev/null +++ b/middleware/metrics/vars/report.go @@ -0,0 +1,62 @@ +package vars + +import ( + "time" + + "github.com/miekg/coredns/request" + + "github.com/miekg/dns" +) + +// Report reports the metrics data associcated with request. +func Report(req request.Request, zone, rcode string, size int, start time.Time) { + // Proto and Family + net := req.Proto() + fam := "1" + if req.Family() == 2 { + fam = "2" + } + + typ := req.QType() + + RequestCount.WithLabelValues(zone, net, fam).Inc() + RequestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond)) + + if req.Do() { + RequestDo.WithLabelValues(zone).Inc() + } + + if _, known := monitorType[typ]; known { + RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() + } else { + RequestType.WithLabelValues(zone, other).Inc() + } + + ResponseSize.WithLabelValues(zone, net).Observe(float64(size)) + RequestSize.WithLabelValues(zone, net).Observe(float64(req.Size())) + + ResponseRcode.WithLabelValues(zone, rcode).Inc() +} + +var monitorType = map[uint16]bool{ + dns.TypeAAAA: true, + dns.TypeA: true, + dns.TypeCNAME: true, + dns.TypeDNSKEY: true, + dns.TypeDS: true, + dns.TypeMX: true, + dns.TypeNSEC3: true, + dns.TypeNSEC: true, + dns.TypeNS: true, + dns.TypePTR: true, + dns.TypeRRSIG: true, + dns.TypeSOA: true, + dns.TypeSRV: true, + dns.TypeTXT: true, + // Meta Qtypes + dns.TypeIXFR: true, + 
dns.TypeAXFR: true, + dns.TypeANY: true, +} + +const other = "other" diff --git a/middleware/metrics/vars/vars.go b/middleware/metrics/vars/vars.go new file mode 100644 index 000000000..935b6f810 --- /dev/null +++ b/middleware/metrics/vars/vars.go @@ -0,0 +1,68 @@ +package vars + +import ( + "github.com/miekg/coredns/middleware" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_count_total", + Help: "Counter of DNS requests made per zone, protocol and family.", + }, []string{"zone", "proto", "family"}) + + RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_duration_milliseconds", + Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), + Help: "Histogram of the time (in milliseconds) each request took.", + }, []string{"zone"}) + + RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_size_bytes", + Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + + RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_do_count_total", + Help: "Counter of DNS requests with DO bit set per zone.", + }, []string{"zone"}) + + RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_type_count_total", + Help: "Counter of DNS requests per type, per zone.", + }, []string{"zone", "type"}) + + ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + 
Subsystem: subsystem, + Name: "response_size_bytes", + Help: "Size of the returned response in bytes.", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + + ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "response_rcode_count_total", + Help: "Counter of response status codes.", + }, []string{"zone", "rcode"}) +) + +const ( + subsystem = "dns" + + // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone. + Dropped = "dropped" +) diff --git a/middleware/middleware.go b/middleware/middleware.go index 804209fa9..bdc8bc2fe 100644 --- a/middleware/middleware.go +++ b/middleware/middleware.go @@ -45,6 +45,7 @@ type ( // chain by returning them unchanged. Handler interface { ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) + Name() string } // HandlerFunc is a convenience type like dns.HandlerFunc, except @@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. return f(ctx, w, r) } +func (f HandlerFunc) Name() string { return "handlerfunc" } + // Error returns err with 'middleware/name: ' prefixed to it. func Error(name string, err error) error { return fmt.Errorf("%s/%s: %s", "middleware", name, err) } diff --git a/middleware/normalize.go b/middleware/normalize.go index 2d153ba78..87f3ce703 100644 --- a/middleware/normalize.go +++ b/middleware/normalize.go @@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) } type ( // Host represents a host from the Corefile, may contain port. Host string // Host represents a host from the Corefile, may contain port. - // Addr resprents an address in the Corefile. + // Addr represents an address in the Corefile. Addr string // Addr resprents an address in the Corefile. 
) diff --git a/middleware/proxy/proxy.go b/middleware/proxy/proxy.go index a33024f12..1ff0a2f30 100644 --- a/middleware/proxy/proxy.go +++ b/middleware/proxy/proxy.go @@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return p.Next.ServeDNS(ctx, w, r) } +func (p Proxy) Name() string { return "proxy" } + // defaultTimeout is the default networking timeout for DNS requests. const defaultTimeout = 5 * time.Second diff --git a/middleware/rewrite/rewrite.go b/middleware/rewrite/rewrite.go index c88fa3549..2498d4c34 100644 --- a/middleware/rewrite/rewrite.go +++ b/middleware/rewrite/rewrite.go @@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg return rw.Next.ServeDNS(ctx, w, r) } +func (rw Rewrite) Name() string { return "rewrite" } + // Rule describes an internal location rewrite rule. type Rule interface { // Rewrite rewrites the internal location of the current request. diff --git a/middleware/test/helpers.go b/middleware/test/helpers.go index 157bdce66..0137171a7 100644 --- a/middleware/test/helpers.go +++ b/middleware/test/helpers.go @@ -283,6 +283,7 @@ type ( // Handler interface defines a middleware. 
Handler interface { ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) + Name() string } ) @@ -290,3 +291,5 @@ type ( func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { return f(ctx, w, r) } + +func (f HandlerFunc) Name() string { return "handlerfunc" } diff --git a/middleware/whoami/whoami.go b/middleware/whoami/whoami.go index ccf8aefe1..0ef63325f 100644 --- a/middleware/whoami/whoami.go +++ b/middleware/whoami/whoami.go @@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) return 0, nil } + +func (wh Whoami) Name() string { return "whoami" } diff --git a/test/metrics_test.go b/test/metrics_test.go index 85cb2a824..d122f2545 100644 --- a/test/metrics_test.go +++ b/test/metrics_test.go @@ -1,12 +1,84 @@ package test -import "testing" +import ( + "io/ioutil" + "log" + "os" + "path" + "testing" + "time" + + "github.com/miekg/coredns/middleware/cache" + "github.com/miekg/coredns/middleware/metrics" + mtest "github.com/miekg/coredns/middleware/metrics/test" + "github.com/miekg/coredns/middleware/metrics/vars" + + "github.com/miekg/dns" +) // Start test server that has metrics enabled. Then tear it down again. func TestMetricsServer(t *testing.T) { - corefile := `.:0 { + corefile := `example.org:0 { chaos CoreDNS-001 miek@miek.nl - prometheus localhost:0 + prometheus +} + +example.com:0 { + proxy . 8.8.4.4:53 + prometheus +} +` + srv, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + defer srv.Stop() +} + +func TestMetricsRefused(t *testing.T) { + metricName := "coredns_dns_response_rcode_count_total" + + corefile := `example.org:0 { + proxy . 
8.8.8.8:53 + prometheus +} +` + srv, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + defer srv.Stop() + + udp, _ := CoreDNSServerPorts(srv, 0) + + m := new(dns.Msg) + m.SetQuestion("google.com.", dns.TypeA) + + if _, err = dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + got, labels := mtest.MetricValue(metricName, data) + + if got != "1" { + t.Errorf("Expected value %s for refused, but got %s", "1", got) + } + if labels["zone"] != vars.Dropped { + t.Errorf("Expected zone value %s for refused, but got %s", vars.Dropped, labels["zone"]) + } + if labels["rcode"] != "REFUSED" { + t.Errorf("Expected zone value %s for refused, but got %s", "REFUSED", labels["rcode"]) + } +} + +func TestMetricsCache(t *testing.T) { + metricName := "coredns_cache_size_guage" + + corefile := `example.net:0 { + proxy . 8.8.8.8:53 + prometheus + cache } ` srv, err := CoreDNSServer(corefile) @@ -14,4 +86,88 @@ func TestMetricsServer(t *testing.T) { t.Fatalf("Could not get CoreDNS serving instance: %s", err) } defer srv.Stop() + + udp, _ := CoreDNSServerPorts(srv, 0) + + m := new(dns.Msg) + m.SetQuestion("www.example.net.", dns.TypeA) + + if _, err = dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + // Get the value for the metrics where the one of the labels values matches "success" + got, _ := mtest.MetricValueLabel(metricName, cache.Success, data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } +} + +func TestMetricsAuto(t *testing.T) { + tmpdir, err := ioutil.TempDir(os.TempDir(), "coredns") + if err != nil { + t.Fatal(err) + } + + // TODO(miek): Random port as string and use that later? 
+ corefile := `org:0 { + auto { + directory ` + tmpdir + ` db\.(.*) {1} 1 + } + prometheus + } +` + + i, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + + udp, _ := CoreDNSServerPorts(i, 0) + if udp == "" { + t.Fatalf("Could not get UDP listening port") + } + defer i.Stop() + + log.SetOutput(ioutil.Discard) + + // Write db.example.org to get example.org. + if err = ioutil.WriteFile(path.Join(tmpdir, "db.example.org"), []byte(zoneContent), 0644); err != nil { + t.Fatal(err) + } + // TODO(miek): make the auto sleep even less. + time.Sleep(1100 * time.Millisecond) // wait for it to be picked up + + m := new(dns.Msg) + m.SetQuestion("www.example.org.", dns.TypeA) + + if _, err := dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + metricName := "coredns_dns_request_count_total" //{zone, proto, family} + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + // Get the value for the metrics where the one of the labels values matches "example.org." + got, _ := mtest.MetricValueLabel(metricName, "example.org.", data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } + + // Remove db.example.org again. And see if the metric stops increasing. + os.Remove(path.Join(tmpdir, "db.example.org")) + time.Sleep(1100 * time.Millisecond) // wait for it to be picked up + if _, err := dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data = mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + got, _ = mtest.MetricValueLabel(metricName, "example.org.", data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } } |