diff options
39 files changed, 828 insertions, 259 deletions
diff --git a/core/dnsserver/config.go b/core/dnsserver/config.go index 20ff8389a..123ab70ce 100644 --- a/core/dnsserver/config.go +++ b/core/dnsserver/config.go @@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config { ctx.saveConfig(c.Key, &Config{}) return GetConfig(c) } + +// GetMiddleware returns the middleware handler that has been added to the config under name. +// This is useful to inspect if a certain middleware is active in this server. +// Note that this is order dependent and the order is defined in directives.go, i.e. if your middleware +// comes before the middleware you are checking; it will not be there (yet). +func GetMiddleware(c *caddy.Controller, name string) middleware.Handler { + // TODO(miek): calling the handler h(nil) should be a noop... + conf := GetConfig(c) + for _, h := range conf.Middleware { + x := h(nil) + if name == x.Name() { + return x + } + } + return nil +} diff --git a/core/dnsserver/server.go b/core/dnsserver/server.go index aa94dcd8c..cc078d0d8 100644 --- a/core/dnsserver/server.go +++ b/core/dnsserver/server.go @@ -10,7 +10,9 @@ import ( "time" "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/edns" + "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/request" "github.com/miekg/dns" @@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() { } // DefaultErrorFunc responds to an DNS request with an error. 
-func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) { +func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) { state := request.Request{W: w, Req: r} answer := new(dns.Msg) - answer.SetRcode(r, rcode) + answer.SetRcode(r, rc) state.SizeAndDo(answer) + vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + w.WriteMsg(answer) } diff --git a/middleware.md b/middleware.md index acd69f497..98a6adf89 100644 --- a/middleware.md +++ b/middleware.md @@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware. ## Metrics When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the -*Subsystem* should be the name of the middleware. +*Subsystem* should be the name of the middleware. The README.md for the middleware should then +also contain a *Metrics* section detailing the metrics. ## Documentation diff --git a/middleware/auto/auto.go b/middleware/auto/auto.go index 7721c194e..65e784459 100644 --- a/middleware/auto/auto.go +++ b/middleware/auto/auto.go @@ -8,6 +8,7 @@ import ( "github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware/file" + "github.com/miekg/coredns/middleware/metrics" "github.com/miekg/coredns/request" "github.com/miekg/dns" @@ -20,6 +21,7 @@ type ( Next middleware.Handler *Zones + metrics *metrics.Metrics loader } @@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i w.WriteMsg(m) return dns.RcodeSuccess, nil } + +func (a Auto) Name() string { return "auto" } diff --git a/middleware/auto/setup.go b/middleware/auto/setup.go index 8c56f90a0..a5e11186a 100644 --- a/middleware/auto/setup.go +++ b/middleware/auto/setup.go @@ -11,6 +11,7 @@ import ( "github.com/miekg/coredns/core/dnsserver" "github.com/miekg/coredns/middleware" "github.com/miekg/coredns/middleware/file" + "github.com/miekg/coredns/middleware/metrics" "github.com/mholt/caddy" ) @@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error { return 
middleware.Error("auto", err) } + // If we have enabled prometheus we should add newly discovered zones to it. + met := dnsserver.GetMiddleware(c, "prometheus") + if met != nil { + a.metrics = met.(*metrics.Metrics) + } + walkChan := make(chan bool) c.OnStartup(func() error { - err := a.Zones.Walk(a.loader) + err := a.Walk() if err != nil { return err } @@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error { case <-walkChan: return case <-ticker.C: - a.Zones.Walk(a.loader) + a.Walk() } } }() diff --git a/middleware/auto/walk.go b/middleware/auto/walk.go index 4259d7f17..8c3370e16 100644 --- a/middleware/auto/walk.go +++ b/middleware/auto/walk.go @@ -13,26 +13,26 @@ import ( ) // Walk will recursively walk of the file under l.directory and adds the one that match l.re. -func (z *Zones) Walk(l loader) error { +func (a Auto) Walk() error { // TODO(miek): should add something so that we don't stomp on each other. toDelete := make(map[string]bool) - for _, n := range z.Names() { + for _, n := range a.Zones.Names() { toDelete[n] = true } - filepath.Walk(l.directory, func(path string, info os.FileInfo, err error) error { + filepath.Walk(a.loader.directory, func(path string, info os.FileInfo, err error) error { if info.IsDir() { return nil } - match, origin := matches(l.re, info.Name(), l.template) + match, origin := matches(a.loader.re, info.Name(), a.loader.template) if !match { return nil } - if _, ok := z.Z[origin]; ok { + if _, ok := a.Zones.Z[origin]; ok { // we already have this zone toDelete[origin] = false return nil @@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error { return nil } - zo.NoReload = l.noReload - zo.TransferTo = l.transferTo + zo.NoReload = a.loader.noReload + zo.TransferTo = a.loader.transferTo - z.Insert(zo, origin) + a.Zones.Add(zo, origin) + + if a.metrics != nil { + a.metrics.AddZone(origin) + } zo.Notify() @@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error { if !ok { continue } - z.Delete(origin) + + if a.metrics != nil { + 
a.metrics.RemoveZone(origin) + } + + a.Zones.Remove(origin) + log.Printf("[INFO] Deleting zone `%s'", origin) } diff --git a/middleware/auto/walk_test.go b/middleware/auto/walk_test.go index cc420d5b6..f15548dba 100644 --- a/middleware/auto/walk_test.go +++ b/middleware/auto/walk_test.go @@ -37,13 +37,16 @@ func TestWalk(t *testing.T) { template: `${1}`, } - z := &Zones{} + a := Auto{ + loader: ldr, + Zones: &Zones{}, + } - z.Walk(ldr) + a.Walk() // db.example.org and db.example.com should be here (created in createFiles) for _, name := range []string{"example.com.", "example.org."} { - if _, ok := z.Z[name]; !ok { + if _, ok := a.Zones.Z[name]; !ok { t.Errorf("%s should have been added", name) } } diff --git a/middleware/auto/watcher_test.go b/middleware/auto/watcher_test.go index 751c78c0d..a8f219fa1 100644 --- a/middleware/auto/watcher_test.go +++ b/middleware/auto/watcher_test.go @@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) { template: `${1}`, } - z := &Zones{} + a := Auto{ + loader: ldr, + Zones: &Zones{}, + } - z.Walk(ldr) + a.Walk() // example.org and example.com should exist - if x := len(z.Z["example.org."].All()); x != 4 { + if x := len(a.Zones.Z["example.org."].All()); x != 4 { t.Fatalf("expected 4 RRs, got %d", x) } - if x := len(z.Z["example.com."].All()); x != 4 { + if x := len(a.Zones.Z["example.com."].All()); x != 4 { t.Fatalf("expected 4 RRs, got %d", x) } @@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) { t.Fatal(err) } - z.Walk(ldr) + a.Walk() + // TODO(miek): check } diff --git a/middleware/auto/zone.go b/middleware/auto/zone.go index 4c950b908..f825871f7 100644 --- a/middleware/auto/zone.go +++ b/middleware/auto/zone.go @@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone { return zo } -// Insert inserts a new zone into z. If zo.NoReload is false, the +// Add adds a new zone into z. If zo.NoReload is false, the // reload goroutine is started. 
-func (z *Zones) Insert(zo *file.Zone, name string) { +func (z *Zones) Add(zo *file.Zone, name string) { z.Lock() if z.Z == nil { @@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) { z.Z[name] = zo z.names = append(z.names, name) - zo.Reload() z.Unlock() } -// Delete removes the zone named name from z. It also stop the the zone's reload goroutine. -func (z *Zones) Delete(name string) { +// Remove removes the zone named name from z. It also stops the zone's reload goroutine. +func (z *Zones) Remove(name string) { z.Lock() if zo, ok := z.Z[name]; ok && !zo.NoReload { @@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) { delete(z.Z, name) - // just regenerate Names (might be bad if you have a lot of zones...) + // TODO(miek): just regenerate Names (might be bad if you have a lot of zones...) z.names = []string{} for n := range z.Z { z.names = append(z.names, n) } + z.Unlock() } diff --git a/middleware/cache/README.md b/middleware/cache/README.md index 55ad8848a..3a4dea43e 100644 --- a/middleware/cache/README.md +++ b/middleware/cache/README.md @@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached. The minimum TTL allowed on resource records is 5 seconds. -If monitoring is enabled (via the *prometheus* directive) then the following extra metrics are added: +## Metrics -* coredns_cache_hit_count_total, and -* coredns_cache_miss_count_total +If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: -They both work on a per-zone basis and just count the hit and miss counts for each query. +* coredns_cache_size_guage{type} - total elements in the cache, type is either "denial" or "success". +* coredns_cache_capacity_gauge{type} - total capacity of the cache, type is either "denial" or "success". 
## Examples +Enable caching for all zones, but cap everything to a TTL of 10 seconds: + ~~~ cache 10 ~~~ -Enable caching for all zones, but cap everything to a TTL of 10 seconds. +Proxy to Google Public DNS and only cache responses for example.org (or below). ~~~ proxy . 8.8.8.8:53 cache example.org ~~~ - -Proxy to Google Public DNS and only cache responses for example.org (or below). diff --git a/middleware/cache/cache.go b/middleware/cache/cache.go index 6254349c3..d17253c38 100644 --- a/middleware/cache/cache.go +++ b/middleware/cache/cache.go @@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error { if key != "" { c.set(res, key, mt, duration) + + cacheSize.WithLabelValues(Success).Set(float64(c.pcache.Len())) + cacheSize.WithLabelValues(Denial).Set(float64(c.ncache.Len())) } setMsgTTL(res, uint32(duration.Seconds())) @@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration case response.OtherError: // don't cache these - // TODO(miek): what do we do with these? default: log.Printf("[WARNING] Caching called with unknown classification: %d", mt) } @@ -122,4 +124,9 @@ const ( minTTL = 5 * time.Second defaultCap = 10000 // default capacity of the cache. + + // Success is the class defined for positive caching. + Success = "success" + // Denial is the class defined for negative caching. 
+ Denial = "denial" ) diff --git a/middleware/cache/handler.go b/middleware/cache/handler.go index e307b0b79..95b51970e 100644 --- a/middleware/cache/handler.go +++ b/middleware/cache/handler.go @@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) state.SizeAndDo(resp) w.WriteMsg(resp) - cacheHitCount.WithLabelValues(zone).Inc() - return dns.RcodeSuccess, nil } - cacheMissCount.WithLabelValues(zone).Inc() - crr := &ResponseWriter{w, c} return c.Next.ServeDNS(ctx, crr, r) } +func (c *Cache) Name() string { return "cache" } + func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { k := rawKey(qname, qtype, do) @@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) { } var ( - cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "hit_count_total", - Help: "Counter of DNS requests that were found in the cache.", - }, []string{"zone"}) + Name: "size_guage", + Help: "Gauge of number of elements in the cache.", + }, []string{"type"}) - cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "miss_count_total", - Help: "Counter of DNS requests that were not found in the cache.", - }, []string{"zone"}) + Name: "capacity_gauge", + Help: "Gauge of cache's capacity.", + }, []string{"type"}) ) const subsystem = "cache" func init() { - prometheus.MustRegister(cacheHitCount) - prometheus.MustRegister(cacheMissCount) + prometheus.MustRegister(cacheSize) + prometheus.MustRegister(cacheCapacity) } diff --git a/middleware/cache/setup.go b/middleware/cache/setup.go index 08c8fefdb..11a35ddc4 100644 --- a/middleware/cache/setup.go +++ b/middleware/cache/setup.go @@ -28,6 +28,10 @@ func setup(c 
*caddy.Controller) error { return ca }) + // Export the capacity for the metrics. This only happens once, because this is a re-load change only. + cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap)) + cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap)) + return nil } @@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { for c.NextBlock() { switch c.Val() { // first number is cap, second is an new ttl - case "success": + case Success: args := c.RemainingArgs() if len(args) == 0 { return nil, c.ArgErr() @@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) { } ca.pttl = time.Duration(pttl) * time.Second } - case "denial": + case Denial: args := c.RemainingArgs() if len(args) == 0 { return nil, c.ArgErr() diff --git a/middleware/chaos/chaos.go b/middleware/chaos/chaos.go index 6d83f5dda..39ebe7f49 100644 --- a/middleware/chaos/chaos.go +++ b/middleware/chaos/chaos.go @@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return 0, nil } +func (c Chaos) Name() string { return "chaos" } + func trim(s string) string { if len(s) < 256 { return s diff --git a/middleware/dnssec/README.md b/middleware/dnssec/README.md index b87b25b1c..5daebc7f6 100644 --- a/middleware/dnssec/README.md +++ b/middleware/dnssec/README.md @@ -34,9 +34,14 @@ dnssec [ZONES... ] { will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*. - * `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage objects and the default capacity is 10000. +## Metrics + +If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported: + +* coredns_dnssec_size_guage{type} - total elements in the cache, type is "signature". +* coredns_dnssec_capacity_gauge{type} - total capacity of the cache, type is "signature". 
## Examples diff --git a/middleware/dnssec/handler.go b/middleware/dnssec/handler.go index aaed62bdf..a99588397 100644 --- a/middleware/dnssec/handler.go +++ b/middleware/dnssec/handler.go @@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) } var ( - cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "hit_count_total", - Help: "Counter of signatures that were found in the cache.", - }, []string{"zone"}) + Name: "size_guage", + Help: "Gauge of number of elements in the cache.", + }, []string{"type"}) - cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: middleware.Namespace, Subsystem: subsystem, - Name: "miss_count_total", - Help: "Counter of signatures that were not found in the cache.", - }, []string{"zone"}) + Name: "capacity_gauge", + Help: "Gauge of cache's capacity.", + }, []string{"type"}) ) +func (d Dnssec) Name() string { return "dnssec" } + const subsystem = "dnssec" func init() { - prometheus.MustRegister(cacheHitCount) - prometheus.MustRegister(cacheMissCount) + prometheus.MustRegister(cacheSize) + prometheus.MustRegister(cacheCapacity) } diff --git a/middleware/dnssec/responsewriter.go b/middleware/dnssec/responsewriter.go index 547561fb9..e7af62a9e 100644 --- a/middleware/dnssec/responsewriter.go +++ b/middleware/dnssec/responsewriter.go @@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error { if state.Do() { res = d.d.Sign(state, zone, time.Now().UTC()) + + cacheSize.WithLabelValues("signature").Set(float64(d.d.cache.Len())) } state.SizeAndDo(res) diff --git a/middleware/dnssec/setup.go b/middleware/dnssec/setup.go index 19a68a853..639303ea9 100644 --- a/middleware/dnssec/setup.go +++ b/middleware/dnssec/setup.go @@ -32,6 +32,9 @@ func setup(c *caddy.Controller) 
error { return New(zones, keys, next, cache) }) + // Export the capacity for the metrics. This only happens once, because this is a re-load change only. + cacheCapacity.WithLabelValues("signature").Set(float64(capacity)) + return nil } diff --git a/middleware/errors/errors.go b/middleware/errors/errors.go index afaa8cf4d..aca05b54a 100644 --- a/middleware/errors/errors.go +++ b/middleware/errors/errors.go @@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns return rcode, err } +func (h errorHandler) Name() string { return "errors" } + func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) { rec := recover() if rec == nil { diff --git a/middleware/etcd/handler.go b/middleware/etcd/handler.go index 75aa582bd..7c856bd8b 100644 --- a/middleware/etcd/handler.go +++ b/middleware/etcd/handler.go @@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return dns.RcodeSuccess, nil } +func (e *Etcd) Name() string { return "etcd" } + // Err write an error response to the client. func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) { m := new(dns.Msg) diff --git a/middleware/file/file.go b/middleware/file/file.go index 90d15af79..7c6a65ced 100644 --- a/middleware/file/file.go +++ b/middleware/file/file.go @@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i return dns.RcodeSuccess, nil } +func (f File) Name() string { return "file" } + // Parse parses the zone in filename and returns a new Zone or an error. 
func Parse(f io.Reader, origin, fileName string) (*Zone, error) { tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName) diff --git a/middleware/kubernetes/handler.go b/middleware/kubernetes/handler.go index 1b1abf44b..cc59a305f 100644 --- a/middleware/kubernetes/handler.go +++ b/middleware/kubernetes/handler.go @@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M return dns.RcodeSuccess, nil } +func (k Kubernetes) Name() string { return "kubernetes" } + // Err writes an error response back to the client. func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) { m := new(dns.Msg) diff --git a/middleware/loadbalance/handler.go b/middleware/loadbalance/handler.go index 151eb57d0..8cba67e6f 100644 --- a/middleware/loadbalance/handler.go +++ b/middleware/loadbalance/handler.go @@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. wrr := &RoundRobinResponseWriter{w} return rr.Next.ServeDNS(ctx, wrr, r) } + +func (rr RoundRobin) Name() string { return "loadbalance" } diff --git a/middleware/log/log.go b/middleware/log/log.go index dba76341e..0a3cca130 100644 --- a/middleware/log/log.go +++ b/middleware/log/log.go @@ -6,7 +6,7 @@ import ( "time" "github.com/miekg/coredns/middleware" - "github.com/miekg/coredns/middleware/metrics" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/middleware/pkg/replacer" @@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) answer.SetRcode(r, rc) state.SizeAndDo(answer) - metrics.Report(state, metrics.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) + vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now()) w.WriteMsg(answer) } @@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r 
*dns.Msg) return l.Next.ServeDNS(ctx, w, r) } +func (l Logger) Name() string { return "log" } + // Rule configures the logging middleware. type Rule struct { NameScope string diff --git a/middleware/metrics/README.md b/middleware/metrics/README.md index fb0e6d3aa..46dd34296 100644 --- a/middleware/metrics/README.md +++ b/middleware/metrics/README.md @@ -7,12 +7,10 @@ The following metrics are exported: * coredns_dns_request_count_total{zone, proto, family} * coredns_dns_request_duration_milliseconds{zone} -* coredns_dns_request_size_bytes{zone,, proto} -* coredns_dns_request_transfer_size_bytes{zone,, proto} +* coredns_dns_request_size_bytes{zone, proto} * coredns_dns_request_do_count_total{zone} * coredns_dns_request_type_count_total{zone, type} * coredns_dns_response_size_bytes{zone, proto} -* coredns_dns_response_transfer_size_bytes{zone, proto} * coredns_dns_response_rcode_count_total{zone, rcode} Each counter has a label `zone` which is the zonename used for the request/response. @@ -27,10 +25,7 @@ Extra labels used are: * The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response. If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake -domain "dropped" (without a closing dot). - -Restarting CoreDNS will stop the monitoring. This is a bug. Also [this upstream -Caddy bug](https://github.com/mholt/caddy/issues/675). +name "dropped" (without a closing dot - this is never a valid domain name). ## Syntax @@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default is `localhost:9153`. The metrics path is fixed to `/metrics`. 
## Examples + +Use an alternative address: + +~~~ +prometheus localhost:9253 +~~~ diff --git a/middleware/metrics/handler.go b/middleware/metrics/handler.go index a0247c517..4c235ab6a 100644 --- a/middleware/metrics/handler.go +++ b/middleware/metrics/handler.go @@ -1,9 +1,8 @@ package metrics import ( - "time" - "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/miekg/coredns/middleware/pkg/dnsrecorder" "github.com/miekg/coredns/middleware/pkg/rcode" "github.com/miekg/coredns/request" @@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg state := request.Request{W: w, Req: r} qname := state.QName() - zone := middleware.Zones(m.ZoneNames).Matches(qname) + zone := middleware.Zones(m.ZoneNames()).Matches(qname) if zone == "" { zone = "." } @@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg rw := dnsrecorder.New(w) status, err := m.Next.ServeDNS(ctx, rw, r) - Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start) + vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start) return status, err } -// Report is a plain reporting function that the server can use for REFUSED and other -// queries that are turned down because they don't match any middleware. 
-func Report(req request.Request, zone, rcode string, size int, start time.Time) { - if requestCount == nil { - // no metrics are enabled - return - } - - // Proto and Family - net := req.Proto() - fam := "1" - if req.Family() == 2 { - fam = "2" - } - - typ := req.QType() - - requestCount.WithLabelValues(zone, net, fam).Inc() - requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond)) - - if req.Do() { - requestDo.WithLabelValues(zone).Inc() - } - - if _, known := monitorType[typ]; known { - requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() - } else { - requestType.WithLabelValues(zone, other).Inc() - } - - if typ == dns.TypeIXFR || typ == dns.TypeAXFR { - responseTransferSize.WithLabelValues(zone, net).Observe(float64(size)) - requestTransferSize.WithLabelValues(zone, net).Observe(float64(req.Size())) - } else { - responseSize.WithLabelValues(zone, net).Observe(float64(size)) - requestSize.WithLabelValues(zone, net).Observe(float64(req.Size())) - } - - responseRcode.WithLabelValues(zone, rcode).Inc() -} - -var monitorType = map[uint16]bool{ - dns.TypeAAAA: true, - dns.TypeA: true, - dns.TypeCNAME: true, - dns.TypeDNSKEY: true, - dns.TypeDS: true, - dns.TypeMX: true, - dns.TypeNSEC3: true, - dns.TypeNSEC: true, - dns.TypeNS: true, - dns.TypePTR: true, - dns.TypeRRSIG: true, - dns.TypeSOA: true, - dns.TypeSRV: true, - dns.TypeTXT: true, - // Meta Qtypes - dns.TypeIXFR: true, - dns.TypeAXFR: true, - dns.TypeANY: true, -} - -const other = "other" +func (m *Metrics) Name() string { return "prometheus" } diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go index b82a2be86..5f93ec2f5 100644 --- a/middleware/metrics/metrics.go +++ b/middleware/metrics/metrics.go @@ -1,5 +1,4 @@ -// Package metrics implement a handler and middleware that provides Prometheus -// metrics. +// Package metrics implement a handler and middleware that provides Prometheus metrics. 
package metrics import ( @@ -9,37 +8,51 @@ import ( "sync" "github.com/miekg/coredns/middleware" + "github.com/miekg/coredns/middleware/metrics/vars" "github.com/prometheus/client_golang/prometheus" ) -var ( - requestCount *prometheus.CounterVec - requestDuration *prometheus.HistogramVec - requestSize *prometheus.HistogramVec - requestTransferSize *prometheus.HistogramVec - requestDo *prometheus.CounterVec - requestType *prometheus.CounterVec - - responseSize *prometheus.HistogramVec - responseTransferSize *prometheus.HistogramVec - responseRcode *prometheus.CounterVec -) - // Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics type Metrics struct { - Next middleware.Handler - Addr string - ln net.Listener - mux *http.ServeMux - Once sync.Once - ZoneNames []string + Next middleware.Handler + Addr string + ln net.Listener + mux *http.ServeMux + Once sync.Once + + zoneNames []string + zoneMap map[string]bool + zoneMu sync.RWMutex +} + +// AddZone adds zone z to m. +func (m *Metrics) AddZone(z string) { + m.zoneMu.Lock() + m.zoneMap[z] = true + m.zoneNames = keys(m.zoneMap) + m.zoneMu.Unlock() +} + +// RemoveZone remove zone z from m. +func (m *Metrics) RemoveZone(z string) { + m.zoneMu.Lock() + delete(m.zoneMap, z) + m.zoneNames = keys(m.zoneMap) + m.zoneMu.Unlock() +} + +// ZoneNames returns the zones of m. +func (m *Metrics) ZoneNames() []string { + m.zoneMu.RLock() + s := m.zoneNames + m.zoneMu.RUnlock() + return s } // OnStartup sets up the metrics on startup. 
func (m *Metrics) OnStartup() error { m.Once.Do(func() { - define() ln, err := net.Listen("tcp", m.Addr) if err != nil { @@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error { m.mux = http.NewServeMux() - prometheus.MustRegister(requestCount) - prometheus.MustRegister(requestDuration) - prometheus.MustRegister(requestSize) - prometheus.MustRegister(requestTransferSize) - prometheus.MustRegister(requestDo) - prometheus.MustRegister(requestType) + prometheus.MustRegister(vars.RequestCount) + prometheus.MustRegister(vars.RequestDuration) + prometheus.MustRegister(vars.RequestSize) + prometheus.MustRegister(vars.RequestDo) + prometheus.MustRegister(vars.RequestType) - prometheus.MustRegister(responseSize) - prometheus.MustRegister(responseTransferSize) - prometheus.MustRegister(responseRcode) + prometheus.MustRegister(vars.ResponseSize) + prometheus.MustRegister(vars.ResponseRcode) - m.mux.Handle(path, prometheus.Handler()) + m.mux.Handle("/metrics", prometheus.Handler()) go func() { http.Serve(m.ln, m.mux) @@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error { return nil } -func define() { - requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_count_total", - Help: "Counter of DNS requests made per zone, protocol and family.", - }, []string{"zone", "proto", "family"}) - - requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_duration_milliseconds", - Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...), - Help: "Histogram of the time (in milliseconds) each request took.", - }, []string{"zone"}) - - requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_size_bytes", - Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", - Buckets: 
[]float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_transfer_size_bytes", - Help: "Size of the incoming zone transfer in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_do_count_total", - Help: "Counter of DNS requests with DO bit set per zone.", - }, []string{"zone"}) - - requestType = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "request_type_count_total", - Help: "Counter of DNS requests per type, per zone.", - }, []string{"zone", "type"}) - - responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_size_bytes", - Help: "Size of the returned response in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_transfer_size_bytes", - Help: "Size of the returned zone transfer in bytes.", - Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, - }, []string{"zone", "proto"}) - - responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ - Namespace: middleware.Namespace, - Subsystem: subsystem, - Name: "response_rcode_count_total", - Help: "Counter of response status codes.", - }, []string{"zone", "rcode"}) +func keys(m map[string]bool) []string { + sx := []string{} + for k := range 
m { + sx = append(sx, k) + } + return sx } - -const ( - // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone. - Dropped = "dropped" - subsystem = "dns" - path = "/metrics" -) diff --git a/middleware/metrics/metrics_test.go b/middleware/metrics/metrics_test.go new file mode 100644 index 000000000..4272350f4 --- /dev/null +++ b/middleware/metrics/metrics_test.go @@ -0,0 +1,83 @@ +package metrics + +import ( + "testing" + + "github.com/miekg/coredns/middleware" + mtest "github.com/miekg/coredns/middleware/metrics/test" + "github.com/miekg/coredns/middleware/pkg/dnsrecorder" + "github.com/miekg/coredns/middleware/test" + + "github.com/miekg/dns" + "golang.org/x/net/context" +) + +func TestMetrics(t *testing.T) { + met := &Metrics{Addr: Addr, zoneMap: make(map[string]bool)} + if err := met.OnStartup(); err != nil { + t.Fatalf("Failed to start metrics handler: %s", err) + } + defer met.OnShutdown() + + met.AddZone("example.org.") + + tests := []struct { + next middleware.Handler + qname string + qtype uint16 + metric string + expectedValue string + }{ + // This all works because 1 bucket (1 zone, 1 type) + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_count_total", + expectedValue: "1", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_count_total", + expectedValue: "2", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_request_type_count_total", + expectedValue: "3", + }, + { + next: test.NextHandler(dns.RcodeSuccess, nil), + qname: "example.org", + metric: "coredns_dns_response_rcode_count_total", + expectedValue: "4", + }, + } + + ctx := context.TODO() + + for i, tc := range tests { + req := new(dns.Msg) + if tc.qtype == 0 { + tc.qtype = dns.TypeA + } + req.SetQuestion(dns.Fqdn(tc.qname), tc.qtype) + met.Next = tc.next + + rec := 
dnsrecorder.New(&test.ResponseWriter{}) + _, err := met.ServeDNS(ctx, rec, req) + if err != nil { + t.Fatalf("Test %d: Expected no error, but got %s", i, err) + } + + result := mtest.Scrape(t, "http://"+Addr+"/metrics") + + if tc.expectedValue != "" { + got, _ := mtest.MetricValue(tc.metric, result) + if got != tc.expectedValue { + t.Errorf("Test %d: Expected value %s for metrics %s, but got %s", i, tc.expectedValue, tc.metric, got) + } + } + } +} diff --git a/middleware/metrics/setup.go b/middleware/metrics/setup.go index 8c8dd1a75..93a6bf50a 100644 --- a/middleware/metrics/setup.go +++ b/middleware/metrics/setup.go @@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error { func prometheusParse(c *caddy.Controller) (*Metrics, error) { var ( - met = &Metrics{Addr: addr} + met = &Metrics{Addr: Addr, zoneMap: make(map[string]bool)} err error ) for c.Next() { - if len(met.ZoneNames) > 0 { - return met, c.Err("metrics: can only have one metrics module per server") + if len(met.ZoneNames()) > 0 { + return met, c.Err("can only have one metrics module per server") } - met.ZoneNames = make([]string, len(c.ServerBlockKeys)) - copy(met.ZoneNames, c.ServerBlockKeys) - for i := range met.ZoneNames { - met.ZoneNames[i] = middleware.Host(met.ZoneNames[i]).Normalize() + + for _, z := range c.ServerBlockKeys { + met.AddZone(middleware.Host(z).Normalize()) } args := c.RemainingArgs() @@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { return met, e } default: - return met, c.Errf("metrics: unknown item: %s", c.Val()) + return met, c.Errf("unknown item: %s", c.Val()) } } @@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) { var metricsOnce sync.Once -const addr = "localhost:9153" +const Addr = "localhost:9153" diff --git a/middleware/metrics/test/scrape.go b/middleware/metrics/test/scrape.go new file mode 100644 index 000000000..d64bef96f --- /dev/null +++ b/middleware/metrics/test/scrape.go @@ -0,0 +1,225 @@ +// Adapted by Miek 
Gieben for CoreDNS testing. +// +// License from prom2json +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package test will scrape a target and you can inspect the variables. +// Basic usage: +// +// result := Scrape("http://localhost:9153/metrics") +// v := MetricValue("coredns_cache_capacity_gauge", result) +// +package test + +import ( + "fmt" + "io" + "mime" + "net/http" + "testing" + + "github.com/matttproud/golang_protobuf_extensions/pbutil" + "github.com/prometheus/common/expfmt" + + dto "github.com/prometheus/client_model/go" +) + +type ( + // MetricFamily holds a prometheus metric. + MetricFamily struct { + Name string `json:"name"` + Help string `json:"help"` + Type string `json:"type"` + Metrics []interface{} `json:"metrics,omitempty"` // Either metric or summary. + } + + // metric is for all "single value" metrics. + metric struct { + Labels map[string]string `json:"labels,omitempty"` + Value string `json:"value"` + } + + summary struct { + Labels map[string]string `json:"labels,omitempty"` + Quantiles map[string]string `json:"quantiles,omitempty"` + Count string `json:"count"` + Sum string `json:"sum"` + } + + histogram struct { + Labels map[string]string `json:"labels,omitempty"` + Buckets map[string]string `json:"buckets,omitempty"` + Count string `json:"count"` + Sum string `json:"sum"` + } +) + +// Scrape returns the all the vars a []*metricFamily. 
+func Scrape(t *testing.T, url string) []*MetricFamily { + mfChan := make(chan *dto.MetricFamily, 1024) + + go fetchMetricFamilies(t, url, mfChan) + + result := []*MetricFamily{} + for mf := range mfChan { + result = append(result, newMetricFamily(mf)) + } + return result +} + +// MetricValue returns the value associated with name as a string as well as the labels. +// It only returns the first metrics of the slice. +func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) { + for _, mf := range mfs { + if mf.Name == name { + // Only works with Gauge and Counter... + return mf.Metrics[0].(metric).Value, mf.Metrics[0].(metric).Labels + } + } + return "", nil +} + +// MetricValueLabel returns the value for name *and* label *value*. +func MetricValueLabel(name, label string, mfs []*MetricFamily) (string, map[string]string) { + // bit hacky is this really handy...? + for _, mf := range mfs { + if mf.Name == name { + for _, m := range mf.Metrics { + for _, v := range m.(metric).Labels { + if v == label { + return m.(metric).Value, m.(metric).Labels + } + } + + } + } + } + return "", nil +} + +func newMetricFamily(dtoMF *dto.MetricFamily) *MetricFamily { + mf := &MetricFamily{ + Name: dtoMF.GetName(), + Help: dtoMF.GetHelp(), + Type: dtoMF.GetType().String(), + Metrics: make([]interface{}, len(dtoMF.Metric)), + } + for i, m := range dtoMF.Metric { + if dtoMF.GetType() == dto.MetricType_SUMMARY { + mf.Metrics[i] = summary{ + Labels: makeLabels(m), + Quantiles: makeQuantiles(m), + Count: fmt.Sprint(m.GetSummary().GetSampleCount()), + Sum: fmt.Sprint(m.GetSummary().GetSampleSum()), + } + } else if dtoMF.GetType() == dto.MetricType_HISTOGRAM { + mf.Metrics[i] = histogram{ + Labels: makeLabels(m), + Buckets: makeBuckets(m), + Count: fmt.Sprint(m.GetHistogram().GetSampleCount()), + Sum: fmt.Sprint(m.GetSummary().GetSampleSum()), + } + } else { + mf.Metrics[i] = metric{ + Labels: makeLabels(m), + Value: fmt.Sprint(value(m)), + } + } + } + return mf +} + 
+func value(m *dto.Metric) float64 { + if m.Gauge != nil { + return m.GetGauge().GetValue() + } + if m.Counter != nil { + return m.GetCounter().GetValue() + } + if m.Untyped != nil { + return m.GetUntyped().GetValue() + } + return 0. +} + +func makeLabels(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, lp := range m.Label { + result[lp.GetName()] = lp.GetValue() + } + return result +} + +func makeQuantiles(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, q := range m.GetSummary().Quantile { + result[fmt.Sprint(q.GetQuantile())] = fmt.Sprint(q.GetValue()) + } + return result +} + +func makeBuckets(m *dto.Metric) map[string]string { + result := map[string]string{} + for _, b := range m.GetHistogram().Bucket { + result[fmt.Sprint(b.GetUpperBound())] = fmt.Sprint(b.GetCumulativeCount()) + } + return result +} + +func fetchMetricFamilies(t *testing.T, url string, ch chan<- *dto.MetricFamily) { + defer close(ch) + req, err := http.NewRequest("GET", url, nil) + if err != nil { + t.Fatalf("creating GET request for URL %q failed: %s", url, err) + } + req.Header.Add("Accept", acceptHeader) + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("executing GET request for URL %q failed: %s", url, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("GET request for URL %q returned HTTP status %s", url, resp.Status) + } + + mediatype, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type")) + if err == nil && mediatype == "application/vnd.google.protobuf" && + params["encoding"] == "delimited" && + params["proto"] == "io.prometheus.client.MetricFamily" { + for { + mf := &dto.MetricFamily{} + if _, err = pbutil.ReadDelimited(resp.Body, mf); err != nil { + if err == io.EOF { + break + } + t.Fatalf("reading metric family protocol buffer failed: %s", err) + } + ch <- mf + } + } else { + // We could do further content-type checks here, but the + // fallback for now will 
anyway be the text format + // version 0.0.4, so just go for it and see if it works. + var parser expfmt.TextParser + metricFamilies, err := parser.TextToMetricFamilies(resp.Body) + if err != nil { + t.Fatal("reading text format failed:", err) + } + for _, mf := range metricFamilies { + ch <- mf + } + } +} + +const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3` diff --git a/middleware/metrics/vars/report.go b/middleware/metrics/vars/report.go new file mode 100644 index 000000000..996826599 --- /dev/null +++ b/middleware/metrics/vars/report.go @@ -0,0 +1,62 @@ +package vars + +import ( + "time" + + "github.com/miekg/coredns/request" + + "github.com/miekg/dns" +) + +// Report reports the metrics data associcated with request. +func Report(req request.Request, zone, rcode string, size int, start time.Time) { + // Proto and Family + net := req.Proto() + fam := "1" + if req.Family() == 2 { + fam = "2" + } + + typ := req.QType() + + RequestCount.WithLabelValues(zone, net, fam).Inc() + RequestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond)) + + if req.Do() { + RequestDo.WithLabelValues(zone).Inc() + } + + if _, known := monitorType[typ]; known { + RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc() + } else { + RequestType.WithLabelValues(zone, other).Inc() + } + + ResponseSize.WithLabelValues(zone, net).Observe(float64(size)) + RequestSize.WithLabelValues(zone, net).Observe(float64(req.Size())) + + ResponseRcode.WithLabelValues(zone, rcode).Inc() +} + +var monitorType = map[uint16]bool{ + dns.TypeAAAA: true, + dns.TypeA: true, + dns.TypeCNAME: true, + dns.TypeDNSKEY: true, + dns.TypeDS: true, + dns.TypeMX: true, + dns.TypeNSEC3: true, + dns.TypeNSEC: true, + dns.TypeNS: true, + dns.TypePTR: true, + dns.TypeRRSIG: true, + dns.TypeSOA: true, + dns.TypeSRV: true, + dns.TypeTXT: true, + // Meta Qtypes + dns.TypeIXFR: true, + 
dns.TypeAXFR: true, + dns.TypeANY: true, +} + +const other = "other" diff --git a/middleware/metrics/vars/vars.go b/middleware/metrics/vars/vars.go new file mode 100644 index 000000000..935b6f810 --- /dev/null +++ b/middleware/metrics/vars/vars.go @@ -0,0 +1,68 @@ +package vars + +import ( + "github.com/miekg/coredns/middleware" + + "github.com/prometheus/client_golang/prometheus" +) + +var ( + RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_count_total", + Help: "Counter of DNS requests made per zone, protocol and family.", + }, []string{"zone", "proto", "family"}) + + RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_duration_milliseconds", + Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...), + Help: "Histogram of the time (in milliseconds) each request took.", + }, []string{"zone"}) + + RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_size_bytes", + Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + + RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_do_count_total", + Help: "Counter of DNS requests with DO bit set per zone.", + }, []string{"zone"}) + + RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "request_type_count_total", + Help: "Counter of DNS requests per type, per zone.", + }, []string{"zone", "type"}) + + ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: middleware.Namespace, + 
Subsystem: subsystem, + Name: "response_size_bytes", + Help: "Size of the returned response in bytes.", + Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3}, + }, []string{"zone", "proto"}) + + ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: middleware.Namespace, + Subsystem: subsystem, + Name: "response_rcode_count_total", + Help: "Counter of response status codes.", + }, []string{"zone", "rcode"}) +) + +const ( + subsystem = "dns" + + // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone. + Dropped = "dropped" +) diff --git a/middleware/middleware.go b/middleware/middleware.go index 804209fa9..bdc8bc2fe 100644 --- a/middleware/middleware.go +++ b/middleware/middleware.go @@ -45,6 +45,7 @@ type ( // chain by returning them unchanged. Handler interface { ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) + Name() string } // HandlerFunc is a convenience type like dns.HandlerFunc, except @@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns. return f(ctx, w, r) } +func (f HandlerFunc) Name() string { return "handlerfunc" } + // Error returns err with 'middleware/name: ' prefixed to it. func Error(name string, err error) error { return fmt.Errorf("%s/%s: %s", "middleware", name, err) } diff --git a/middleware/normalize.go b/middleware/normalize.go index 2d153ba78..87f3ce703 100644 --- a/middleware/normalize.go +++ b/middleware/normalize.go @@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) } type ( // Host represents a host from the Corefile, may contain port. Host string // Host represents a host from the Corefile, may contain port. - // Addr resprents an address in the Corefile. + // Addr represents an address in the Corefile. Addr string // Addr resprents an address in the Corefile. 
) diff --git a/middleware/proxy/proxy.go b/middleware/proxy/proxy.go index a33024f12..1ff0a2f30 100644 --- a/middleware/proxy/proxy.go +++ b/middleware/proxy/proxy.go @@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) ( return p.Next.ServeDNS(ctx, w, r) } +func (p Proxy) Name() string { return "proxy" } + // defaultTimeout is the default networking timeout for DNS requests. const defaultTimeout = 5 * time.Second diff --git a/middleware/rewrite/rewrite.go b/middleware/rewrite/rewrite.go index c88fa3549..2498d4c34 100644 --- a/middleware/rewrite/rewrite.go +++ b/middleware/rewrite/rewrite.go @@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg return rw.Next.ServeDNS(ctx, w, r) } +func (rw Rewrite) Name() string { return "rewrite" } + // Rule describes an internal location rewrite rule. type Rule interface { // Rewrite rewrites the internal location of the current request. diff --git a/middleware/test/helpers.go b/middleware/test/helpers.go index 157bdce66..0137171a7 100644 --- a/middleware/test/helpers.go +++ b/middleware/test/helpers.go @@ -283,6 +283,7 @@ type ( // Handler interface defines a middleware. 
Handler interface { ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error) + Name() string } ) @@ -290,3 +291,5 @@ type ( func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { return f(ctx, w, r) } + +func (f HandlerFunc) Name() string { return "handlerfunc" } diff --git a/middleware/whoami/whoami.go b/middleware/whoami/whoami.go index ccf8aefe1..0ef63325f 100644 --- a/middleware/whoami/whoami.go +++ b/middleware/whoami/whoami.go @@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) return 0, nil } + +func (wh Whoami) Name() string { return "whoami" } diff --git a/test/metrics_test.go b/test/metrics_test.go index 85cb2a824..d122f2545 100644 --- a/test/metrics_test.go +++ b/test/metrics_test.go @@ -1,12 +1,84 @@ package test -import "testing" +import ( + "io/ioutil" + "log" + "os" + "path" + "testing" + "time" + + "github.com/miekg/coredns/middleware/cache" + "github.com/miekg/coredns/middleware/metrics" + mtest "github.com/miekg/coredns/middleware/metrics/test" + "github.com/miekg/coredns/middleware/metrics/vars" + + "github.com/miekg/dns" +) // Start test server that has metrics enabled. Then tear it down again. func TestMetricsServer(t *testing.T) { - corefile := `.:0 { + corefile := `example.org:0 { chaos CoreDNS-001 miek@miek.nl - prometheus localhost:0 + prometheus +} + +example.com:0 { + proxy . 8.8.4.4:53 + prometheus +} +` + srv, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + defer srv.Stop() +} + +func TestMetricsRefused(t *testing.T) { + metricName := "coredns_dns_response_rcode_count_total" + + corefile := `example.org:0 { + proxy . 
8.8.8.8:53 + prometheus +} +` + srv, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + defer srv.Stop() + + udp, _ := CoreDNSServerPorts(srv, 0) + + m := new(dns.Msg) + m.SetQuestion("google.com.", dns.TypeA) + + if _, err = dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + got, labels := mtest.MetricValue(metricName, data) + + if got != "1" { + t.Errorf("Expected value %s for refused, but got %s", "1", got) + } + if labels["zone"] != vars.Dropped { + t.Errorf("Expected zone value %s for refused, but got %s", vars.Dropped, labels["zone"]) + } + if labels["rcode"] != "REFUSED" { + t.Errorf("Expected zone value %s for refused, but got %s", "REFUSED", labels["rcode"]) + } +} + +func TestMetricsCache(t *testing.T) { + metricName := "coredns_cache_size_guage" + + corefile := `example.net:0 { + proxy . 8.8.8.8:53 + prometheus + cache } ` srv, err := CoreDNSServer(corefile) @@ -14,4 +86,88 @@ func TestMetricsServer(t *testing.T) { t.Fatalf("Could not get CoreDNS serving instance: %s", err) } defer srv.Stop() + + udp, _ := CoreDNSServerPorts(srv, 0) + + m := new(dns.Msg) + m.SetQuestion("www.example.net.", dns.TypeA) + + if _, err = dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + // Get the value for the metrics where the one of the labels values matches "success" + got, _ := mtest.MetricValueLabel(metricName, cache.Success, data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } +} + +func TestMetricsAuto(t *testing.T) { + tmpdir, err := ioutil.TempDir(os.TempDir(), "coredns") + if err != nil { + t.Fatal(err) + } + + // TODO(miek): Random port as string and use that later? 
+ corefile := `org:0 { + auto { + directory ` + tmpdir + ` db\.(.*) {1} 1 + } + prometheus + } +` + + i, err := CoreDNSServer(corefile) + if err != nil { + t.Fatalf("Could not get CoreDNS serving instance: %s", err) + } + + udp, _ := CoreDNSServerPorts(i, 0) + if udp == "" { + t.Fatalf("Could not get UDP listening port") + } + defer i.Stop() + + log.SetOutput(ioutil.Discard) + + // Write db.example.org to get example.org. + if err = ioutil.WriteFile(path.Join(tmpdir, "db.example.org"), []byte(zoneContent), 0644); err != nil { + t.Fatal(err) + } + // TODO(miek): make the auto sleep even less. + time.Sleep(1100 * time.Millisecond) // wait for it to be picked up + + m := new(dns.Msg) + m.SetQuestion("www.example.org.", dns.TypeA) + + if _, err := dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + metricName := "coredns_dns_request_count_total" //{zone, proto, family} + + data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + // Get the value for the metrics where the one of the labels values matches "example.org." + got, _ := mtest.MetricValueLabel(metricName, "example.org.", data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } + + // Remove db.example.org again. And see if the metric stops increasing. + os.Remove(path.Join(tmpdir, "db.example.org")) + time.Sleep(1100 * time.Millisecond) // wait for it to be picked up + if _, err := dns.Exchange(m, udp); err != nil { + t.Fatalf("Could not send message: %s", err) + } + + data = mtest.Scrape(t, "http://"+metrics.Addr+"/metrics") + got, _ = mtest.MetricValueLabel(metricName, "example.org.", data) + + if got != "1" { + t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got) + } } |