aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/dnsserver/config.go16
-rw-r--r--core/dnsserver/server.go8
-rw-r--r--middleware.md3
-rw-r--r--middleware/auto/auto.go4
-rw-r--r--middleware/auto/setup.go11
-rw-r--r--middleware/auto/walk.go28
-rw-r--r--middleware/auto/walk_test.go9
-rw-r--r--middleware/auto/watcher_test.go14
-rw-r--r--middleware/auto/zone.go12
-rw-r--r--middleware/cache/README.md14
-rw-r--r--middleware/cache/cache.go9
-rw-r--r--middleware/cache/handler.go26
-rw-r--r--middleware/cache/setup.go8
-rw-r--r--middleware/chaos/chaos.go2
-rw-r--r--middleware/dnssec/README.md7
-rw-r--r--middleware/dnssec/handler.go22
-rw-r--r--middleware/dnssec/responsewriter.go2
-rw-r--r--middleware/dnssec/setup.go3
-rw-r--r--middleware/errors/errors.go2
-rw-r--r--middleware/etcd/handler.go2
-rw-r--r--middleware/file/file.go2
-rw-r--r--middleware/kubernetes/handler.go2
-rw-r--r--middleware/loadbalance/handler.go2
-rw-r--r--middleware/log/log.go6
-rw-r--r--middleware/metrics/README.md15
-rw-r--r--middleware/metrics/handler.go71
-rw-r--r--middleware/metrics/metrics.go156
-rw-r--r--middleware/metrics/metrics_test.go83
-rw-r--r--middleware/metrics/setup.go17
-rw-r--r--middleware/metrics/test/scrape.go225
-rw-r--r--middleware/metrics/vars/report.go62
-rw-r--r--middleware/metrics/vars/vars.go68
-rw-r--r--middleware/middleware.go3
-rw-r--r--middleware/normalize.go2
-rw-r--r--middleware/proxy/proxy.go2
-rw-r--r--middleware/rewrite/rewrite.go2
-rw-r--r--middleware/test/helpers.go3
-rw-r--r--middleware/whoami/whoami.go2
-rw-r--r--test/metrics_test.go162
39 files changed, 828 insertions, 259 deletions
diff --git a/core/dnsserver/config.go b/core/dnsserver/config.go
index 20ff8389a..123ab70ce 100644
--- a/core/dnsserver/config.go
+++ b/core/dnsserver/config.go
@@ -41,3 +41,19 @@ func GetConfig(c *caddy.Controller) *Config {
ctx.saveConfig(c.Key, &Config{})
return GetConfig(c)
}
+
+// GetMiddleware returns the middleware handler that has been added to the config under name.
+// This is useful to inspect if a certain middleware is active in this server.
+// Note that this is order dependent and the order is defined in directives.go, i.e. if your middleware
+// comes before the middleware you are checking, it will not be there (yet).
+func GetMiddleware(c *caddy.Controller, name string) middleware.Handler {
+ // TODO(miek): calling the handler h(nil) should be a noop...
+ conf := GetConfig(c)
+ for _, h := range conf.Middleware {
+ x := h(nil)
+ if name == x.Name() {
+ return x
+ }
+ }
+ return nil
+}
diff --git a/core/dnsserver/server.go b/core/dnsserver/server.go
index aa94dcd8c..cc078d0d8 100644
--- a/core/dnsserver/server.go
+++ b/core/dnsserver/server.go
@@ -10,7 +10,9 @@ import (
"time"
"github.com/miekg/coredns/middleware"
+ "github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/edns"
+ "github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/request"
"github.com/miekg/dns"
@@ -247,14 +249,16 @@ func (s *Server) OnStartupComplete() {
}
// DefaultErrorFunc responds to an DNS request with an error.
-func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rcode int) {
+func DefaultErrorFunc(w dns.ResponseWriter, r *dns.Msg, rc int) {
state := request.Request{W: w, Req: r}
answer := new(dns.Msg)
- answer.SetRcode(r, rcode)
+ answer.SetRcode(r, rc)
state.SizeAndDo(answer)
+ vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
+
w.WriteMsg(answer)
}
diff --git a/middleware.md b/middleware.md
index acd69f497..98a6adf89 100644
--- a/middleware.md
+++ b/middleware.md
@@ -36,7 +36,8 @@ TODO(miek): text here on how to hook up middleware.
## Metrics
When exporting metrics the *Namespace* should be `middleware.Namespace` (="coredns"), and the
-*Subsystem* should be the name of the middleware.
+*Subsystem* should be the name of the middleware. The README.md for the middleware should then
+also contain a *Metrics* section detailing the metrics.
## Documentation
diff --git a/middleware/auto/auto.go b/middleware/auto/auto.go
index 7721c194e..65e784459 100644
--- a/middleware/auto/auto.go
+++ b/middleware/auto/auto.go
@@ -8,6 +8,7 @@ import (
"github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/file"
+ "github.com/miekg/coredns/middleware/metrics"
"github.com/miekg/coredns/request"
"github.com/miekg/dns"
@@ -20,6 +21,7 @@ type (
Next middleware.Handler
*Zones
+ metrics *metrics.Metrics
loader
}
@@ -97,3 +99,5 @@ func (a Auto) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
w.WriteMsg(m)
return dns.RcodeSuccess, nil
}
+
+func (a Auto) Name() string { return "auto" }
diff --git a/middleware/auto/setup.go b/middleware/auto/setup.go
index 8c56f90a0..a5e11186a 100644
--- a/middleware/auto/setup.go
+++ b/middleware/auto/setup.go
@@ -11,6 +11,7 @@ import (
"github.com/miekg/coredns/core/dnsserver"
"github.com/miekg/coredns/middleware"
"github.com/miekg/coredns/middleware/file"
+ "github.com/miekg/coredns/middleware/metrics"
"github.com/mholt/caddy"
)
@@ -28,10 +29,16 @@ func setup(c *caddy.Controller) error {
return middleware.Error("auto", err)
}
+ // If we have enabled prometheus we should add newly discovered zones to it.
+ met := dnsserver.GetMiddleware(c, "prometheus")
+ if met != nil {
+ a.metrics = met.(*metrics.Metrics)
+ }
+
walkChan := make(chan bool)
c.OnStartup(func() error {
- err := a.Zones.Walk(a.loader)
+ err := a.Walk()
if err != nil {
return err
}
@@ -43,7 +50,7 @@ func setup(c *caddy.Controller) error {
case <-walkChan:
return
case <-ticker.C:
- a.Zones.Walk(a.loader)
+ a.Walk()
}
}
}()
diff --git a/middleware/auto/walk.go b/middleware/auto/walk.go
index 4259d7f17..8c3370e16 100644
--- a/middleware/auto/walk.go
+++ b/middleware/auto/walk.go
@@ -13,26 +13,26 @@ import (
)
// Walk will recursively walk of the file under l.directory and adds the one that match l.re.
-func (z *Zones) Walk(l loader) error {
+func (a Auto) Walk() error {
// TODO(miek): should add something so that we don't stomp on each other.
toDelete := make(map[string]bool)
- for _, n := range z.Names() {
+ for _, n := range a.Zones.Names() {
toDelete[n] = true
}
- filepath.Walk(l.directory, func(path string, info os.FileInfo, err error) error {
+ filepath.Walk(a.loader.directory, func(path string, info os.FileInfo, err error) error {
if info.IsDir() {
return nil
}
- match, origin := matches(l.re, info.Name(), l.template)
+ match, origin := matches(a.loader.re, info.Name(), a.loader.template)
if !match {
return nil
}
- if _, ok := z.Z[origin]; ok {
+ if _, ok := a.Zones.Z[origin]; ok {
// we already have this zone
toDelete[origin] = false
return nil
@@ -50,10 +50,14 @@ func (z *Zones) Walk(l loader) error {
return nil
}
- zo.NoReload = l.noReload
- zo.TransferTo = l.transferTo
+ zo.NoReload = a.loader.noReload
+ zo.TransferTo = a.loader.transferTo
- z.Insert(zo, origin)
+ a.Zones.Add(zo, origin)
+
+ if a.metrics != nil {
+ a.metrics.AddZone(origin)
+ }
zo.Notify()
@@ -68,7 +72,13 @@ func (z *Zones) Walk(l loader) error {
if !ok {
continue
}
- z.Delete(origin)
+
+ if a.metrics != nil {
+ a.metrics.RemoveZone(origin)
+ }
+
+ a.Zones.Remove(origin)
+
log.Printf("[INFO] Deleting zone `%s'", origin)
}
diff --git a/middleware/auto/walk_test.go b/middleware/auto/walk_test.go
index cc420d5b6..f15548dba 100644
--- a/middleware/auto/walk_test.go
+++ b/middleware/auto/walk_test.go
@@ -37,13 +37,16 @@ func TestWalk(t *testing.T) {
template: `${1}`,
}
- z := &Zones{}
+ a := Auto{
+ loader: ldr,
+ Zones: &Zones{},
+ }
- z.Walk(ldr)
+ a.Walk()
// db.example.org and db.example.com should be here (created in createFiles)
for _, name := range []string{"example.com.", "example.org."} {
- if _, ok := z.Z[name]; !ok {
+ if _, ok := a.Zones.Z[name]; !ok {
t.Errorf("%s should have been added", name)
}
}
diff --git a/middleware/auto/watcher_test.go b/middleware/auto/watcher_test.go
index 751c78c0d..a8f219fa1 100644
--- a/middleware/auto/watcher_test.go
+++ b/middleware/auto/watcher_test.go
@@ -27,15 +27,18 @@ func TestWatcher(t *testing.T) {
template: `${1}`,
}
- z := &Zones{}
+ a := Auto{
+ loader: ldr,
+ Zones: &Zones{},
+ }
- z.Walk(ldr)
+ a.Walk()
// example.org and example.com should exist
- if x := len(z.Z["example.org."].All()); x != 4 {
+ if x := len(a.Zones.Z["example.org."].All()); x != 4 {
t.Fatalf("expected 4 RRs, got %d", x)
}
- if x := len(z.Z["example.com."].All()); x != 4 {
+ if x := len(a.Zones.Z["example.com."].All()); x != 4 {
t.Fatalf("expected 4 RRs, got %d", x)
}
@@ -44,5 +47,6 @@ func TestWatcher(t *testing.T) {
t.Fatal(err)
}
- z.Walk(ldr)
+ a.Walk()
+ // TODO(miek): check
}
diff --git a/middleware/auto/zone.go b/middleware/auto/zone.go
index 4c950b908..f825871f7 100644
--- a/middleware/auto/zone.go
+++ b/middleware/auto/zone.go
@@ -40,9 +40,9 @@ func (z *Zones) Zones(name string) *file.Zone {
return zo
}
-// Insert inserts a new zone into z. If zo.NoReload is false, the
+// Add adds a new zone into z. If zo.NoReload is false, the
// reload goroutine is started.
-func (z *Zones) Insert(zo *file.Zone, name string) {
+func (z *Zones) Add(zo *file.Zone, name string) {
z.Lock()
if z.Z == nil {
@@ -51,14 +51,13 @@ func (z *Zones) Insert(zo *file.Zone, name string) {
z.Z[name] = zo
z.names = append(z.names, name)
-
zo.Reload()
z.Unlock()
}
-// Delete removes the zone named name from z. It also stop the the zone's reload goroutine.
-func (z *Zones) Delete(name string) {
+// Remove removes the zone named name from z. It also stops the zone's reload goroutine.
+func (z *Zones) Remove(name string) {
z.Lock()
if zo, ok := z.Z[name]; ok && !zo.NoReload {
@@ -67,10 +66,11 @@ func (z *Zones) Delete(name string) {
delete(z.Z, name)
- // just regenerate Names (might be bad if you have a lot of zones...)
+ // TODO(miek): just regenerate Names (might be bad if you have a lot of zones...)
z.names = []string{}
for n := range z.Z {
z.names = append(z.names, n)
}
+
z.Unlock()
}
diff --git a/middleware/cache/README.md b/middleware/cache/README.md
index 55ad8848a..3a4dea43e 100644
--- a/middleware/cache/README.md
+++ b/middleware/cache/README.md
@@ -35,24 +35,24 @@ There is a third category (`error`) but those responses are never cached.
The minimum TTL allowed on resource records is 5 seconds.
-If monitoring is enabled (via the *prometheus* directive) then the following extra metrics are added:
+## Metrics
-* coredns_cache_hit_count_total, and
-* coredns_cache_miss_count_total
+If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
-They both work on a per-zone basis and just count the hit and miss counts for each query.
+* coredns_cache_size_guage{type} - total elements in the cache, type is either "denial" or "success".
+* coredns_cache_capacity_gauge{type} - total capacity of the cache, type is either "denial" or "success".
## Examples
+Enable caching for all zones, but cap everything to a TTL of 10 seconds:
+
~~~
cache 10
~~~
-Enable caching for all zones, but cap everything to a TTL of 10 seconds.
+Proxy to Google Public DNS and only cache responses for example.org (or below):
~~~
proxy . 8.8.8.8:53
cache example.org
~~~
-
-Proxy to Google Public DNS and only cache responses for example.org (or below).
diff --git a/middleware/cache/cache.go b/middleware/cache/cache.go
index 6254349c3..d17253c38 100644
--- a/middleware/cache/cache.go
+++ b/middleware/cache/cache.go
@@ -79,6 +79,9 @@ func (c *ResponseWriter) WriteMsg(res *dns.Msg) error {
if key != "" {
c.set(res, key, mt, duration)
+
+ cacheSize.WithLabelValues(Success).Set(float64(c.pcache.Len()))
+ cacheSize.WithLabelValues(Denial).Set(float64(c.ncache.Len()))
}
setMsgTTL(res, uint32(duration.Seconds()))
@@ -103,7 +106,6 @@ func (c *ResponseWriter) set(m *dns.Msg, key string, mt response.Type, duration
case response.OtherError:
// don't cache these
- // TODO(miek): what do we do with these?
default:
log.Printf("[WARNING] Caching called with unknown classification: %d", mt)
}
@@ -122,4 +124,9 @@ const (
minTTL = 5 * time.Second
defaultCap = 10000 // default capacity of the cache.
+
+ // Success is the class for positive caching.
+ Success = "success"
+ // Denial is the class defined for negative caching.
+ Denial = "denial"
)
diff --git a/middleware/cache/handler.go b/middleware/cache/handler.go
index e307b0b79..95b51970e 100644
--- a/middleware/cache/handler.go
+++ b/middleware/cache/handler.go
@@ -30,17 +30,15 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
state.SizeAndDo(resp)
w.WriteMsg(resp)
- cacheHitCount.WithLabelValues(zone).Inc()
-
return dns.RcodeSuccess, nil
}
- cacheMissCount.WithLabelValues(zone).Inc()
-
crr := &ResponseWriter{w, c}
return c.Next.ServeDNS(ctx, crr, r)
}
+func (c *Cache) Name() string { return "cache" }
+
func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
k := rawKey(qname, qtype, do)
@@ -55,24 +53,24 @@ func (c *Cache) get(qname string, qtype uint16, do bool) (*item, bool, bool) {
}
var (
- cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{
+ cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
- Name: "hit_count_total",
- Help: "Counter of DNS requests that were found in the cache.",
- }, []string{"zone"})
+ Name: "size_guage",
+ Help: "Gauge of number of elements in the cache.",
+ }, []string{"type"})
- cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{
+ cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
- Name: "miss_count_total",
- Help: "Counter of DNS requests that were not found in the cache.",
- }, []string{"zone"})
+ Name: "capacity_gauge",
+ Help: "Gauge of cache's capacity.",
+ }, []string{"type"})
)
const subsystem = "cache"
func init() {
- prometheus.MustRegister(cacheHitCount)
- prometheus.MustRegister(cacheMissCount)
+ prometheus.MustRegister(cacheSize)
+ prometheus.MustRegister(cacheCapacity)
}
diff --git a/middleware/cache/setup.go b/middleware/cache/setup.go
index 08c8fefdb..11a35ddc4 100644
--- a/middleware/cache/setup.go
+++ b/middleware/cache/setup.go
@@ -28,6 +28,10 @@ func setup(c *caddy.Controller) error {
return ca
})
+ // Export the capacity for the metrics. This only happens once, because the capacity can only change on a reload.
+ cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
+ cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
+
return nil
}
@@ -58,7 +62,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
for c.NextBlock() {
switch c.Val() {
// first number is cap, second is an new ttl
- case "success":
+ case Success:
args := c.RemainingArgs()
if len(args) == 0 {
return nil, c.ArgErr()
@@ -75,7 +79,7 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
}
ca.pttl = time.Duration(pttl) * time.Second
}
- case "denial":
+ case Denial:
args := c.RemainingArgs()
if len(args) == 0 {
return nil, c.ArgErr()
diff --git a/middleware/chaos/chaos.go b/middleware/chaos/chaos.go
index 6d83f5dda..39ebe7f49 100644
--- a/middleware/chaos/chaos.go
+++ b/middleware/chaos/chaos.go
@@ -51,6 +51,8 @@ func (c Chaos) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return 0, nil
}
+func (c Chaos) Name() string { return "chaos" }
+
func trim(s string) string {
if len(s) < 256 {
return s
diff --git a/middleware/dnssec/README.md b/middleware/dnssec/README.md
index b87b25b1c..5daebc7f6 100644
--- a/middleware/dnssec/README.md
+++ b/middleware/dnssec/README.md
@@ -34,9 +34,14 @@ dnssec [ZONES... ] {
will be signed with all keys. Generating a key can be done with `dnssec-keygen`: `dnssec-keygen -a
ECDSAP256SHA256 <zonename>`. A key created for zone *A* can be safely used for zone *B*.
-
* `cache_capacity` indicates the capacity of the LRU cache. The dnssec middleware uses LRU cache to manage
objects and the default capacity is 10000.
+## Metrics
+
+If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
+
+* coredns_dnssec_size_guage{type} - total elements in the cache, type is "signature".
+* coredns_dnssec_capacity_gauge{type} - total capacity of the cache, type is "signature".
## Examples
diff --git a/middleware/dnssec/handler.go b/middleware/dnssec/handler.go
index aaed62bdf..a99588397 100644
--- a/middleware/dnssec/handler.go
+++ b/middleware/dnssec/handler.go
@@ -40,24 +40,26 @@ func (d Dnssec) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
}
var (
- cacheHitCount = prometheus.NewCounterVec(prometheus.CounterOpts{
+ cacheSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
- Name: "hit_count_total",
- Help: "Counter of signatures that were found in the cache.",
- }, []string{"zone"})
+ Name: "size_guage",
+ Help: "Gauge of number of elements in the cache.",
+ }, []string{"type"})
- cacheMissCount = prometheus.NewCounterVec(prometheus.CounterOpts{
+ cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: middleware.Namespace,
Subsystem: subsystem,
- Name: "miss_count_total",
- Help: "Counter of signatures that were not found in the cache.",
- }, []string{"zone"})
+ Name: "capacity_gauge",
+ Help: "Gauge of cache's capacity.",
+ }, []string{"type"})
)
+func (d Dnssec) Name() string { return "dnssec" }
+
const subsystem = "dnssec"
func init() {
- prometheus.MustRegister(cacheHitCount)
- prometheus.MustRegister(cacheMissCount)
+ prometheus.MustRegister(cacheSize)
+ prometheus.MustRegister(cacheCapacity)
}
diff --git a/middleware/dnssec/responsewriter.go b/middleware/dnssec/responsewriter.go
index 547561fb9..e7af62a9e 100644
--- a/middleware/dnssec/responsewriter.go
+++ b/middleware/dnssec/responsewriter.go
@@ -30,6 +30,8 @@ func (d *ResponseWriter) WriteMsg(res *dns.Msg) error {
if state.Do() {
res = d.d.Sign(state, zone, time.Now().UTC())
+
+ cacheSize.WithLabelValues("signature").Set(float64(d.d.cache.Len()))
}
state.SizeAndDo(res)
diff --git a/middleware/dnssec/setup.go b/middleware/dnssec/setup.go
index 19a68a853..639303ea9 100644
--- a/middleware/dnssec/setup.go
+++ b/middleware/dnssec/setup.go
@@ -32,6 +32,9 @@ func setup(c *caddy.Controller) error {
return New(zones, keys, next, cache)
})
+ // Export the capacity for the metrics. This only happens once, because the capacity can only change on a reload.
+ cacheCapacity.WithLabelValues("signature").Set(float64(capacity))
+
return nil
}
diff --git a/middleware/errors/errors.go b/middleware/errors/errors.go
index afaa8cf4d..aca05b54a 100644
--- a/middleware/errors/errors.go
+++ b/middleware/errors/errors.go
@@ -48,6 +48,8 @@ func (h errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns
return rcode, err
}
+func (h errorHandler) Name() string { return "errors" }
+
func (h errorHandler) recovery(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) {
rec := recover()
if rec == nil {
diff --git a/middleware/etcd/handler.go b/middleware/etcd/handler.go
index 75aa582bd..7c856bd8b 100644
--- a/middleware/etcd/handler.go
+++ b/middleware/etcd/handler.go
@@ -117,6 +117,8 @@ func (e *Etcd) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return dns.RcodeSuccess, nil
}
+func (e *Etcd) Name() string { return "etcd" }
+
// Err write an error response to the client.
func (e *Etcd) Err(zone string, rcode int, state request.Request, debug []msg.Service, err error, opt Options) (int, error) {
m := new(dns.Msg)
diff --git a/middleware/file/file.go b/middleware/file/file.go
index 90d15af79..7c6a65ced 100644
--- a/middleware/file/file.go
+++ b/middleware/file/file.go
@@ -110,6 +110,8 @@ func (f File) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (i
return dns.RcodeSuccess, nil
}
+func (f File) Name() string { return "file" }
+
// Parse parses the zone in filename and returns a new Zone or an error.
func Parse(f io.Reader, origin, fileName string) (*Zone, error) {
tokens := dns.ParseZone(f, dns.Fqdn(origin), fileName)
diff --git a/middleware/kubernetes/handler.go b/middleware/kubernetes/handler.go
index 1b1abf44b..cc59a305f 100644
--- a/middleware/kubernetes/handler.go
+++ b/middleware/kubernetes/handler.go
@@ -101,6 +101,8 @@ func (k Kubernetes) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.M
return dns.RcodeSuccess, nil
}
+func (k Kubernetes) Name() string { return "kubernetes" }
+
// Err writes an error response back to the client.
func (k Kubernetes) Err(zone string, rcode int, state request.Request) (int, error) {
m := new(dns.Msg)
diff --git a/middleware/loadbalance/handler.go b/middleware/loadbalance/handler.go
index 151eb57d0..8cba67e6f 100644
--- a/middleware/loadbalance/handler.go
+++ b/middleware/loadbalance/handler.go
@@ -18,3 +18,5 @@ func (rr RoundRobin) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
wrr := &RoundRobinResponseWriter{w}
return rr.Next.ServeDNS(ctx, wrr, r)
}
+
+func (rr RoundRobin) Name() string { return "loadbalance" }
diff --git a/middleware/log/log.go b/middleware/log/log.go
index dba76341e..0a3cca130 100644
--- a/middleware/log/log.go
+++ b/middleware/log/log.go
@@ -6,7 +6,7 @@ import (
"time"
"github.com/miekg/coredns/middleware"
- "github.com/miekg/coredns/middleware/metrics"
+ "github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
"github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/middleware/pkg/replacer"
@@ -45,7 +45,7 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
answer.SetRcode(r, rc)
state.SizeAndDo(answer)
- metrics.Report(state, metrics.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
+ vars.Report(state, vars.Dropped, rcode.ToString(rc), answer.Len(), time.Now())
w.WriteMsg(answer)
}
@@ -64,6 +64,8 @@ func (l Logger) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return l.Next.ServeDNS(ctx, w, r)
}
+func (l Logger) Name() string { return "log" }
+
// Rule configures the logging middleware.
type Rule struct {
NameScope string
diff --git a/middleware/metrics/README.md b/middleware/metrics/README.md
index fb0e6d3aa..46dd34296 100644
--- a/middleware/metrics/README.md
+++ b/middleware/metrics/README.md
@@ -7,12 +7,10 @@ The following metrics are exported:
* coredns_dns_request_count_total{zone, proto, family}
* coredns_dns_request_duration_milliseconds{zone}
-* coredns_dns_request_size_bytes{zone,, proto}
-* coredns_dns_request_transfer_size_bytes{zone,, proto}
+* coredns_dns_request_size_bytes{zone, proto}
* coredns_dns_request_do_count_total{zone}
* coredns_dns_request_type_count_total{zone, type}
* coredns_dns_response_size_bytes{zone, proto}
-* coredns_dns_response_transfer_size_bytes{zone, proto}
* coredns_dns_response_rcode_count_total{zone, rcode}
Each counter has a label `zone` which is the zonename used for the request/response.
@@ -27,10 +25,7 @@ Extra labels used are:
* The `response_rcode_count_total` has an extra label `rcode` which holds the rcode of the response.
If monitoring is enabled, queries that do not enter the middleware chain are exported under the fake
-domain "dropped" (without a closing dot).
-
-Restarting CoreDNS will stop the monitoring. This is a bug. Also [this upstream
-Caddy bug](https://github.com/mholt/caddy/issues/675).
+name "dropped" (without a closing dot - this is never a valid domain name).
## Syntax
@@ -44,3 +39,9 @@ It optionally takes an address to which the metrics are exported; the default
is `localhost:9153`. The metrics path is fixed to `/metrics`.
## Examples
+
+Use an alternative address:
+
+~~~
+prometheus localhost:9253
+~~~
diff --git a/middleware/metrics/handler.go b/middleware/metrics/handler.go
index a0247c517..4c235ab6a 100644
--- a/middleware/metrics/handler.go
+++ b/middleware/metrics/handler.go
@@ -1,9 +1,8 @@
package metrics
import (
- "time"
-
"github.com/miekg/coredns/middleware"
+ "github.com/miekg/coredns/middleware/metrics/vars"
"github.com/miekg/coredns/middleware/pkg/dnsrecorder"
"github.com/miekg/coredns/middleware/pkg/rcode"
"github.com/miekg/coredns/request"
@@ -17,7 +16,7 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
state := request.Request{W: w, Req: r}
qname := state.QName()
- zone := middleware.Zones(m.ZoneNames).Matches(qname)
+ zone := middleware.Zones(m.ZoneNames()).Matches(qname)
if zone == "" {
zone = "."
}
@@ -26,71 +25,9 @@ func (m *Metrics) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
rw := dnsrecorder.New(w)
status, err := m.Next.ServeDNS(ctx, rw, r)
- Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start)
+ vars.Report(state, zone, rcode.ToString(rw.Rcode), rw.Size, rw.Start)
return status, err
}
-// Report is a plain reporting function that the server can use for REFUSED and other
-// queries that are turned down because they don't match any middleware.
-func Report(req request.Request, zone, rcode string, size int, start time.Time) {
- if requestCount == nil {
- // no metrics are enabled
- return
- }
-
- // Proto and Family
- net := req.Proto()
- fam := "1"
- if req.Family() == 2 {
- fam = "2"
- }
-
- typ := req.QType()
-
- requestCount.WithLabelValues(zone, net, fam).Inc()
- requestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
-
- if req.Do() {
- requestDo.WithLabelValues(zone).Inc()
- }
-
- if _, known := monitorType[typ]; known {
- requestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
- } else {
- requestType.WithLabelValues(zone, other).Inc()
- }
-
- if typ == dns.TypeIXFR || typ == dns.TypeAXFR {
- responseTransferSize.WithLabelValues(zone, net).Observe(float64(size))
- requestTransferSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
- } else {
- responseSize.WithLabelValues(zone, net).Observe(float64(size))
- requestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
- }
-
- responseRcode.WithLabelValues(zone, rcode).Inc()
-}
-
-var monitorType = map[uint16]bool{
- dns.TypeAAAA: true,
- dns.TypeA: true,
- dns.TypeCNAME: true,
- dns.TypeDNSKEY: true,
- dns.TypeDS: true,
- dns.TypeMX: true,
- dns.TypeNSEC3: true,
- dns.TypeNSEC: true,
- dns.TypeNS: true,
- dns.TypePTR: true,
- dns.TypeRRSIG: true,
- dns.TypeSOA: true,
- dns.TypeSRV: true,
- dns.TypeTXT: true,
- // Meta Qtypes
- dns.TypeIXFR: true,
- dns.TypeAXFR: true,
- dns.TypeANY: true,
-}
-
-const other = "other"
+func (m *Metrics) Name() string { return "prometheus" }
diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go
index b82a2be86..5f93ec2f5 100644
--- a/middleware/metrics/metrics.go
+++ b/middleware/metrics/metrics.go
@@ -1,5 +1,4 @@
-// Package metrics implement a handler and middleware that provides Prometheus
-// metrics.
+// Package metrics implements a handler and middleware that provides Prometheus metrics.
package metrics
import (
@@ -9,37 +8,51 @@ import (
"sync"
"github.com/miekg/coredns/middleware"
+ "github.com/miekg/coredns/middleware/metrics/vars"
"github.com/prometheus/client_golang/prometheus"
)
-var (
- requestCount *prometheus.CounterVec
- requestDuration *prometheus.HistogramVec
- requestSize *prometheus.HistogramVec
- requestTransferSize *prometheus.HistogramVec
- requestDo *prometheus.CounterVec
- requestType *prometheus.CounterVec
-
- responseSize *prometheus.HistogramVec
- responseTransferSize *prometheus.HistogramVec
- responseRcode *prometheus.CounterVec
-)
-
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
type Metrics struct {
- Next middleware.Handler
- Addr string
- ln net.Listener
- mux *http.ServeMux
- Once sync.Once
- ZoneNames []string
+ Next middleware.Handler
+ Addr string
+ ln net.Listener
+ mux *http.ServeMux
+ Once sync.Once
+
+ zoneNames []string
+ zoneMap map[string]bool
+ zoneMu sync.RWMutex
+}
+
+// AddZone adds zone z to m.
+func (m *Metrics) AddZone(z string) {
+ m.zoneMu.Lock()
+ m.zoneMap[z] = true
+ m.zoneNames = keys(m.zoneMap)
+ m.zoneMu.Unlock()
+}
+
+// RemoveZone removes zone z from m.
+func (m *Metrics) RemoveZone(z string) {
+ m.zoneMu.Lock()
+ delete(m.zoneMap, z)
+ m.zoneNames = keys(m.zoneMap)
+ m.zoneMu.Unlock()
+}
+
+// ZoneNames returns the zones of m.
+func (m *Metrics) ZoneNames() []string {
+ m.zoneMu.RLock()
+ s := m.zoneNames
+ m.zoneMu.RUnlock()
+ return s
}
// OnStartup sets up the metrics on startup.
func (m *Metrics) OnStartup() error {
m.Once.Do(func() {
- define()
ln, err := net.Listen("tcp", m.Addr)
if err != nil {
@@ -51,18 +64,16 @@ func (m *Metrics) OnStartup() error {
m.mux = http.NewServeMux()
- prometheus.MustRegister(requestCount)
- prometheus.MustRegister(requestDuration)
- prometheus.MustRegister(requestSize)
- prometheus.MustRegister(requestTransferSize)
- prometheus.MustRegister(requestDo)
- prometheus.MustRegister(requestType)
+ prometheus.MustRegister(vars.RequestCount)
+ prometheus.MustRegister(vars.RequestDuration)
+ prometheus.MustRegister(vars.RequestSize)
+ prometheus.MustRegister(vars.RequestDo)
+ prometheus.MustRegister(vars.RequestType)
- prometheus.MustRegister(responseSize)
- prometheus.MustRegister(responseTransferSize)
- prometheus.MustRegister(responseRcode)
+ prometheus.MustRegister(vars.ResponseSize)
+ prometheus.MustRegister(vars.ResponseRcode)
- m.mux.Handle(path, prometheus.Handler())
+ m.mux.Handle("/metrics", prometheus.Handler())
go func() {
http.Serve(m.ln, m.mux)
@@ -79,79 +90,10 @@ func (m *Metrics) OnShutdown() error {
return nil
}
-func define() {
- requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_count_total",
- Help: "Counter of DNS requests made per zone, protocol and family.",
- }, []string{"zone", "proto", "family"})
-
- requestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_duration_milliseconds",
- Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000}...),
- Help: "Histogram of the time (in milliseconds) each request took.",
- }, []string{"zone"})
-
- requestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_size_bytes",
- Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
- Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
- }, []string{"zone", "proto"})
-
- requestTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_transfer_size_bytes",
- Help: "Size of the incoming zone transfer in bytes.",
- Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
- }, []string{"zone", "proto"})
-
- requestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_do_count_total",
- Help: "Counter of DNS requests with DO bit set per zone.",
- }, []string{"zone"})
-
- requestType = prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "request_type_count_total",
- Help: "Counter of DNS requests per type, per zone.",
- }, []string{"zone", "type"})
-
- responseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "response_size_bytes",
- Help: "Size of the returned response in bytes.",
- Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
- }, []string{"zone", "proto"})
-
- responseTransferSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "response_transfer_size_bytes",
- Help: "Size of the returned zone transfer in bytes.",
- Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
- }, []string{"zone", "proto"})
-
- responseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
- Namespace: middleware.Namespace,
- Subsystem: subsystem,
- Name: "response_rcode_count_total",
- Help: "Counter of response status codes.",
- }, []string{"zone", "rcode"})
+func keys(m map[string]bool) []string {
+ sx := []string{}
+ for k := range m {
+ sx = append(sx, k)
+ }
+ return sx
}
-
-const (
- // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
- Dropped = "dropped"
- subsystem = "dns"
- path = "/metrics"
-)
diff --git a/middleware/metrics/metrics_test.go b/middleware/metrics/metrics_test.go
new file mode 100644
index 000000000..4272350f4
--- /dev/null
+++ b/middleware/metrics/metrics_test.go
@@ -0,0 +1,83 @@
+package metrics
+
+import (
+ "testing"
+
+ "github.com/miekg/coredns/middleware"
+ mtest "github.com/miekg/coredns/middleware/metrics/test"
+ "github.com/miekg/coredns/middleware/pkg/dnsrecorder"
+ "github.com/miekg/coredns/middleware/test"
+
+ "github.com/miekg/dns"
+ "golang.org/x/net/context"
+)
+
+func TestMetrics(t *testing.T) {
+ met := &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
+ if err := met.OnStartup(); err != nil {
+ t.Fatalf("Failed to start metrics handler: %s", err)
+ }
+ defer met.OnShutdown()
+
+ met.AddZone("example.org.")
+
+ tests := []struct {
+ next middleware.Handler
+ qname string
+ qtype uint16
+ metric string
+ expectedValue string
+ }{
+ // This all works because there is only 1 bucket (1 zone, 1 type).
+ {
+ next: test.NextHandler(dns.RcodeSuccess, nil),
+ qname: "example.org",
+ metric: "coredns_dns_request_count_total",
+ expectedValue: "1",
+ },
+ {
+ next: test.NextHandler(dns.RcodeSuccess, nil),
+ qname: "example.org",
+ metric: "coredns_dns_request_count_total",
+ expectedValue: "2",
+ },
+ {
+ next: test.NextHandler(dns.RcodeSuccess, nil),
+ qname: "example.org",
+ metric: "coredns_dns_request_type_count_total",
+ expectedValue: "3",
+ },
+ {
+ next: test.NextHandler(dns.RcodeSuccess, nil),
+ qname: "example.org",
+ metric: "coredns_dns_response_rcode_count_total",
+ expectedValue: "4",
+ },
+ }
+
+ ctx := context.TODO()
+
+ for i, tc := range tests {
+ req := new(dns.Msg)
+ if tc.qtype == 0 {
+ tc.qtype = dns.TypeA
+ }
+ req.SetQuestion(dns.Fqdn(tc.qname), tc.qtype)
+ met.Next = tc.next
+
+ rec := dnsrecorder.New(&test.ResponseWriter{})
+ _, err := met.ServeDNS(ctx, rec, req)
+ if err != nil {
+ t.Fatalf("Test %d: Expected no error, but got %s", i, err)
+ }
+
+ result := mtest.Scrape(t, "http://"+Addr+"/metrics")
+
+ if tc.expectedValue != "" {
+ got, _ := mtest.MetricValue(tc.metric, result)
+ if got != tc.expectedValue {
+ t.Errorf("Test %d: Expected value %s for metrics %s, but got %s", i, tc.expectedValue, tc.metric, got)
+ }
+ }
+ }
+}
diff --git a/middleware/metrics/setup.go b/middleware/metrics/setup.go
index 8c8dd1a75..93a6bf50a 100644
--- a/middleware/metrics/setup.go
+++ b/middleware/metrics/setup.go
@@ -38,18 +38,17 @@ func setup(c *caddy.Controller) error {
func prometheusParse(c *caddy.Controller) (*Metrics, error) {
var (
- met = &Metrics{Addr: addr}
+ met = &Metrics{Addr: Addr, zoneMap: make(map[string]bool)}
err error
)
for c.Next() {
- if len(met.ZoneNames) > 0 {
- return met, c.Err("metrics: can only have one metrics module per server")
+ if len(met.ZoneNames()) > 0 {
+ return met, c.Err("can only have one metrics module per server")
}
- met.ZoneNames = make([]string, len(c.ServerBlockKeys))
- copy(met.ZoneNames, c.ServerBlockKeys)
- for i := range met.ZoneNames {
- met.ZoneNames[i] = middleware.Host(met.ZoneNames[i]).Normalize()
+
+ for _, z := range c.ServerBlockKeys {
+ met.AddZone(middleware.Host(z).Normalize())
}
args := c.RemainingArgs()
@@ -78,7 +77,7 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
return met, e
}
default:
- return met, c.Errf("metrics: unknown item: %s", c.Val())
+ return met, c.Errf("unknown item: %s", c.Val())
}
}
@@ -88,4 +87,4 @@ func prometheusParse(c *caddy.Controller) (*Metrics, error) {
var metricsOnce sync.Once
-const addr = "localhost:9153"
+const Addr = "localhost:9153"
diff --git a/middleware/metrics/test/scrape.go b/middleware/metrics/test/scrape.go
new file mode 100644
index 000000000..d64bef96f
--- /dev/null
+++ b/middleware/metrics/test/scrape.go
@@ -0,0 +1,225 @@
+// Adapted by Miek Gieben for CoreDNS testing.
+//
+// License from prom2json
+// Copyright 2014 Prometheus Team
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package test will scrape a target and you can inspect the variables.
+// Basic usage:
+//
+// result := Scrape(t, "http://localhost:9153/metrics")
+// v, _ := MetricValue("coredns_cache_capacity_gauge", result)
+//
+package test
+
+import (
+ "fmt"
+ "io"
+ "mime"
+ "net/http"
+ "testing"
+
+ "github.com/matttproud/golang_protobuf_extensions/pbutil"
+ "github.com/prometheus/common/expfmt"
+
+ dto "github.com/prometheus/client_model/go"
+)
+
+type (
+ // MetricFamily holds a prometheus metric.
+ MetricFamily struct {
+ Name string `json:"name"`
+ Help string `json:"help"`
+ Type string `json:"type"`
+ Metrics []interface{} `json:"metrics,omitempty"` // Either metric, summary or histogram.
+ }
+
+ // metric is for all "single value" metrics.
+ metric struct {
+ Labels map[string]string `json:"labels,omitempty"`
+ Value string `json:"value"`
+ }
+
+ summary struct {
+ Labels map[string]string `json:"labels,omitempty"`
+ Quantiles map[string]string `json:"quantiles,omitempty"`
+ Count string `json:"count"`
+ Sum string `json:"sum"`
+ }
+
+ histogram struct {
+ Labels map[string]string `json:"labels,omitempty"`
+ Buckets map[string]string `json:"buckets,omitempty"`
+ Count string `json:"count"`
+ Sum string `json:"sum"`
+ }
+)
+
+// Scrape returns all the vars as a []*MetricFamily.
+func Scrape(t *testing.T, url string) []*MetricFamily {
+ mfChan := make(chan *dto.MetricFamily, 1024)
+
+ go fetchMetricFamilies(t, url, mfChan)
+
+ result := []*MetricFamily{}
+ for mf := range mfChan {
+ result = append(result, newMetricFamily(mf))
+ }
+ return result
+}
+
+// MetricValue returns the value associated with name as a string as well as the labels.
+// It only returns the first metric of the slice.
+func MetricValue(name string, mfs []*MetricFamily) (string, map[string]string) {
+ for _, mf := range mfs {
+ if mf.Name == name {
+ // Only works with Gauge and Counter...
+ return mf.Metrics[0].(metric).Value, mf.Metrics[0].(metric).Labels
+ }
+ }
+ return "", nil
+}
+
+// MetricValueLabel returns the value for name *and* label *value*.
+func MetricValueLabel(name, label string, mfs []*MetricFamily) (string, map[string]string) {
+ // A bit hacky; is this really handy...?
+ for _, mf := range mfs {
+ if mf.Name == name {
+ for _, m := range mf.Metrics {
+ for _, v := range m.(metric).Labels {
+ if v == label {
+ return m.(metric).Value, m.(metric).Labels
+ }
+ }
+
+ }
+ }
+ }
+ return "", nil
+}
+
+func newMetricFamily(dtoMF *dto.MetricFamily) *MetricFamily {
+ mf := &MetricFamily{
+ Name: dtoMF.GetName(),
+ Help: dtoMF.GetHelp(),
+ Type: dtoMF.GetType().String(),
+ Metrics: make([]interface{}, len(dtoMF.Metric)),
+ }
+ for i, m := range dtoMF.Metric {
+ if dtoMF.GetType() == dto.MetricType_SUMMARY {
+ mf.Metrics[i] = summary{
+ Labels: makeLabels(m),
+ Quantiles: makeQuantiles(m),
+ Count: fmt.Sprint(m.GetSummary().GetSampleCount()),
+ Sum: fmt.Sprint(m.GetSummary().GetSampleSum()),
+ }
+ } else if dtoMF.GetType() == dto.MetricType_HISTOGRAM {
+ mf.Metrics[i] = histogram{
+ Labels: makeLabels(m),
+ Buckets: makeBuckets(m),
+ Count: fmt.Sprint(m.GetHistogram().GetSampleCount()),
+ Sum: fmt.Sprint(m.GetSummary().GetSampleSum()),
+ }
+ } else {
+ mf.Metrics[i] = metric{
+ Labels: makeLabels(m),
+ Value: fmt.Sprint(value(m)),
+ }
+ }
+ }
+ return mf
+}
+
+func value(m *dto.Metric) float64 {
+ if m.Gauge != nil {
+ return m.GetGauge().GetValue()
+ }
+ if m.Counter != nil {
+ return m.GetCounter().GetValue()
+ }
+ if m.Untyped != nil {
+ return m.GetUntyped().GetValue()
+ }
+ return 0.
+}
+
+func makeLabels(m *dto.Metric) map[string]string {
+ result := map[string]string{}
+ for _, lp := range m.Label {
+ result[lp.GetName()] = lp.GetValue()
+ }
+ return result
+}
+
+func makeQuantiles(m *dto.Metric) map[string]string {
+ result := map[string]string{}
+ for _, q := range m.GetSummary().Quantile {
+ result[fmt.Sprint(q.GetQuantile())] = fmt.Sprint(q.GetValue())
+ }
+ return result
+}
+
+func makeBuckets(m *dto.Metric) map[string]string {
+ result := map[string]string{}
+ for _, b := range m.GetHistogram().Bucket {
+ result[fmt.Sprint(b.GetUpperBound())] = fmt.Sprint(b.GetCumulativeCount())
+ }
+ return result
+}
+
+func fetchMetricFamilies(t *testing.T, url string, ch chan<- *dto.MetricFamily) {
+ defer close(ch)
+ req, err := http.NewRequest("GET", url, nil)
+ if err != nil {
+ t.Fatalf("creating GET request for URL %q failed: %s", url, err)
+ }
+ req.Header.Add("Accept", acceptHeader)
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ t.Fatalf("executing GET request for URL %q failed: %s", url, err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ t.Fatalf("GET request for URL %q returned HTTP status %s", url, resp.Status)
+ }
+
+ mediatype, params, err := mime.ParseMediaType(resp.Header.Get("Content-Type"))
+ if err == nil && mediatype == "application/vnd.google.protobuf" &&
+ params["encoding"] == "delimited" &&
+ params["proto"] == "io.prometheus.client.MetricFamily" {
+ for {
+ mf := &dto.MetricFamily{}
+ if _, err = pbutil.ReadDelimited(resp.Body, mf); err != nil {
+ if err == io.EOF {
+ break
+ }
+ t.Fatalf("reading metric family protocol buffer failed: %s", err)
+ }
+ ch <- mf
+ }
+ } else {
+ // We could do further content-type checks here, but the
+ // fallback for now will anyway be the text format
+ // version 0.0.4, so just go for it and see if it works.
+ var parser expfmt.TextParser
+ metricFamilies, err := parser.TextToMetricFamilies(resp.Body)
+ if err != nil {
+ t.Fatal("reading text format failed:", err)
+ }
+ for _, mf := range metricFamilies {
+ ch <- mf
+ }
+ }
+}
+
+const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3`
diff --git a/middleware/metrics/vars/report.go b/middleware/metrics/vars/report.go
new file mode 100644
index 000000000..996826599
--- /dev/null
+++ b/middleware/metrics/vars/report.go
@@ -0,0 +1,62 @@
+package vars
+
+import (
+ "time"
+
+ "github.com/miekg/coredns/request"
+
+ "github.com/miekg/dns"
+)
+
+// Report reports the metrics data associated with request.
+func Report(req request.Request, zone, rcode string, size int, start time.Time) {
+ // Proto and Family
+ net := req.Proto()
+ fam := "1"
+ if req.Family() == 2 {
+ fam = "2"
+ }
+
+ typ := req.QType()
+
+ RequestCount.WithLabelValues(zone, net, fam).Inc()
+ RequestDuration.WithLabelValues(zone).Observe(float64(time.Since(start) / time.Millisecond))
+
+ if req.Do() {
+ RequestDo.WithLabelValues(zone).Inc()
+ }
+
+ if _, known := monitorType[typ]; known {
+ RequestType.WithLabelValues(zone, dns.Type(typ).String()).Inc()
+ } else {
+ RequestType.WithLabelValues(zone, other).Inc()
+ }
+
+ ResponseSize.WithLabelValues(zone, net).Observe(float64(size))
+ RequestSize.WithLabelValues(zone, net).Observe(float64(req.Size()))
+
+ ResponseRcode.WithLabelValues(zone, rcode).Inc()
+}
+
+var monitorType = map[uint16]bool{
+ dns.TypeAAAA: true,
+ dns.TypeA: true,
+ dns.TypeCNAME: true,
+ dns.TypeDNSKEY: true,
+ dns.TypeDS: true,
+ dns.TypeMX: true,
+ dns.TypeNSEC3: true,
+ dns.TypeNSEC: true,
+ dns.TypeNS: true,
+ dns.TypePTR: true,
+ dns.TypeRRSIG: true,
+ dns.TypeSOA: true,
+ dns.TypeSRV: true,
+ dns.TypeTXT: true,
+ // Meta Qtypes
+ dns.TypeIXFR: true,
+ dns.TypeAXFR: true,
+ dns.TypeANY: true,
+}
+
+const other = "other"
diff --git a/middleware/metrics/vars/vars.go b/middleware/metrics/vars/vars.go
new file mode 100644
index 000000000..935b6f810
--- /dev/null
+++ b/middleware/metrics/vars/vars.go
@@ -0,0 +1,68 @@
+package vars
+
+import (
+ "github.com/miekg/coredns/middleware"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+ RequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "request_count_total",
+ Help: "Counter of DNS requests made per zone, protocol and family.",
+ }, []string{"zone", "proto", "family"})
+
+ RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "request_duration_milliseconds",
+ Buckets: append(prometheus.DefBuckets, []float64{50, 100, 200, 500, 1000, 2000, 3000, 4000, 5000, 10000}...),
+ Help: "Histogram of the time (in milliseconds) each request took.",
+ }, []string{"zone"})
+
+ RequestSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "request_size_bytes",
+ Help: "Size of the EDNS0 UDP buffer in bytes (64K for TCP).",
+ Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
+ }, []string{"zone", "proto"})
+
+ RequestDo = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "request_do_count_total",
+ Help: "Counter of DNS requests with DO bit set per zone.",
+ }, []string{"zone"})
+
+ RequestType = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "request_type_count_total",
+ Help: "Counter of DNS requests per type, per zone.",
+ }, []string{"zone", "type"})
+
+ ResponseSize = prometheus.NewHistogramVec(prometheus.HistogramOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "response_size_bytes",
+ Help: "Size of the returned response in bytes.",
+ Buckets: []float64{0, 100, 200, 300, 400, 511, 1023, 2047, 4095, 8291, 16e3, 32e3, 48e3, 64e3},
+ }, []string{"zone", "proto"})
+
+ ResponseRcode = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: middleware.Namespace,
+ Subsystem: subsystem,
+ Name: "response_rcode_count_total",
+ Help: "Counter of response status codes.",
+ }, []string{"zone", "rcode"})
+)
+
+const (
+ subsystem = "dns"
+
+ // Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
+ Dropped = "dropped"
+)
diff --git a/middleware/middleware.go b/middleware/middleware.go
index 804209fa9..bdc8bc2fe 100644
--- a/middleware/middleware.go
+++ b/middleware/middleware.go
@@ -45,6 +45,7 @@ type (
// chain by returning them unchanged.
Handler interface {
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
+ Name() string
}
// HandlerFunc is a convenience type like dns.HandlerFunc, except
@@ -58,6 +59,8 @@ func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.
return f(ctx, w, r)
}
+func (f HandlerFunc) Name() string { return "handlerfunc" }
+
// Error returns err with 'middleware/name: ' prefixed to it.
func Error(name string, err error) error { return fmt.Errorf("%s/%s: %s", "middleware", name, err) }
diff --git a/middleware/normalize.go b/middleware/normalize.go
index 2d153ba78..87f3ce703 100644
--- a/middleware/normalize.go
+++ b/middleware/normalize.go
@@ -51,7 +51,7 @@ func (n Name) Normalize() string { return strings.ToLower(dns.Fqdn(string(n))) }
type (
// Host represents a host from the Corefile, may contain port.
Host string // Host represents a host from the Corefile, may contain port.
- // Addr resprents an address in the Corefile.
+ // Addr represents an address in the Corefile.
Addr string // Addr resprents an address in the Corefile.
)
diff --git a/middleware/proxy/proxy.go b/middleware/proxy/proxy.go
index a33024f12..1ff0a2f30 100644
--- a/middleware/proxy/proxy.go
+++ b/middleware/proxy/proxy.go
@@ -102,5 +102,7 @@ func (p Proxy) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (
return p.Next.ServeDNS(ctx, w, r)
}
+func (p Proxy) Name() string { return "proxy" }
+
// defaultTimeout is the default networking timeout for DNS requests.
const defaultTimeout = 5 * time.Second
diff --git a/middleware/rewrite/rewrite.go b/middleware/rewrite/rewrite.go
index c88fa3549..2498d4c34 100644
--- a/middleware/rewrite/rewrite.go
+++ b/middleware/rewrite/rewrite.go
@@ -52,6 +52,8 @@ func (rw Rewrite) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
return rw.Next.ServeDNS(ctx, w, r)
}
+func (rw Rewrite) Name() string { return "rewrite" }
+
// Rule describes an internal location rewrite rule.
type Rule interface {
// Rewrite rewrites the internal location of the current request.
diff --git a/middleware/test/helpers.go b/middleware/test/helpers.go
index 157bdce66..0137171a7 100644
--- a/middleware/test/helpers.go
+++ b/middleware/test/helpers.go
@@ -283,6 +283,7 @@ type (
// Handler interface defines a middleware.
Handler interface {
ServeDNS(context.Context, dns.ResponseWriter, *dns.Msg) (int, error)
+ Name() string
}
)
@@ -290,3 +291,5 @@ type (
func (f HandlerFunc) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
return f(ctx, w, r)
}
+
+func (f HandlerFunc) Name() string { return "handlerfunc" }
diff --git a/middleware/whoami/whoami.go b/middleware/whoami/whoami.go
index ccf8aefe1..0ef63325f 100644
--- a/middleware/whoami/whoami.go
+++ b/middleware/whoami/whoami.go
@@ -55,3 +55,5 @@ func (wh Whoami) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return 0, nil
}
+
+func (wh Whoami) Name() string { return "whoami" }
diff --git a/test/metrics_test.go b/test/metrics_test.go
index 85cb2a824..d122f2545 100644
--- a/test/metrics_test.go
+++ b/test/metrics_test.go
@@ -1,12 +1,84 @@
package test
-import "testing"
+import (
+ "io/ioutil"
+ "log"
+ "os"
+ "path"
+ "testing"
+ "time"
+
+ "github.com/miekg/coredns/middleware/cache"
+ "github.com/miekg/coredns/middleware/metrics"
+ mtest "github.com/miekg/coredns/middleware/metrics/test"
+ "github.com/miekg/coredns/middleware/metrics/vars"
+
+ "github.com/miekg/dns"
+)
// Start test server that has metrics enabled. Then tear it down again.
func TestMetricsServer(t *testing.T) {
- corefile := `.:0 {
+ corefile := `example.org:0 {
chaos CoreDNS-001 miek@miek.nl
- prometheus localhost:0
+ prometheus
+}
+
+example.com:0 {
+ proxy . 8.8.4.4:53
+ prometheus
+}
+`
+ srv, err := CoreDNSServer(corefile)
+ if err != nil {
+ t.Fatalf("Could not get CoreDNS serving instance: %s", err)
+ }
+ defer srv.Stop()
+}
+
+func TestMetricsRefused(t *testing.T) {
+ metricName := "coredns_dns_response_rcode_count_total"
+
+ corefile := `example.org:0 {
+ proxy . 8.8.8.8:53
+ prometheus
+}
+`
+ srv, err := CoreDNSServer(corefile)
+ if err != nil {
+ t.Fatalf("Could not get CoreDNS serving instance: %s", err)
+ }
+ defer srv.Stop()
+
+ udp, _ := CoreDNSServerPorts(srv, 0)
+
+ m := new(dns.Msg)
+ m.SetQuestion("google.com.", dns.TypeA)
+
+ if _, err = dns.Exchange(m, udp); err != nil {
+ t.Fatalf("Could not send message: %s", err)
+ }
+
+ data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
+ got, labels := mtest.MetricValue(metricName, data)
+
+ if got != "1" {
+ t.Errorf("Expected value %s for refused, but got %s", "1", got)
+ }
+ if labels["zone"] != vars.Dropped {
+ t.Errorf("Expected zone value %s for refused, but got %s", vars.Dropped, labels["zone"])
+ }
+ if labels["rcode"] != "REFUSED" {
+ t.Errorf("Expected zone value %s for refused, but got %s", "REFUSED", labels["rcode"])
+ }
+}
+
+func TestMetricsCache(t *testing.T) {
+ metricName := "coredns_cache_size_guage"
+
+ corefile := `example.net:0 {
+ proxy . 8.8.8.8:53
+ prometheus
+ cache
}
`
srv, err := CoreDNSServer(corefile)
@@ -14,4 +86,88 @@ func TestMetricsServer(t *testing.T) {
t.Fatalf("Could not get CoreDNS serving instance: %s", err)
}
defer srv.Stop()
+
+ udp, _ := CoreDNSServerPorts(srv, 0)
+
+ m := new(dns.Msg)
+ m.SetQuestion("www.example.net.", dns.TypeA)
+
+ if _, err = dns.Exchange(m, udp); err != nil {
+ t.Fatalf("Could not send message: %s", err)
+ }
+
+ data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
+ // Get the value for the metric where one of the label values matches "success".
+ got, _ := mtest.MetricValueLabel(metricName, cache.Success, data)
+
+ if got != "1" {
+ t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
+ }
+}
+
+func TestMetricsAuto(t *testing.T) {
+ tmpdir, err := ioutil.TempDir(os.TempDir(), "coredns")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // TODO(miek): Random port as string and use that later?
+ corefile := `org:0 {
+ auto {
+ directory ` + tmpdir + ` db\.(.*) {1} 1
+ }
+ prometheus
+ }
+`
+
+ i, err := CoreDNSServer(corefile)
+ if err != nil {
+ t.Fatalf("Could not get CoreDNS serving instance: %s", err)
+ }
+
+ udp, _ := CoreDNSServerPorts(i, 0)
+ if udp == "" {
+ t.Fatalf("Could not get UDP listening port")
+ }
+ defer i.Stop()
+
+ log.SetOutput(ioutil.Discard)
+
+ // Write db.example.org to get example.org.
+ if err = ioutil.WriteFile(path.Join(tmpdir, "db.example.org"), []byte(zoneContent), 0644); err != nil {
+ t.Fatal(err)
+ }
+ // TODO(miek): make the auto sleep even less.
+ time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
+
+ m := new(dns.Msg)
+ m.SetQuestion("www.example.org.", dns.TypeA)
+
+ if _, err := dns.Exchange(m, udp); err != nil {
+ t.Fatalf("Could not send message: %s", err)
+ }
+
+ metricName := "coredns_dns_request_count_total" //{zone, proto, family}
+
+ data := mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
+ // Get the value for the metric where one of the label values matches "example.org."
+ got, _ := mtest.MetricValueLabel(metricName, "example.org.", data)
+
+ if got != "1" {
+ t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
+ }
+
+ // Remove db.example.org again. And see if the metric stops increasing.
+ os.Remove(path.Join(tmpdir, "db.example.org"))
+ time.Sleep(1100 * time.Millisecond) // wait for it to be picked up
+ if _, err := dns.Exchange(m, udp); err != nil {
+ t.Fatalf("Could not send message: %s", err)
+ }
+
+ data = mtest.Scrape(t, "http://"+metrics.Addr+"/metrics")
+ got, _ = mtest.MetricValueLabel(metricName, "example.org.", data)
+
+ if got != "1" {
+ t.Errorf("Expected value %s for %s, but got %s", "1", metricName, got)
+ }
}