aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Miek Gieben <miek@miek.nl> 2018-04-27 19:37:49 +0100
committerGravatar GitHub <noreply@github.com> 2018-04-27 19:37:49 +0100
commitbfc647d4edf4e6033e5fc6c7a9d4ddce1b3ddd76 (patch)
treef62a4ec1b8d4a9d7741a27b158ec301516ae2e79
parent85f549b529c91da1ab5bf133b0b4ddf429bbc022 (diff)
downloadcoredns-bfc647d4edf4e6033e5fc6c7a9d4ddce1b3ddd76.tar.gz
coredns-bfc647d4edf4e6033e5fc6c7a9d4ddce1b3ddd76.tar.zst
coredns-bfc647d4edf4e6033e5fc6c7a9d4ddce1b3ddd76.zip
Cache metrics server (#1746)
* plugin/cache: per-server metrics. Use per-server metrics in the cache plugin as well. This required some plumbing changes. Also use request.Request more. * fix cherry-pick
-rw-r--r--plugin/cache/README.md12
-rw-r--r--plugin/cache/cache.go9
-rw-r--r--plugin/cache/cache_test.go13
-rw-r--r--plugin/cache/handler.go58
-rw-r--r--plugin/cache/setup.go11
5 files changed, 44 insertions, 59 deletions
diff --git a/plugin/cache/README.md b/plugin/cache/README.md
index 539b03ee6..3af79cb5f 100644
--- a/plugin/cache/README.md
+++ b/plugin/cache/README.md
@@ -62,13 +62,13 @@ Eviction is done per shard - i.e. when a shard reaches capacity, items are evict
If monitoring is enabled (via the *prometheus* directive) then the following metrics are exported:
-* `coredns_cache_size{type}` - Total elements in the cache by cache type.
-* `coredns_cache_capacity{type}` - Total capacity of the cache by cache type.
-* `coredns_cache_hits_total{type}` - Counter of cache hits by cache type.
-* `coredns_cache_misses_total{}` - Counter of cache misses.
-* `coredns_cache_drops_total{}` - Counter of dropped messages.
+* `coredns_cache_size{server, type}` - Total elements in the cache by cache type.
+* `coredns_cache_hits_total{server, type}` - Counter of cache hits by cache type.
+* `coredns_cache_misses_total{server}` - Counter of cache misses.
+* `coredns_cache_drops_total{server}` - Counter of dropped messages.
-Cache types are either "denial" or "success".
+Cache types are either "denial" or "success". The `server` label identifies the server handling the
+request; see the metrics plugin for documentation.
## Examples
diff --git a/plugin/cache/cache.go b/plugin/cache/cache.go
index 12cbdb9ed..972c2b5e1 100644
--- a/plugin/cache/cache.go
+++ b/plugin/cache/cache.go
@@ -102,7 +102,8 @@ func hash(qname string, qtype uint16, do bool) uint32 {
type ResponseWriter struct {
dns.ResponseWriter
*Cache
- state request.Request
+ state request.Request
+ server string // Server handling the request.
prefetch bool // When true write nothing back to the client.
}
@@ -132,11 +133,11 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
if w.state.Match(res) {
w.set(res, key, mt, duration)
- cacheSize.WithLabelValues(Success).Set(float64(w.pcache.Len()))
- cacheSize.WithLabelValues(Denial).Set(float64(w.ncache.Len()))
+ cacheSize.WithLabelValues(w.server, Success).Set(float64(w.pcache.Len()))
+ cacheSize.WithLabelValues(w.server, Denial).Set(float64(w.ncache.Len()))
} else {
// Don't log it, but increment counter
- cacheDrops.Inc()
+ cacheDrops.WithLabelValues(w.server).Inc()
}
}
diff --git a/plugin/cache/cache_test.go b/plugin/cache/cache_test.go
index 131803ea7..812dd2b36 100644
--- a/plugin/cache/cache_test.go
+++ b/plugin/cache/cache_test.go
@@ -8,6 +8,7 @@ import (
"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/pkg/response"
"github.com/coredns/coredns/plugin/test"
+ "github.com/coredns/coredns/request"
"github.com/miekg/dns"
)
@@ -162,21 +163,19 @@ func TestCache(t *testing.T) {
for _, tc := range cacheTestCases {
m := tc.in.Msg()
m = cacheMsg(m, tc)
- do := tc.in.Do
+
+ state := request.Request{W: nil, Req: m}
mt, _ := response.Typify(m, utc)
- k := key(m, mt, do)
+ k := key(m, mt, state.Do())
crr.set(m, k, mt, c.pttl)
- name := plugin.Name(m.Question[0].Name).Normalize()
- qtype := m.Question[0].Qtype
-
- i, _ := c.get(time.Now().UTC(), name, qtype, do)
+ i, _ := c.get(time.Now().UTC(), state, "dns://:53")
ok := i != nil
if ok != tc.shouldCache {
- t.Errorf("cached message that should not have been cached: %s", name)
+ t.Errorf("cached message that should not have been cached: %s", state.Name())
continue
}
diff --git a/plugin/cache/handler.go b/plugin/cache/handler.go
index d6ed0dcad..598640568 100644
--- a/plugin/cache/handler.go
+++ b/plugin/cache/handler.go
@@ -7,6 +7,7 @@ import (
"time"
"github.com/coredns/coredns/plugin"
+ "github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/request"
"github.com/miekg/dns"
@@ -17,18 +18,16 @@ import (
func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
state := request.Request{W: w, Req: r}
- qname := state.Name()
- qtype := state.QType()
- zone := plugin.Zones(c.Zones).Matches(qname)
+ zone := plugin.Zones(c.Zones).Matches(state.Name())
if zone == "" {
return plugin.NextOrFailure(c.Name(), c.Next, ctx, w, r)
}
- do := state.Do() // TODO(): might need more from OPT record? Like the actual bufsize?
-
now := c.now().UTC()
- i, ttl := c.get(now, qname, qtype, do)
+ server := metrics.WithServer(ctx)
+
+ i, ttl := c.get(now, state, server)
if i != nil && ttl > 0 {
resp := i.toMsg(r, now)
@@ -42,14 +41,16 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
threshold := int(math.Ceil(float64(c.percentage) / 100 * float64(i.origTTL)))
if i.Freq.Hits() >= c.prefetch && ttl <= threshold {
go func() {
- cachePrefetches.Inc()
+ cachePrefetches.WithLabelValues(server).Inc()
// When prefetching we lose the item i, and with it the frequency
// that we've gathered so far. So we copy the frequency info back
// into the new item that was stored in the cache.
- prr := &ResponseWriter{ResponseWriter: w, Cache: c, prefetch: true, state: state}
+ prr := &ResponseWriter{ResponseWriter: w, Cache: c,
+ prefetch: true, state: state,
+ server: server}
plugin.NextOrFailure(c.Name(), c.Next, ctx, prr, r)
- if i1 := c.exists(qname, qtype, do); i1 != nil {
+ if i1 := c.exists(state); i1 != nil {
i1.Freq.Reset(now, i.Freq.Hits())
}
}()
@@ -58,31 +59,31 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
return dns.RcodeSuccess, nil
}
- crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state}
+ crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server}
return plugin.NextOrFailure(c.Name(), c.Next, ctx, crr, r)
}
// Name implements the Handler interface.
func (c *Cache) Name() string { return "cache" }
-func (c *Cache) get(now time.Time, qname string, qtype uint16, do bool) (*item, int) {
- k := hash(qname, qtype, do)
+func (c *Cache) get(now time.Time, state request.Request, server string) (*item, int) {
+ k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
- cacheHits.WithLabelValues(Denial).Inc()
+ cacheHits.WithLabelValues(server, Denial).Inc()
return i.(*item), i.(*item).ttl(now)
}
if i, ok := c.pcache.Get(k); ok {
- cacheHits.WithLabelValues(Success).Inc()
+ cacheHits.WithLabelValues(server, Success).Inc()
return i.(*item), i.(*item).ttl(now)
}
- cacheMisses.Inc()
+ cacheMisses.WithLabelValues(server).Inc()
return nil, 0
}
-func (c *Cache) exists(qname string, qtype uint16, do bool) *item {
- k := hash(qname, qtype, do)
+func (c *Cache) exists(state request.Request) *item {
+ k := hash(state.Name(), state.QType(), state.Do())
if i, ok := c.ncache.Get(k); ok {
return i.(*item)
}
@@ -98,42 +99,35 @@ var (
Subsystem: "cache",
Name: "size",
Help: "The number of elements in the cache.",
- }, []string{"type"})
-
- cacheCapacity = prometheus.NewGaugeVec(prometheus.GaugeOpts{
- Namespace: plugin.Namespace,
- Subsystem: "cache",
- Name: "capacity",
- Help: "The cache's capacity.",
- }, []string{"type"})
+ }, []string{"server", "type"})
cacheHits = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "hits_total",
Help: "The count of cache hits.",
- }, []string{"type"})
+ }, []string{"server", "type"})
- cacheMisses = prometheus.NewCounter(prometheus.CounterOpts{
+ cacheMisses = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "misses_total",
Help: "The count of cache misses.",
- })
+ }, []string{"server"})
- cachePrefetches = prometheus.NewCounter(prometheus.CounterOpts{
+ cachePrefetches = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "prefetch_total",
Help: "The number of time the cache has prefetched a cached item.",
- })
+ }, []string{"server"})
- cacheDrops = prometheus.NewCounter(prometheus.CounterOpts{
+ cacheDrops = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: plugin.Namespace,
Subsystem: "cache",
Name: "drops_total",
Help: "The number responses that are not cached, because the reply is malformed.",
- })
+ }, []string{"server"})
)
var once sync.Once
diff --git a/plugin/cache/setup.go b/plugin/cache/setup.go
index 57233bd66..17a588885 100644
--- a/plugin/cache/setup.go
+++ b/plugin/cache/setup.go
@@ -36,21 +36,12 @@ func setup(c *caddy.Controller) error {
c.OnStartup(func() error {
once.Do(func() {
metrics.MustRegister(c,
- cacheSize, cacheCapacity,
- cacheHits, cacheMisses,
+ cacheSize, cacheHits, cacheMisses,
cachePrefetches, cacheDrops)
})
return nil
})
- // Initialize all counters and gauges.
- cacheSize.WithLabelValues(Success)
- cacheSize.WithLabelValues(Denial)
- cacheCapacity.WithLabelValues(Success).Set(float64(ca.pcap))
- cacheCapacity.WithLabelValues(Denial).Set(float64(ca.ncap))
- cacheHits.WithLabelValues(Success)
- cacheHits.WithLabelValues(Denial)
-
return nil
}