aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Miek Gieben <miek@miek.nl> 2016-04-29 07:28:35 +0100
committerGravatar Miek Gieben <miek@miek.nl> 2016-04-29 07:28:35 +0100
commit9e9d72655d865f2d136ad3b0439735cde53b679f (patch)
treef0c365ffb5fbe9599374ab2551463d610f79ebd6
parenta1478f891dd6849835973720df6b5651a8ea88fd (diff)
downloadcoredns-9e9d72655d865f2d136ad3b0439735cde53b679f.tar.gz
coredns-9e9d72655d865f2d136ad3b0439735cde53b679f.tar.zst
coredns-9e9d72655d865f2d136ad3b0439735cde53b679f.zip
Make middleware survive a restart (#142)
Make middleware that sets up an (HTTP) handler survive a graceful restart. We call the middleware's Shutdown function(s). If the restart fails, the Start function is called again. * middleware/health: OK * middleware/pprof: OK * middleware/metrics: OK All restart OK.
-rw-r--r--core/restart.go6
-rw-r--r--core/setup/health.go5
-rw-r--r--core/setup/metrics.go9
-rw-r--r--core/setup/pprof.go1
-rw-r--r--core/sigtrap.go24
-rw-r--r--middleware/health/health.go28
-rw-r--r--middleware/metrics/metrics.go33
-rw-r--r--middleware/pprof/README.md2
-rw-r--r--middleware/pprof/pprof.go37
-rw-r--r--server/server.go41
10 files changed, 132 insertions, 54 deletions
diff --git a/core/restart.go b/core/restart.go
index 1c053c13c..aa77e152d 100644
--- a/core/restart.go
+++ b/core/restart.go
@@ -117,6 +117,9 @@ func Restart(newCorefile Input) error {
}
wpipe.Close()
+ // Run all shutdown functions for the middleware, if child start fails, restart them all...
+ executeShutdownCallbacks("SIGUSR1")
+
// Determine whether child startup succeeded
answer, readErr := ioutil.ReadAll(sigrpipe)
if answer == nil || len(answer) == 0 {
@@ -125,6 +128,9 @@ func Restart(newCorefile Input) error {
if readErr != nil {
log.Printf("[ERROR] Restart: additionally, error communicating with child process: %v", readErr)
}
+ // re-call all startup functions.
+ // TODO(miek): this needs to be tested, somehow.
+ executeStartupCallbacks("SIGUSR1")
return errIncompleteRestart
}
diff --git a/core/setup/health.go b/core/setup/health.go
index 608147b13..542cb3260 100644
--- a/core/setup/health.go
+++ b/core/setup/health.go
@@ -11,8 +11,9 @@ func Health(c *Controller) (middleware.Middleware, error) {
return nil, err
}
- h := health.Health{Addr: addr}
- c.Startup = append(c.Startup, h.ListenAndServe)
+ h := &health.Health{Addr: addr}
+ c.Startup = append(c.Startup, h.Start)
+ c.Shutdown = append(c.Shutdown, h.Shutdown)
return nil, nil
}
diff --git a/core/setup/metrics.go b/core/setup/metrics.go
index e54f5b011..e88d93c86 100644
--- a/core/setup/metrics.go
+++ b/core/setup/metrics.go
@@ -12,18 +12,19 @@ const addr = "localhost:9153"
var metricsOnce sync.Once
func Prometheus(c *Controller) (middleware.Middleware, error) {
- met, err := parsePrometheus(c)
+ m, err := parsePrometheus(c)
if err != nil {
return nil, err
}
metricsOnce.Do(func() {
- c.Startup = append(c.Startup, met.Start)
+ c.Startup = append(c.Startup, m.Start)
+ c.Shutdown = append(c.Shutdown, m.Shutdown)
})
return func(next middleware.Handler) middleware.Handler {
- met.Next = next
- return met
+ m.Next = next
+ return m
}, nil
}
diff --git a/core/setup/pprof.go b/core/setup/pprof.go
index e202bfc00..cf6b49636 100644
--- a/core/setup/pprof.go
+++ b/core/setup/pprof.go
@@ -27,6 +27,7 @@ func PProf(c *Controller) (middleware.Middleware, error) {
handler := &pprof.Handler{}
pprofOnce.Do(func() {
c.Startup = append(c.Startup, handler.Start)
+ c.Shutdown = append(c.Shutdown, handler.Shutdown)
})
return func(next middleware.Handler) middleware.Handler {
diff --git a/core/sigtrap.go b/core/sigtrap.go
index 3b74efb02..f40dd971a 100644
--- a/core/sigtrap.go
+++ b/core/sigtrap.go
@@ -68,4 +68,26 @@ func executeShutdownCallbacks(signame string) (exitCode int) {
return
}
-var shutdownCallbacksOnce sync.Once
+// executeStartupCallbacks runs the startup callbacks of all servers on behalf
+// of signame. It is used on restart, when the child process failed to start
+// after all middleware had already executed their shutdown functions.
+// It returns exit code 1 if any callback reported an error, and 0 otherwise.
+//
+// NOTE(review): the callbacks are guarded by startupCallbacksOnce, so only the
+// first call during the lifetime of the process actually runs them; confirm
+// this is the intended behaviour for repeated failed restarts.
+func executeStartupCallbacks(signame string) (exitCode int) {
+	startupCallbacksOnce.Do(func() {
+		serversMu.Lock()
+		errs := server.StartupCallbacks(servers)
+		serversMu.Unlock()
+
+		for _, err := range errs {
+			// These are startup failures; do not label them as shutdown errors.
+			log.Printf("[ERROR] %s startup: %v", signame, err)
+		}
+		if len(errs) > 0 {
+			exitCode = 1
+		}
+	})
+	return
+}
+
+var (
+ shutdownCallbacksOnce sync.Once
+ startupCallbacksOnce sync.Once
+)
diff --git a/middleware/health/health.go b/middleware/health/health.go
index 8c1665e57..035c9ca7a 100644
--- a/middleware/health/health.go
+++ b/middleware/health/health.go
@@ -3,6 +3,7 @@ package health
import (
"io"
"log"
+ "net"
"net/http"
"sync"
)
@@ -11,28 +12,45 @@ var once sync.Once
type Health struct {
Addr string
+ ln net.Listener
+ mux *http.ServeMux
}
func health(w http.ResponseWriter, r *http.Request) {
io.WriteString(w, ok)
}
-func (h Health) ListenAndServe() error {
+func (h *Health) Start() error {
if h.Addr == "" {
h.Addr = defAddr
}
+
once.Do(func() {
- http.HandleFunc("/health", health)
+ if ln, err := net.Listen("tcp", h.Addr); err != nil {
+ log.Printf("[ERROR] Failed to start health handler: %s", err)
+ return
+ } else {
+ h.ln = ln
+ }
+ h.mux = http.NewServeMux()
+
+ h.mux.HandleFunc(path, health)
go func() {
- if err := http.ListenAndServe(h.Addr, nil); err != nil {
- log.Printf("[ERROR] Failed to start health handler: %s", err)
- }
+ http.Serve(h.ln, h.mux)
}()
})
return nil
}
+// Shutdown closes the health listener if one was opened by Start.
+// It returns the error from closing the listener, or nil when there is
+// no listener to close.
+func (h *Health) Shutdown() error {
+	if h.ln == nil {
+		return nil
+	}
+	return h.ln.Close()
+}
+
const (
ok = "OK"
defAddr = ":8080"
+ path = "/health"
)
diff --git a/middleware/metrics/metrics.go b/middleware/metrics/metrics.go
index b525042f7..42f1439eb 100644
--- a/middleware/metrics/metrics.go
+++ b/middleware/metrics/metrics.go
@@ -2,6 +2,7 @@ package metrics
import (
"log"
+ "net"
"net/http"
"sync"
@@ -17,12 +18,12 @@ var (
responseRcode *prometheus.CounterVec
)
-const path = "/metrics"
-
// Metrics holds the prometheus configuration. The metrics' path is fixed to be /metrics
type Metrics struct {
Next middleware.Handler
- Addr string // where to we listen
+ Addr string
+ ln net.Listener
+ mux *http.ServeMux
Once sync.Once
ZoneNames []string
}
@@ -31,21 +32,35 @@ func (m *Metrics) Start() error {
m.Once.Do(func() {
define()
+ if ln, err := net.Listen("tcp", m.Addr); err != nil {
+ log.Printf("[ERROR] Failed to start metrics handler: %s", err)
+ return
+ } else {
+ m.ln = ln
+ }
+ m.mux = http.NewServeMux()
+
prometheus.MustRegister(requestCount)
prometheus.MustRegister(requestDuration)
prometheus.MustRegister(responseSize)
prometheus.MustRegister(responseRcode)
- http.Handle(path, prometheus.Handler())
+ m.mux.Handle(path, prometheus.Handler())
+
go func() {
- if err := http.ListenAndServe(m.Addr, nil); err != nil {
- log.Printf("[ERROR] Failed to start prometheus handler: %s", err)
- }
+ http.Serve(m.ln, m.mux)
}()
})
return nil
}
+// Shutdown closes the metrics listener if one was opened by Start.
+// It returns the error from closing the listener, or nil when there is
+// no listener to close.
+func (m *Metrics) Shutdown() error {
+	if m.ln == nil {
+		return nil
+	}
+	return m.ln.Close()
+}
+
func define() {
requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: middleware.Namespace,
@@ -80,7 +95,7 @@ func define() {
const (
// Dropped indicates we dropped the query before any handling. It has no closing dot, so it can not be a valid zone.
- Dropped = "dropped"
-
+ Dropped = "dropped"
subsystem = "dns"
+ path = "/metrics"
)
diff --git a/middleware/pprof/README.md b/middleware/pprof/README.md
index 7798f5a63..c62605520 100644
--- a/middleware/pprof/README.md
+++ b/middleware/pprof/README.md
@@ -1,7 +1,7 @@
# pprof
pprof publishes runtime profiling data at endpoints under /debug/pprof. You can visit /debug/pprof
-on your site for an index of the available endpoints. By default it will listen on localhost:8053.
+on your site for an index of the available endpoints. By default it will listen on localhost:6053.
> This is a debugging tool. Certain requests (such as collecting execution traces) can be slow. If
> you use pprof on a live site, consider restricting access or enabling it only temporarily.
diff --git a/middleware/pprof/pprof.go b/middleware/pprof/pprof.go
index 677690ff8..2c812f246 100644
--- a/middleware/pprof/pprof.go
+++ b/middleware/pprof/pprof.go
@@ -2,8 +2,9 @@ package pprof
import (
"log"
+ "net"
"net/http"
- _ "net/http/pprof"
+ pp "net/http/pprof"
"github.com/miekg/coredns/middleware"
@@ -11,10 +12,10 @@ import (
"golang.org/x/net/context"
)
-const addr = "localhost:8053"
-
type Handler struct {
Next middleware.Handler
+ ln net.Listener
+ mux *http.ServeMux
}
// ServeDNS passes all other requests up the chain.
@@ -23,10 +24,34 @@ func (h *Handler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
}
func (h *Handler) Start() error {
+ if ln, err := net.Listen("tcp", addr); err != nil {
+ log.Printf("[ERROR] Failed to start pprof handler: %s", err)
+ return err
+ } else {
+ h.ln = ln
+ }
+
+ h.mux = http.NewServeMux()
+ h.mux.HandleFunc(path+"/", pp.Index)
+ h.mux.HandleFunc(path+"/cmdline", pp.Cmdline)
+ h.mux.HandleFunc(path+"/profile", pp.Profile)
+ h.mux.HandleFunc(path+"/symbol", pp.Symbol)
+ h.mux.HandleFunc(path+"/trace", pp.Trace)
+
go func() {
- if err := http.ListenAndServe(addr, nil); err != nil {
- log.Printf("[ERROR] Failed to start pprof handler: %s", err)
- }
+ http.Serve(h.ln, h.mux)
}()
return nil
}
+
+// Shutdown closes the pprof listener if one was opened by Start.
+// It returns the error from closing the listener, or nil when there is
+// no listener to close.
+func (h *Handler) Shutdown() error {
+	if h.ln == nil {
+		return nil
+	}
+	return h.ln.Close()
+}
+
+const (
+ addr = "localhost:6053"
+ path = "/debug/pprof"
+)
diff --git a/server/server.go b/server/server.go
index 068a54a31..af5dd35e1 100644
--- a/server/server.go
+++ b/server/server.go
@@ -445,32 +445,6 @@ func (s *Server) RunFirstStartupFuncs() error {
return nil
}
-// tcpKeepAliveListener sets TCP keep-alive timeouts on accepted
-// connections. It's used by ListenAndServe and ListenAndServeTLS so
-// dead TCP connections (e.g. closing laptop mid-download) eventually
-// go away.
-//
-// Borrowed from the Go standard library.
-type tcpKeepAliveListener struct {
- *net.TCPListener
-}
-
-// Accept accepts the connection with a keep-alive enabled.
-func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
- tc, err := ln.AcceptTCP()
- if err != nil {
- return
- }
- tc.SetKeepAlive(true)
- tc.SetKeepAlivePeriod(3 * time.Minute)
- return tc, nil
-}
-
-// File implements ListenerFile; returns the underlying file of the listener.
-func (ln tcpKeepAliveListener) File() (*os.File, error) {
- return ln.TCPListener.File()
-}
-
// ShutdownCallbacks executes all the shutdown callbacks
// for all the virtualhosts in servers, and returns all the
// errors generated during their execution. In other words,
@@ -493,6 +467,21 @@ func ShutdownCallbacks(servers []*Server) []error {
return errs
}
+// StartupCallbacks executes all the startup callbacks for all the zones
+// in servers, and returns all the errors generated during their execution.
+// A failing callback does not stop the remaining callbacks from running.
+func StartupCallbacks(servers []*Server) []error {
+	var failures []error
+	for _, srv := range servers {
+		for _, z := range srv.zones {
+			for _, start := range z.config.Startup {
+				if err := start(); err != nil {
+					failures = append(failures, err)
+				}
+			}
+		}
+	}
+	return failures
+}
+
func RcodeNoClientWrite(rcode int) bool {
switch rcode {
case dns.RcodeServerFailure: