mirror of
https://github.com/slackhq/nebula.git
synced 2026-05-10 22:22:27 -07:00
383 lines
9.7 KiB
Go
383 lines
9.7 KiB
Go
package nebula
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"net"
|
|
"net/netip"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"github.com/gaissmai/bart"
|
|
"github.com/miekg/dns"
|
|
"github.com/slackhq/nebula/config"
|
|
)
|
|
|
|
type dnsServer struct {
|
|
sync.RWMutex
|
|
l *slog.Logger
|
|
ctx context.Context
|
|
dnsMap4 map[string]netip.Addr
|
|
dnsMap6 map[string]netip.Addr
|
|
hostMap *HostMap
|
|
myVpnAddrsTable *bart.Lite
|
|
|
|
mux *dns.ServeMux
|
|
|
|
// enabled mirrors `lighthouse.serve_dns && lighthouse.am_lighthouse`.
|
|
// Start, Add, and reload consult it so callers don't need to know the
|
|
// gating rules. When it toggles off via reload, accumulated records are
|
|
// cleared so a later re-enable starts with a fresh map populated from
|
|
// new handshakes.
|
|
enabled atomic.Bool
|
|
|
|
serverMu sync.Mutex
|
|
server *dns.Server
|
|
// started is closed once `server` has finished binding (or after
|
|
// ListenAndServe returns on a bind failure). Stop waits on it before
|
|
// calling Shutdown to avoid the miekg/dns "server not started" race
|
|
// where a Shutdown that arrives before bind completes is silently
|
|
// ignored, leaving the listener running forever.
|
|
started chan struct{}
|
|
addr string
|
|
}
|
|
|
|
// newDnsServerFromConfig builds a dnsServer, applies the initial config, and
|
|
// registers a reload callback. The reload callback is registered before the
|
|
// initial config is applied, so a SIGHUP can later enable, fix, or disable
|
|
// DNS even if the initial application failed.
|
|
//
|
|
// The dnsServer internally gates on `lighthouse.serve_dns &&
|
|
// lighthouse.am_lighthouse`. Start and Add are safe to call unconditionally,
|
|
// they no-op when DNS isn't enabled. Each Start invocation owns a ctx-cancel
|
|
// watcher that tears the listener down on nebula shutdown. The returned
|
|
// pointer is always non-nil, even on error.
|
|
func newDnsServerFromConfig(ctx context.Context, l *slog.Logger, cs *CertState, hostMap *HostMap, c *config.C) (*dnsServer, error) {
|
|
ds := &dnsServer{
|
|
l: l,
|
|
ctx: ctx,
|
|
dnsMap4: make(map[string]netip.Addr),
|
|
dnsMap6: make(map[string]netip.Addr),
|
|
hostMap: hostMap,
|
|
myVpnAddrsTable: cs.myVpnAddrsTable,
|
|
}
|
|
ds.mux = dns.NewServeMux()
|
|
ds.mux.HandleFunc(".", ds.handleDnsRequest)
|
|
|
|
c.RegisterReloadCallback(func(c *config.C) {
|
|
if err := ds.reload(c, false); err != nil {
|
|
ds.l.Error("Failed to reload DNS responder from config", "error", err)
|
|
}
|
|
})
|
|
|
|
if err := ds.reload(c, true); err != nil {
|
|
return ds, err
|
|
}
|
|
return ds, nil
|
|
}
|
|
|
|
// reload applies the latest config and reconciles the running state with it:
|
|
// - enabled toggled on -> spawn a runner
|
|
// - enabled toggled off -> stop the runner
|
|
// - listen address changed (while running) -> restart on the new address
|
|
// - everything else -> no-op
|
|
//
|
|
// On the initial call it only records configuration; Control.Start is what
|
|
// launches the first runner via dnsStart.
|
|
func (d *dnsServer) reload(c *config.C, initial bool) error {
|
|
wantsDns := c.GetBool("lighthouse.serve_dns", false)
|
|
amLighthouse := c.GetBool("lighthouse.am_lighthouse", false)
|
|
enabled := wantsDns && amLighthouse
|
|
newAddr := getDnsServerAddr(c)
|
|
|
|
d.serverMu.Lock()
|
|
running := d.server
|
|
runningStarted := d.started
|
|
sameAddr := d.addr == newAddr
|
|
d.addr = newAddr
|
|
d.enabled.Store(enabled)
|
|
d.serverMu.Unlock()
|
|
|
|
if initial {
|
|
if wantsDns && !amLighthouse {
|
|
d.l.Warn("DNS server refusing to run because this host is not a lighthouse.")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
if !enabled {
|
|
if running != nil {
|
|
d.Stop()
|
|
}
|
|
// Drop any records that accumulated while enabled; a later re-enable
|
|
// will repopulate from fresh handshakes.
|
|
d.clearRecords()
|
|
return nil
|
|
}
|
|
|
|
if running == nil {
|
|
// Was disabled (or never started); bring it up now.
|
|
go d.Start()
|
|
return nil
|
|
}
|
|
|
|
if sameAddr {
|
|
return nil
|
|
}
|
|
|
|
d.shutdownServer(running, runningStarted, "reload")
|
|
// Old Start goroutine has now exited; bring up a fresh listener on the
|
|
// new address.
|
|
go d.Start()
|
|
return nil
|
|
}
|
|
|
|
// shutdownServer waits for the server to finish binding (so Shutdown actually
|
|
// stops it rather than no-oping) and then shuts it down.
|
|
func (d *dnsServer) shutdownServer(srv *dns.Server, started chan struct{}, reason string) {
|
|
if srv == nil {
|
|
return
|
|
}
|
|
if started != nil {
|
|
<-started
|
|
}
|
|
if err := srv.Shutdown(); err != nil {
|
|
d.l.Warn("Failed to shut down the DNS responder", "reason", reason, "error", err)
|
|
}
|
|
}
|
|
|
|
// Start binds and serves the DNS responder. Blocks until Stop is called or
|
|
// the listener errors. Safe to call when DNS is disabled (returns
|
|
// immediately). This is what Control.dnsStart points at.
|
|
//
|
|
// Must be invoked after the tun device is active so that lighthouse.dns.host
|
|
// may bind to a nebula IP.
|
|
func (d *dnsServer) Start() {
|
|
if !d.enabled.Load() {
|
|
return
|
|
}
|
|
|
|
started := make(chan struct{})
|
|
d.serverMu.Lock()
|
|
if d.ctx.Err() != nil {
|
|
d.serverMu.Unlock()
|
|
return
|
|
}
|
|
addr := d.addr
|
|
server := &dns.Server{
|
|
Addr: addr,
|
|
Net: "udp",
|
|
Handler: d.mux,
|
|
NotifyStartedFunc: func() { close(started) },
|
|
}
|
|
d.server = server
|
|
d.started = started
|
|
d.serverMu.Unlock()
|
|
|
|
// Per-invocation ctx watcher. Exits when Start does, so we don't leak a
|
|
// watcher per reload-driven restart.
|
|
done := make(chan struct{})
|
|
go func() {
|
|
select {
|
|
case <-d.ctx.Done():
|
|
d.shutdownServer(server, started, "shutdown")
|
|
case <-done:
|
|
}
|
|
}()
|
|
|
|
d.l.Info("Starting DNS responder", "dnsListener", addr)
|
|
err := server.ListenAndServe()
|
|
close(done)
|
|
|
|
// If the listener never bound (bind error) NotifyStartedFunc never fires,
|
|
// so close started here to release any Stop caller waiting on it.
|
|
select {
|
|
case <-started:
|
|
default:
|
|
close(started)
|
|
}
|
|
|
|
if err != nil {
|
|
d.l.Warn("Failed to run the DNS responder", "error", err)
|
|
}
|
|
}
|
|
|
|
// Stop shuts down the active server, if any. Idempotent.
|
|
func (d *dnsServer) Stop() {
|
|
d.serverMu.Lock()
|
|
srv := d.server
|
|
started := d.started
|
|
d.server = nil
|
|
d.started = nil
|
|
d.serverMu.Unlock()
|
|
d.shutdownServer(srv, started, "stop")
|
|
}
|
|
|
|
// Query returns the address for the given name and query type. The second
|
|
// return value reports whether the name is known at all (in either A or AAAA),
|
|
// which lets callers distinguish NODATA from NXDOMAIN.
|
|
func (d *dnsServer) Query(q uint16, data string) (netip.Addr, bool) {
|
|
data = strings.ToLower(data)
|
|
d.RLock()
|
|
defer d.RUnlock()
|
|
addr4, haveV4 := d.dnsMap4[data]
|
|
addr6, haveV6 := d.dnsMap6[data]
|
|
nameExists := haveV4 || haveV6
|
|
switch q {
|
|
case dns.TypeA:
|
|
if haveV4 {
|
|
return addr4, nameExists
|
|
}
|
|
case dns.TypeAAAA:
|
|
if haveV6 {
|
|
return addr6, nameExists
|
|
}
|
|
}
|
|
|
|
return netip.Addr{}, nameExists
|
|
}
|
|
|
|
func (d *dnsServer) QueryCert(data string) string {
|
|
if len(data) < 2 {
|
|
return ""
|
|
}
|
|
ip, err := netip.ParseAddr(data[:len(data)-1])
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
hostinfo := d.hostMap.QueryVpnAddr(ip)
|
|
if hostinfo == nil {
|
|
return ""
|
|
}
|
|
|
|
q := hostinfo.GetCert()
|
|
if q == nil {
|
|
return ""
|
|
}
|
|
|
|
b, err := q.Certificate.MarshalJSON()
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
// clearRecords drops all DNS records.
|
|
func (d *dnsServer) clearRecords() {
|
|
d.Lock()
|
|
defer d.Unlock()
|
|
clear(d.dnsMap4)
|
|
clear(d.dnsMap6)
|
|
}
|
|
|
|
// Add adds the first IPv4 and IPv6 address that appears in `addresses` as the record for `host`
|
|
func (d *dnsServer) Add(host string, addresses []netip.Addr) {
|
|
if !d.enabled.Load() {
|
|
return
|
|
}
|
|
host = strings.ToLower(host)
|
|
d.Lock()
|
|
defer d.Unlock()
|
|
haveV4 := false
|
|
haveV6 := false
|
|
for _, addr := range addresses {
|
|
if addr.Is4() && !haveV4 {
|
|
d.dnsMap4[host] = addr
|
|
haveV4 = true
|
|
} else if addr.Is6() && !haveV6 {
|
|
d.dnsMap6[host] = addr
|
|
haveV6 = true
|
|
}
|
|
if haveV4 && haveV6 {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
func (d *dnsServer) isSelfNebulaOrLocalhost(addr string) bool {
|
|
a, _, _ := net.SplitHostPort(addr)
|
|
b, err := netip.ParseAddr(a)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
|
|
if b.IsLoopback() {
|
|
return true
|
|
}
|
|
|
|
//if we found it in this table, it's good
|
|
return d.myVpnAddrsTable.Contains(b)
|
|
}
|
|
|
|
func (d *dnsServer) parseQuery(m *dns.Msg, w dns.ResponseWriter) {
|
|
debugEnabled := d.l.Enabled(context.Background(), slog.LevelDebug)
|
|
// Per RFC 2308 §2.2, a name that exists but has no record of the requested
|
|
// type must be answered with NOERROR and an empty answer section (NODATA),
|
|
// not NXDOMAIN (RFC 2308 §2.1), which is reserved for names that do not
|
|
// exist at all.
|
|
anyNameExists := false
|
|
for _, q := range m.Question {
|
|
switch q.Qtype {
|
|
case dns.TypeA, dns.TypeAAAA:
|
|
qType := dns.TypeToString[q.Qtype]
|
|
if debugEnabled {
|
|
d.l.Debug("DNS query", "type", qType, "name", q.Name)
|
|
}
|
|
ip, nameExists := d.Query(q.Qtype, q.Name)
|
|
if nameExists {
|
|
anyNameExists = true
|
|
}
|
|
if ip.IsValid() {
|
|
rr, err := dns.NewRR(fmt.Sprintf("%s %s %s", q.Name, qType, ip))
|
|
if err == nil {
|
|
m.Answer = append(m.Answer, rr)
|
|
}
|
|
}
|
|
case dns.TypeTXT:
|
|
// We only answer these queries from nebula nodes or localhost
|
|
if !d.isSelfNebulaOrLocalhost(w.RemoteAddr().String()) {
|
|
return
|
|
}
|
|
if debugEnabled {
|
|
d.l.Debug("DNS query", "type", "TXT", "name", q.Name)
|
|
}
|
|
ip := d.QueryCert(q.Name)
|
|
if ip != "" {
|
|
rr, err := dns.NewRR(fmt.Sprintf("%s TXT %s", q.Name, ip))
|
|
if err == nil {
|
|
m.Answer = append(m.Answer, rr)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(m.Answer) == 0 && !anyNameExists {
|
|
m.Rcode = dns.RcodeNameError
|
|
}
|
|
}
|
|
|
|
func (d *dnsServer) handleDnsRequest(w dns.ResponseWriter, r *dns.Msg) {
|
|
m := new(dns.Msg)
|
|
m.SetReply(r)
|
|
m.Compress = false
|
|
|
|
switch r.Opcode {
|
|
case dns.OpcodeQuery:
|
|
d.parseQuery(m, w)
|
|
}
|
|
|
|
w.WriteMsg(m)
|
|
}
|
|
|
|
func getDnsServerAddr(c *config.C) string {
|
|
dnsHost := strings.TrimSpace(c.GetString("lighthouse.dns.host", ""))
|
|
// Old guidance was to provide the literal `[::]` in `lighthouse.dns.host` but that won't resolve.
|
|
if dnsHost == "[::]" {
|
|
dnsHost = "::"
|
|
}
|
|
return net.JoinHostPort(dnsHost, strconv.Itoa(c.GetInt("lighthouse.dns.port", 53)))
|
|
}
|