322 lines
14 KiB
Racket
322 lines
14 KiB
Racket
#lang racket/base
|
|
;;
|
|
;;; Copyright 2010, 2011, 2012, 2013 Tony Garnock-Jones <tonyg@ccs.neu.edu>
|
|
;;;
|
|
;;; This file is part of marketplace-dns.
|
|
;;;
|
|
;;; marketplace-dns is free software: you can redistribute it and/or
|
|
;;; modify it under the terms of the GNU General Public License as
|
|
;;; published by the Free Software Foundation, either version 3 of the
|
|
;;; License, or (at your option) any later version.
|
|
;;;
|
|
;;; marketplace-dns is distributed in the hope that it will be useful,
|
|
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
;;; General Public License for more details.
|
|
;;;
|
|
;;; You should have received a copy of the GNU General Public License
|
|
;;; along with marketplace-dns. If not, see
|
|
;;; <http://www.gnu.org/licenses/>.
|
|
|
|
(require racket/set)
|
|
(require racket/match)
|
|
(require "api.rkt")
|
|
(require "codec.rkt")
|
|
(require "zonedb.rkt")
|
|
(require syndicate/actor)
|
|
(require syndicate/drivers/udp)
|
|
(require syndicate/drivers/timer)
|
|
(require "tk-dns.rkt")
|
|
|
|
(provide network-query
|
|
(struct-out network-reply))
|
|
|
|
;; DJB's rules for handling DNS responses. Some of these are handled
|
|
;; here (specifically, rules 2 through 5, in the action of
|
|
;; filter-dns-reply), some are handled in resolver.rkt (rule 1, in the
|
|
;; action of answer-from-zone), and some are handled in the
|
|
;; interaction between the resolver and the network-query modules
|
|
;; (rule 1 as well, the interplay between CNAME expansion and
|
|
;; recursion):
|
|
|
|
;; <blockquote>
|
|
;; When a cache receives a normal DNS response, it learns exactly one
|
|
;; of the following five pieces of information:
|
|
;;
|
|
;; 1. ``The query was not answered because the query name is an
|
|
;; alias. I need to change the query name and try again.'' This
|
|
;; applies if the answer section of the response contains a CNAME
|
|
;; record for the query name and CNAME does not match the query type.
|
|
;;
|
|
;; 2. ``The query name has no records answering the query, and is also
|
|
;; guaranteed to have no records of any other type.'' This applies if
|
|
;; the response code is NXDOMAIN and #1 doesn't apply. The amount of
|
|
;; time that this information can be cached depends on the contents of
|
|
;; the SOA record in the authority section of the response, if there
|
|
;; is one.
|
|
;;
|
|
;; 3. ``The query name has one or more records answering the query.''
|
|
;; This applies if the answer section of the response contains one or
|
|
;; more records under the query name matching the query type, and #1
|
|
;; doesn't apply, and #2 doesn't apply.
|
|
;;
|
|
;; 4. ``The query was not answered because the server does not have
|
|
;; the answer. I need to contact other servers.'' This applies if the
|
|
;; authority section of the response contains NS records, and the
|
|
;; authority section of the response does not contain SOA records, and
|
|
;; #1 doesn't apply, and #2 doesn't apply, and #3 doesn't apply. The
|
|
;; ``other servers'' are named in the NS records in the authority
|
|
;; section.
|
|
;;
|
|
;; 5. ``The query name has no records answering the query, but it may
|
|
;; have records of another type.'' This applies if #1 doesn't apply,
|
|
;; and #2 doesn't apply, and #3 doesn't apply, and #4 doesn't
|
|
;; apply. The amount of time that this information can be cached
|
|
;; depends on the contents of the SOA record in the authority section,
|
|
;; if there is one.
|
|
;;
|
|
;; This procedure requires an incredible amount of bug-prone parsing
|
|
;; for a very small amount of information. The underlying problem is
|
|
;; that DNS was designed to declare information in a human-oriented
|
|
;; format, rather than to support crucial operations in the simplest
|
|
;; possible way.
|
|
;; </blockquote>
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
;; DJB's djbdns logic for determining whether a response is a lame
|
|
;; referral or not is as follows (see his query.c in areas dealing
|
|
;; with the variable "flagreferral" and calls to the function
|
|
;; "log_lame"):
|
|
;;
|
|
;; If a response -
|
|
;;
|
|
;; 1. has response-code no-error (0), and
|
|
;; 2. has no CNAME records in the answer section for the domain we're
|
|
;; interested in, and
|
|
;; 3. has no records in the answer section for the domain and type
|
|
;; we're interested in, and
|
|
;; 4. has no SOA records in the authority section, and
|
|
;; 5. has at least one NS record in the authority section, and
|
|
;; 6. that NS record's name is equal to our bailiwick or is not in our
|
|
;; bailiwick,
|
|
;;
|
|
;; then it is a lame referral.
|
|
;;
|
|
;; Anything with non-zero response-code is clearly not a referral, so
|
|
;; that explains (1). If either of checks (2) and (3) fail then the
|
|
;; answer is a real, sensible answer to the question we posed. I'm not
|
|
;; 100% on why (4) is there; presumably it's to be conservative, and
|
|
;; not treat something possibly-valid as definitely-lame? Rules (5)
|
|
;; and (6) are the real heart of lameness, where a referral is given
|
|
;; to somewhere that can't be more authoritative than the responder
|
|
;; was supposed to be.
|
|
;;
|
|
;; We modify check (4) to ignore SOA records not in bailiwick, just
|
|
;; for consistency. It's correct to leave (5) and (6) alone because
|
|
;; it's incorrect for a server to refer us to anywhere at the same
|
|
;; level of the tree or further up the tree, but we do apply them to
|
|
;; every NS record rather than just the first, which is slightly
|
|
;; stricter than DJB's rule.
|
|
|
|
;;---------------------------------------------------------------------------
|
|
|
|
(define first-timeout 3) ;; seconds
|
|
|
|
;; A NetworkReply is a (network-reply UniqueID Maybe<CompleteAnswer>)
|
|
;; representing the final result of a network query.
|
|
(struct network-reply (unique-id answer) #:transparent)
|
|
|
|
;; (: next-timeout : Natural -> (Option Natural))
|
|
(define (next-timeout timeout)
|
|
(cond
|
|
[(equal? timeout 3) 11]
|
|
[(equal? timeout 11) 45]
|
|
[else #f]))
|
|
|
|
;; (: make-dns-query-message : Question Nonnegative-Integer -> DNSMessage)
|
|
(define (make-dns-query-message q query-id)
|
|
(dns-message query-id
|
|
'request
|
|
'query
|
|
'non-authoritative
|
|
'not-truncated
|
|
'no-recursion-desired
|
|
'no-recursion-available
|
|
'no-error
|
|
(list q)
|
|
'()
|
|
'()
|
|
'()))
|
|
|
|
;; (define-type CheckedAnswer (U (Option CompleteAnswer) 'bad-answer 'lame-delegation))
|
|
|
|
;; (: filter-dns-reply : Question DNSMessage DomainName -> CheckedAnswer)
|
|
;; Filters RRs from the answer, authorities, and additional sections
|
|
;; of the passed-in `message`, returning the set of RRs surviving the
|
|
;; filter. RRs are only accepted if their `rr-name` falls in the
|
|
;; bailiwick of the given `zone-origin`. All of this only happens if
|
|
;; the passed-in message's `dns-message-response-code` is `'no-error`:
|
|
;; if it's `'name-error`, then `#f` is returned, and if it's any other
|
|
;; code, `'bad-answer` is returned.
|
|
;;
|
|
;; In cases where a CompleteAnswer would otherwise be returned, if the
|
|
;; answer is in fact a lame delegation (see notes above), then
|
|
;; 'lame-delegation is returned instead.
|
|
(define (filter-dns-reply q message zone-origin)
|
|
(case (dns-message-response-code message)
|
|
[(no-error)
|
|
;; (: f : (Listof RR) -> (Setof RR))
|
|
(define (f l)
|
|
(list->set (filter (lambda (claim-rr) (in-bailiwick? (rr-name claim-rr) zone-origin)) l)))
|
|
;; Here's where we do the "lame referral" check. This code is
|
|
;; nice and simple (though wrong) without it. Ho hum.
|
|
(define answers (f (dns-message-answers message)))
|
|
(define unfiltered-authorities (dns-message-authorities message))
|
|
(define non-subzone-ns-rrs ;; all NS authorities not for a STRICT subzone of our zone-origin
|
|
(filter (lambda (rr) (and (eqv? (rdata-type (rr-rdata rr)) 'ns)
|
|
(or (equal? (rr-name rr) zone-origin)
|
|
(not (in-bailiwick? (rr-name rr) zone-origin)))))
|
|
unfiltered-authorities))
|
|
(define authorities (f unfiltered-authorities))
|
|
(define answers-to-q ;; answers specifically to the question we asked
|
|
(set-filter (lambda (rr) (equal? (rr-name rr) (question-name q))) answers))
|
|
(define lame?
|
|
(and (set-empty? (filter-by-type answers-to-q 'cname))
|
|
(set-empty? (filter-rrs answers-to-q (question-type q) (question-class q)))
|
|
(set-empty? (filter-by-type authorities 'soa))
|
|
(not (null? non-subzone-ns-rrs))))
|
|
(if lame?
|
|
'lame-delegation
|
|
(complete-answer answers
|
|
authorities
|
|
(f (dns-message-additional message))))]
|
|
[(name-error) #f]
|
|
[else
|
|
(log-info "Abnormal response-code ~v in response to questions ~v"
|
|
(dns-message-response-code message)
|
|
(dns-message-questions message))
|
|
'bad-answer]))
|
|
|
|
;; (: ip->host-name : IPv4 -> String)
|
|
(define (ip->host-name ip-address)
|
|
(match-define (vector a b c d) ip-address)
|
|
(format "~a.~a.~a.~a" a b c d))
|
|
|
|
;; (: make-dns-address : IPv4 -> UdpAddress)
|
|
(define (make-dns-address ip-address)
|
|
(udp-remote-address (ip->host-name ip-address) 53))
|
|
|
|
;; (: network-query : (All (ParentState)
|
|
;; UdpAddress Question DomainName (Listof DomainName) Any ->
|
|
;; Void))
|
|
(define (network-query s q zone-origin server-names unique-id)
|
|
(spawn*
|
|
#:name (list 'network-query q)
|
|
(field [timeout first-timeout]
|
|
[known-addresses #hash()] ;; Hash DomainName (Listof UdpAddress)
|
|
[remaining-addresses '()] ;; Listof UdpAddress
|
|
[current-name #f] ;; Option DomainName
|
|
[remaining-names server-names]) ;; Listof DomainName
|
|
|
|
(define (on-answer ans server-ip)
|
|
(match ans
|
|
['bad-answer ;; can come from filter-dns-reply
|
|
(try-next-server)]
|
|
['lame-delegation ;; can come from filter-dns-reply
|
|
(log-info "Lame delegation received from ~v (at ~v) in bailiwick ~v in response to ~v"
|
|
(current-name)
|
|
server-ip
|
|
zone-origin
|
|
q)
|
|
(when (and (current-name) server-ip)
|
|
;; Actually remove the offending IP address so it's never tried again.
|
|
(known-addresses (hash-update (known-addresses)
|
|
(current-name)
|
|
(lambda (addrs) (remove server-ip addrs)))))
|
|
(try-next-server)]
|
|
[(and (or (? complete-answer?) #f) ans)
|
|
(send! (network-reply unique-id ans))]))
|
|
|
|
(define (try-next-server)
|
|
(if (not (timeout))
|
|
;; No more timeouts to try, so give up.
|
|
(on-answer (empty-complete-answer) #f)
|
|
(match (remaining-addresses)
|
|
['()
|
|
(match (remaining-names)
|
|
['()
|
|
;; No more addresses to try with this timeout. Refill the list
|
|
;; and bump the timeout and retry.
|
|
;; TODO: randomize ordering of servers in list. (Don't forget the initial fill.)
|
|
(timeout (next-timeout (timeout)))
|
|
(current-name #f)
|
|
(remaining-names server-names)
|
|
(try-next-server)]
|
|
[(cons next-name new-remaining-names)
|
|
(current-name next-name)
|
|
(remaining-names new-remaining-names)
|
|
(if (hash-has-key? (known-addresses) next-name)
|
|
(begin (remaining-addresses (hash-ref (known-addresses) (current-name)))
|
|
(try-next-server))
|
|
(let ((subq (ns-question next-name q)))
|
|
(react (on-start (send! subq))
|
|
(stop-when (message (answered-question subq $ans))
|
|
(define ips
|
|
(for/list [(a (extract-addresses next-name ans))]
|
|
(make-dns-address a)))
|
|
(known-addresses (hash-set (known-addresses) next-name ips))
|
|
(remaining-addresses ips)
|
|
(try-next-server)))))])]
|
|
[(cons current-ip new-remaining-ips)
|
|
(remaining-addresses new-remaining-ips)
|
|
(define rpc-id (gensym 'network-query/allocate-query-id))
|
|
(react (on-start (send! `(request ,rpc-id allocate-query-id)))
|
|
(stop-when (message `(reply ,rpc-id ,(? exact-nonnegative-integer? $id)))
|
|
(remaining-addresses new-remaining-ips)
|
|
(send-request id current-ip)))])))
|
|
|
|
(define (send-request query-id server-ip)
|
|
(define query (make-dns-query-message q query-id))
|
|
(define reply-wait-id (list s query-id 'reply-wait))
|
|
(define timeout-id (list s query-id 'timeout))
|
|
(define start-time (current-inexact-milliseconds))
|
|
(log-debug "Sending ~v ~v to ~v ~v with ~v seconds of timeout"
|
|
q query-id
|
|
zone-origin server-ip
|
|
(timeout))
|
|
;; NB: ANALYSIS: Previous implementation of this used a
|
|
;; quasi-join, where one endpoint deleted the other. Here the two
|
|
;; stop-when clauses do a similar job. Also, we can pull the
|
|
;; `release-query-id` send up to an on-stop clause.
|
|
(react (on-start
|
|
(send! (dns-request query s server-ip))
|
|
(send! (set-timer timeout-id (* (timeout) 1000) 'relative)))
|
|
(on-stop
|
|
(send! (list 'release-query-id query-id)))
|
|
(stop-when (message (timer-expired timeout-id _))
|
|
(log-debug "Timed out ~v ~v to ~v ~v after ~v seconds"
|
|
q query-id
|
|
zone-origin server-ip
|
|
(timeout))
|
|
(try-next-server))
|
|
(stop-when (message (dns-reply
|
|
(? (lambda (m) (= (dns-message-id m)
|
|
(dns-message-id query)))
|
|
$reply-message)
|
|
$source
|
|
s))
|
|
;; TODO: maybe receive only specifically from the queried IP address?
|
|
(log-debug "Answer to ~v from ~v ~v in ~v ms~n-- Answers: ~v~n-- Authorities: ~v~n-- Additional: ~v"
|
|
q zone-origin server-ip
|
|
(inexact->exact (round (- (current-inexact-milliseconds) start-time)))
|
|
(dns-message-answers reply-message)
|
|
(dns-message-authorities reply-message)
|
|
(dns-message-additional reply-message))
|
|
(on-answer (filter-dns-reply q reply-message zone-origin) server-ip))))
|
|
|
|
(try-next-server)))
|
|
|
|
(define ((dns-message-id-matches? expected) m)
|
|
(= (dns-message-id m) expected))
|