#lang racket/base (require racket/set) (require racket/match) (require "api.rkt") (require "codec.rkt") (require "zonedb.rkt") (require "../racket-matrix/os2.rkt") (require "../racket-matrix/os2-udp.rkt") (require "../racket-matrix/os2-timer.rkt") (require "os2-dns.rkt") (provide network-query (struct-out network-reply)) ;; DJB's rules for handling DNS responses. Some of these are handled ;; here (specifically, rules 2 through 5, in the action of ;; filter-dns-reply), some are handled in resolver.rkt (rule 1, in the ;; action of answer-from-zone), and some are handled in the ;; interaction between the resolver and the network-query modules ;; (rule 1 as well, the interplay between CNAME expansion and ;; recursion): ;;
;; When a cache receives a normal DNS response, it learns exactly one ;; of the following five pieces of information: ;; ;; 1. ``The query was not answered because the query name is an ;; alias. I need to change the query name and try again.'' This ;; applies if the answer section of the response contains a CNAME ;; record for the query name and CNAME does not match the query type. ;; ;; 2. ``The query name has no records answering the query, and is also ;; guaranteed to have no records of any other type.'' This applies if ;; the response code is NXDOMAIN and #1 doesn't apply. The amount of ;; time that this information can be cached depends on the contents of ;; the SOA record in the authority section of the response, if there ;; is one. ;; ;; 3. ``The query name has one or more records answering the query.'' ;; This applies if the answer section of the response contains one or ;; more records under the query name matching the query type, and #1 ;; doesn't apply, and #2 doesn't apply. ;; ;; 4. ``The query was not answered because the server does not have ;; the answer. I need to contact other servers.'' This applies if the ;; authority section of the response contains NS records, and the ;; authority section of the response does not contain SOA records, and ;; #1 doesn't apply, and #2 doesn't apply, and #3 doesn't apply. The ;; ``other servers'' are named in the NS records in the authority ;; section. ;; ;; 5. ``The query name has no records answering the query, but it may ;; have records of another type.'' This applies if #1 doesn't apply, ;; and #2 doesn't apply, and #3 doesn't apply, and #4 doesn't ;; apply. The amount of time that this information can be cached ;; depends on the contents of the SOA record in the authority section, ;; if there is one. ;; ;; This procedure requires an incredible amount of bug-prone parsing ;; for a very small amount of information. The underlying problem is ;; that DNS was designed to declare information in a human-oriented ;; format, rather than to support crucial operations in the simplest ;; possible way. ;;
;;--------------------------------------------------------------------------- ;; DJB's djbdns logic for determining whether a response is a lame ;; referral or not is as follows (see his query.c in areas dealing ;; with the variable "flagreferral" and calls to the function ;; "log_lame"): ;; ;; If a response - ;; ;; 1. has response-code no-error (0), and ;; 2. has no CNAME records in the answer section for the domain we're ;; interested in, and ;; 3. has no records in the answer section for the domain and type ;; we're interested in, and ;; 4. has no SOA records in the authority section, and ;; 5. has at least one NS record in the authority section, and ;; 6. that NS record's name is equal to our bailiwick or is not in our ;; bailiwick, ;; ;; then it is a lame referral. ;; ;; Anything with non-zero response-code is clearly not a referral, so ;; that explains (1). If either of checks (2) and (3) fail then the ;; answer is a real, sensible answer to the question we posed. I'm not ;; 100% on why (4) is there; presumably it's to be conservative, and ;; not treat something possibly-valid as definitely-lame? Rules (5) ;; and (6) are the real heart of lameness, where a referral is given ;; to somewhere that can't be more authoritative than the responder ;; was supposed to be. ;; ;; We modify check (4) to ignore SOA records not in bailiwick, just ;; for consistency. It's correct to leave (5) and (6) alone because ;; it's incorrect for a server to refer us to anywhere at the same ;; level of the tree or further up the tree, but we do apply them to ;; every NS record rather than just the first, which is slightly ;; stricter than DJB's rule. ;;--------------------------------------------------------------------------- ;; A NetworkQueryResult is a ListOf, some actions to take: ;; either involved in or resulting from completion of the network ;; query. (define first-timeout 3) ;; seconds ;; A NetworkRequest is a (network-request UdpAddress Question ;; DomainName NEListOf UniqueID) representing the ;; parameters used to start and process a network query. (struct network-request (client-socket question zone-origin server-names unique-id) #:prefab) ;; A NetworkReply is a (network-reply UniqueID Maybe) ;; representing the final result of a network query. (struct network-reply (unique-id answer) #:prefab) ;; A NetworkQueryState is a (network-query-state NetworkRequest ;; Integer Map> ListOf ;; Maybe ListOf), representing an in-progress ;; DNS network query. (struct network-query-state (request timeout known-addresses remaining-addresses current-name remaining-names) #:prefab) ;; seconds -> Maybe (define (next-timeout timeout) (case timeout ((3) 11) ((11) 45) ((45) #f))) (define (make-dns-query-message q query-id) (dns-message query-id 'request 'query 'non-authoritative 'not-truncated 'no-recursion-desired 'no-recursion-available 'no-error (list q) '() '() '())) ;; filter-dns-reply : Question DNSMessage DomainName ;; -> (or Maybe 'bad-answer 'lame-delegation) ;; ;; Filters RRs from the answer, authorities, and additional sections ;; of the passed-in `message`, returning the set of RRs surviving the ;; filter. RRs are only accepted if their `rr-name` falls in the ;; bailiwick of the given `zone-origin`. All of this only happens if ;; the passed-in message's `dns-message-response-code` is `'no-error`: ;; if it's `'name-error`, then `#f` is returned, and if it's any other ;; code, `'bad-answer` is returned. ;; ;; In cases where a CompleteAnswer would otherwise be returned, if the ;; answer is in fact a lame delegation (see notes above), then ;; 'lame-delegation is returned instead. (define (filter-dns-reply q message zone-origin) (case (dns-message-response-code message) [(no-error) (define (f l) (list->set (filter (lambda (claim-rr) (in-bailiwick? (rr-name claim-rr) zone-origin)) l))) ;; Here's where we do the "lame referral" check. This code is ;; nice and simple (though wrong) without it. Ho hum. (define answers (f (dns-message-answers message))) (define unfiltered-authorities (dns-message-authorities message)) (define non-subzone-ns-rrs ;; all NS authorities not for a STRICT subzone of our zone-origin (filter (lambda (rr) (and (eqv? (rr-type rr) 'ns) (or (equal? (rr-name rr) zone-origin) (not (in-bailiwick? (rr-name rr) zone-origin))))) unfiltered-authorities)) (define authorities (f unfiltered-authorities)) (define answers-to-q ;; answers specifically to the question we asked (set-filter (lambda (rr) (equal? (rr-name rr) (question-name q))) answers)) (define lame? (and (set-empty? (filter-by-type answers-to-q 'cname)) (set-empty? (filter-rrs answers-to-q (question-type q) (question-class q))) (set-empty? (filter-by-type authorities 'soa)) (not (null? non-subzone-ns-rrs)))) (if lame? 'lame-delegation (complete-answer answers authorities (f (dns-message-additional message))))] [(name-error) #f] [else (log-info (format "Abnormal response-code ~v in response to questions ~v" (dns-message-response-code message) (dns-message-questions message))) 'bad-answer])) ;; IPv4 -> String (define (ip->host-name ip-address) (match-define (vector a b c d) ip-address) (format "~a.~a.~a.~a" a b c d)) (define (make-dns-address ip-address) (udp-address (ip->host-name ip-address) 53)) ;; network-query : UdpAddress Question DomainName NEListOf UniqueId -> BootK (define (network-query s q zone-origin server-names unique-id) (lambda (self-pid) (try-next-server (network-query-state (network-request s q zone-origin server-names unique-id) first-timeout (hash) '() #f server-names)))) (define (try-next-server w) (match w [(network-query-state _ #f _ _ _ _) ;; No more timeouts to try, so give up. (on-answer w (empty-complete-answer) #f)] [(network-query-state req timeout _ '() _ '()) ;; No more addresses to try with this timeout. Refill the list ;; and bump the timeout and retry. ;; TODO: randomize ordering of servers in list. (Don't forget the initial fill.) (try-next-server (struct-copy network-query-state w [timeout (next-timeout timeout)] [remaining-addresses '()] [current-name #f] [remaining-names (network-request-server-names req)]))] [(network-query-state req _ known-addresses '() _ (cons current-name remaining-names)) (if (hash-has-key? known-addresses current-name) (try-next-server (struct-copy network-query-state w [remaining-addresses (hash-ref known-addresses current-name)] [current-name current-name] [remaining-names remaining-names])) (let ((subq (ns-question current-name (network-request-question req)))) (transition (struct-copy network-query-state w [current-name current-name] [remaining-names remaining-names]) (send-message subq) (role/fresh subq-id (topic-subscriber (answered-question subq (wild))) #:state w [(answered-question (== subq) ans) (define ips (map make-dns-address (set->list (extract-addresses current-name ans)))) (sequence-actions (try-next-server (struct-copy network-query-state w [known-addresses (hash-set known-addresses current-name ips)] [remaining-addresses ips])) (delete-role subq-id))]))))] [(network-query-state req timeout _ (cons current-ip remaining-ips) _ _) (define rpc-id (gensym 'network-query/allocate-query-id)) (transition w (send-message `(request ,rpc-id allocate-query-id)) (role (topic-subscriber `(reply ,rpc-id ,(wild))) #:name rpc-id #:state w [`(reply ,(== rpc-id) ,id) (sequence-actions (send-request (struct-copy network-query-state w [remaining-addresses remaining-ips]) id timeout current-ip) (delete-role rpc-id))]))])) (define (on-answer w ans server-ip) (match ans ['bad-answer ;; can come from filter-dns-reply (try-next-server w)] ['lame-delegation ;; can come from filter-dns-reply (match-define (network-query-state req _ known-addresses _ current-name _) w) (match-define (network-request _ q zone-origin _ _) req) (log-info (format "Lame delegation received from ~v (at ~v) in bailiwick ~v in response to ~v" current-name server-ip zone-origin q)) ;; Actually remove the offending IP address so it's never tried again. (try-next-server (struct-copy network-query-state w [known-addresses (hash-update known-addresses current-name (lambda (addrs) (remove server-ip addrs)))]))] [else (transition w (send-message (network-reply (network-request-unique-id (network-query-state-request w)) ans)))])) (define (send-request w query-id timeout server-ip) (match-define (network-request s q zone-origin _ _) (network-query-state-request w)) (define query (make-dns-query-message q query-id)) (define subscription-id (list s query-id)) (define start-time (current-inexact-milliseconds)) (log-debug (format "Sending ~v ~v to ~v ~v with ~v seconds of timeout" q query-id zone-origin server-ip timeout)) (transition w (send-message (dns-request query s server-ip)) (send-message (set-timer subscription-id (* timeout 1000) 'relative)) (role (set (topic-subscriber (timer-expired subscription-id (wild))) (topic-subscriber (dns-reply (wild) (wild) s))) #:name subscription-id #:state w [(timer-expired (== subscription-id) _) (log-debug (format "Timed out ~v ~v to ~v ~v after ~v seconds" q query-id zone-origin server-ip timeout)) (sequence-actions (try-next-server w) (delete-role subscription-id) (send-message (list 'release-query-id query-id)))] [(dns-reply reply-message source (== s)) ;; TODO: maybe receive only specifically from the queried IP address? (log-debug (format "Answer to ~v from ~v ~v in ~v ms~n-- Answers: ~v~n-- Authorities: ~v~n-- Additional: ~v" q zone-origin server-ip (inexact->exact (round (- (current-inexact-milliseconds) start-time))) (dns-message-answers reply-message) (dns-message-authorities reply-message) (dns-message-additional reply-message))) (if (not (= (dns-message-id reply-message) (dns-message-id query))) (transition w) (sequence-actions (on-answer w (filter-dns-reply q reply-message zone-origin) server-ip) (delete-role subscription-id) (send-message (list 'release-query-id query-id))))])))