#lang racket/base ;; ;;; Copyright 2010, 2011, 2012, 2013 Tony Garnock-Jones ;;; ;;; This file is part of marketplace-dns. ;;; ;;; marketplace-dns is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; marketplace-dns is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with marketplace-dns. If not, see ;;; . (require racket/set) (require racket/match) (require "api.rkt") (require "codec.rkt") (require "zonedb.rkt") (require syndicate/actor) (require syndicate/drivers/udp) (require syndicate/drivers/timer) (require "tk-dns.rkt") (provide network-query (struct-out network-reply)) ;; DJB's rules for handling DNS responses. Some of these are handled ;; here (specifically, rules 2 through 5, in the action of ;; filter-dns-reply), some are handled in resolver.rkt (rule 1, in the ;; action of answer-from-zone), and some are handled in the ;; interaction between the resolver and the network-query modules ;; (rule 1 as well, the interplay between CNAME expansion and ;; recursion): ;;
;; When a cache receives a normal DNS response, it learns exactly one ;; of the following five pieces of information: ;; ;; 1. ``The query was not answered because the query name is an ;; alias. I need to change the query name and try again.'' This ;; applies if the answer section of the response contains a CNAME ;; record for the query name and CNAME does not match the query type. ;; ;; 2. ``The query name has no records answering the query, and is also ;; guaranteed to have no records of any other type.'' This applies if ;; the response code is NXDOMAIN and #1 doesn't apply. The amount of ;; time that this information can be cached depends on the contents of ;; the SOA record in the authority section of the response, if there ;; is one. ;; ;; 3. ``The query name has one or more records answering the query.'' ;; This applies if the answer section of the response contains one or ;; more records under the query name matching the query type, and #1 ;; doesn't apply, and #2 doesn't apply. ;; ;; 4. ``The query was not answered because the server does not have ;; the answer. I need to contact other servers.'' This applies if the ;; authority section of the response contains NS records, and the ;; authority section of the response does not contain SOA records, and ;; #1 doesn't apply, and #2 doesn't apply, and #3 doesn't apply. The ;; ``other servers'' are named in the NS records in the authority ;; section. ;; ;; 5. ``The query name has no records answering the query, but it may ;; have records of another type.'' This applies if #1 doesn't apply, ;; and #2 doesn't apply, and #3 doesn't apply, and #4 doesn't ;; apply. The amount of time that this information can be cached ;; depends on the contents of the SOA record in the authority section, ;; if there is one. ;; ;; This procedure requires an incredible amount of bug-prone parsing ;; for a very small amount of information. The underlying problem is ;; that DNS was designed to declare information in a human-oriented ;; format, rather than to support crucial operations in the simplest ;; possible way. ;;
;;--------------------------------------------------------------------------- ;; DJB's djbdns logic for determining whether a response is a lame ;; referral or not is as follows (see his query.c in areas dealing ;; with the variable "flagreferral" and calls to the function ;; "log_lame"): ;; ;; If a response - ;; ;; 1. has response-code no-error (0), and ;; 2. has no CNAME records in the answer section for the domain we're ;; interested in, and ;; 3. has no records in the answer section for the domain and type ;; we're interested in, and ;; 4. has no SOA records in the authority section, and ;; 5. has at least one NS record in the authority section, and ;; 6. that NS record's name is equal to our bailiwick or is not in our ;; bailiwick, ;; ;; then it is a lame referral. ;; ;; Anything with non-zero response-code is clearly not a referral, so ;; that explains (1). If either of checks (2) and (3) fail then the ;; answer is a real, sensible answer to the question we posed. I'm not ;; 100% on why (4) is there; presumably it's to be conservative, and ;; not treat something possibly-valid as definitely-lame? Rules (5) ;; and (6) are the real heart of lameness, where a referral is given ;; to somewhere that can't be more authoritative than the responder ;; was supposed to be. ;; ;; We modify check (4) to ignore SOA records not in bailiwick, just ;; for consistency. It's correct to leave (5) and (6) alone because ;; it's incorrect for a server to refer us to anywhere at the same ;; level of the tree or further up the tree, but we do apply them to ;; every NS record rather than just the first, which is slightly ;; stricter than DJB's rule. ;;--------------------------------------------------------------------------- (define first-timeout 3) ;; seconds ;; A NetworkReply is a (network-reply UniqueID Maybe) ;; representing the final result of a network query. (struct network-reply (unique-id answer) #:transparent) ;; (: next-timeout : Natural -> (Option Natural)) (define (next-timeout timeout) (cond [(equal? timeout 3) 11] [(equal? timeout 11) 45] [else #f])) ;; (: make-dns-query-message : Question Nonnegative-Integer -> DNSMessage) (define (make-dns-query-message q query-id) (dns-message query-id 'request 'query 'non-authoritative 'not-truncated 'no-recursion-desired 'no-recursion-available 'no-error (list q) '() '() '())) ;; (define-type CheckedAnswer (U (Option CompleteAnswer) 'bad-answer 'lame-delegation)) ;; (: filter-dns-reply : Question DNSMessage DomainName -> CheckedAnswer) ;; Filters RRs from the answer, authorities, and additional sections ;; of the passed-in `message`, returning the set of RRs surviving the ;; filter. RRs are only accepted if their `rr-name` falls in the ;; bailiwick of the given `zone-origin`. All of this only happens if ;; the passed-in message's `dns-message-response-code` is `'no-error`: ;; if it's `'name-error`, then `#f` is returned, and if it's any other ;; code, `'bad-answer` is returned. ;; ;; In cases where a CompleteAnswer would otherwise be returned, if the ;; answer is in fact a lame delegation (see notes above), then ;; 'lame-delegation is returned instead. (define (filter-dns-reply q message zone-origin) (case (dns-message-response-code message) [(no-error) ;; (: f : (Listof RR) -> (Setof RR)) (define (f l) (list->set (filter (lambda (claim-rr) (in-bailiwick? (rr-name claim-rr) zone-origin)) l))) ;; Here's where we do the "lame referral" check. This code is ;; nice and simple (though wrong) without it. Ho hum. (define answers (f (dns-message-answers message))) (define unfiltered-authorities (dns-message-authorities message)) (define non-subzone-ns-rrs ;; all NS authorities not for a STRICT subzone of our zone-origin (filter (lambda (rr) (and (eqv? (rdata-type (rr-rdata rr)) 'ns) (or (equal? (rr-name rr) zone-origin) (not (in-bailiwick? (rr-name rr) zone-origin))))) unfiltered-authorities)) (define authorities (f unfiltered-authorities)) (define answers-to-q ;; answers specifically to the question we asked (set-filter (lambda (rr) (equal? (rr-name rr) (question-name q))) answers)) (define lame? (and (set-empty? (filter-by-type answers-to-q 'cname)) (set-empty? (filter-rrs answers-to-q (question-type q) (question-class q))) (set-empty? (filter-by-type authorities 'soa)) (not (null? non-subzone-ns-rrs)))) (if lame? 'lame-delegation (complete-answer answers authorities (f (dns-message-additional message))))] [(name-error) #f] [else (log-info "Abnormal response-code ~v in response to questions ~v" (dns-message-response-code message) (dns-message-questions message)) 'bad-answer])) ;; (: ip->host-name : IPv4 -> String) (define (ip->host-name ip-address) (match-define (vector a b c d) ip-address) (format "~a.~a.~a.~a" a b c d)) ;; (: make-dns-address : IPv4 -> UdpAddress) (define (make-dns-address ip-address) (udp-remote-address (ip->host-name ip-address) 53)) ;; (: network-query : (All (ParentState) ;; UdpAddress Question DomainName (Listof DomainName) Any -> ;; Void)) (define (network-query s q zone-origin server-names unique-id) (spawn* #:name (list 'network-query q) (field [timeout first-timeout] [known-addresses #hash()] ;; Hash DomainName (Listof UdpAddress) [remaining-addresses '()] ;; Listof UdpAddress [current-name #f] ;; Option DomainName [remaining-names server-names]) ;; Listof DomainName (define (on-answer ans server-ip) (match ans ['bad-answer ;; can come from filter-dns-reply (try-next-server)] ['lame-delegation ;; can come from filter-dns-reply (log-info "Lame delegation received from ~v (at ~v) in bailiwick ~v in response to ~v" (current-name) server-ip zone-origin q) (when (and (current-name) server-ip) ;; Actually remove the offending IP address so it's never tried again. (known-addresses (hash-update (known-addresses) (current-name) (lambda (addrs) (remove server-ip addrs))))) (try-next-server)] [(and (or (? complete-answer?) #f) ans) (send! (network-reply unique-id ans))])) (define (try-next-server) (if (not (timeout)) ;; No more timeouts to try, so give up. (on-answer (empty-complete-answer) #f) (match (remaining-addresses) ['() (match (remaining-names) ['() ;; No more addresses to try with this timeout. Refill the list ;; and bump the timeout and retry. ;; TODO: randomize ordering of servers in list. (Don't forget the initial fill.) (timeout (next-timeout (timeout))) (current-name #f) (remaining-names server-names) (try-next-server)] [(cons next-name new-remaining-names) (current-name next-name) (remaining-names new-remaining-names) (if (hash-has-key? (known-addresses) next-name) (begin (remaining-addresses (hash-ref (known-addresses) (current-name))) (try-next-server)) (let ((subq (ns-question next-name q))) (react (on-start (send! subq)) (stop-when (message (answered-question subq $ans)) (define ips (for/list [(a (extract-addresses next-name ans))] (make-dns-address a))) (known-addresses (hash-set (known-addresses) next-name ips)) (remaining-addresses ips) (try-next-server)))))])] [(cons current-ip new-remaining-ips) (remaining-addresses new-remaining-ips) (define rpc-id (gensym 'network-query/allocate-query-id)) (react (on-start (send! `(request ,rpc-id allocate-query-id))) (stop-when (message `(reply ,rpc-id ,(? exact-nonnegative-integer? $id))) (remaining-addresses new-remaining-ips) (send-request id current-ip)))]))) (define (send-request query-id server-ip) (define query (make-dns-query-message q query-id)) (define reply-wait-id (list s query-id 'reply-wait)) (define timeout-id (list s query-id 'timeout)) (define start-time (current-inexact-milliseconds)) (log-debug "Sending ~v ~v to ~v ~v with ~v seconds of timeout" q query-id zone-origin server-ip (timeout)) ;; NB: ANALYSIS: Previous implementation of this used a ;; quasi-join, where one endpoint deleted the other. Here the two ;; stop-when clauses do a similar job. Also, we can pull the ;; `release-query-id` send up to an on-stop clause. (react (on-start (send! (dns-request query s server-ip)) (send! (set-timer timeout-id (* (timeout) 1000) 'relative))) (on-stop (send! (list 'release-query-id query-id))) (stop-when (message (timer-expired timeout-id _)) (log-debug "Timed out ~v ~v to ~v ~v after ~v seconds" q query-id zone-origin server-ip (timeout)) (try-next-server)) (stop-when (message (dns-reply (? (lambda (m) (= (dns-message-id m) (dns-message-id query))) $reply-message) $source s)) ;; TODO: maybe receive only specifically from the queried IP address? (log-debug "Answer to ~v from ~v ~v in ~v ms~n-- Answers: ~v~n-- Authorities: ~v~n-- Additional: ~v" q zone-origin server-ip (inexact->exact (round (- (current-inexact-milliseconds) start-time))) (dns-message-answers reply-message) (dns-message-authorities reply-message) (dns-message-additional reply-message)) (on-answer (filter-dns-reply q reply-message zone-origin) server-ip)))) (try-next-server))) (define ((dns-message-id-matches? expected) m) (= (dns-message-id m) expected))