racket-dns-2012/network-query.rkt

#lang racket/base

(require racket/set)
(require racket/match)
(require "api.rkt")
(require "codec.rkt")
(require "zonedb.rkt")
(require "../racket-matrix/os-big-bang.rkt")
(require "../racket-matrix/os-udp.rkt")
(require "../racket-matrix/os-timer.rkt")
(require "os-dns.rkt")

(provide network-query
	 (struct-out network-reply))

;; DJB's rules for handling DNS responses. Some of these are handled
;; here (specifically, rules 2 through 5, in the action of
;; filter-dns-reply), some are handled in resolver.rkt (rule 1, in the
;; action of answer-from-zone), and some are handled in the
;; interaction between the resolver and the network-query modules
;; (rule 1 as well, the interplay between CNAME expansion and
;; recursion):

;; <blockquote>
;; When a cache receives a normal DNS response, it learns exactly one
;; of the following five pieces of information:
;;
;; 1. ``The query was not answered because the query name is an
;; alias. I need to change the query name and try again.'' This
;; applies if the answer section of the response contains a CNAME
;; record for the query name and CNAME does not match the query type.
;;
;; 2. ``The query name has no records answering the query, and is also
;; guaranteed to have no records of any other type.'' This applies if
;; the response code is NXDOMAIN and #1 doesn't apply. The amount of
;; time that this information can be cached depends on the contents of
;; the SOA record in the authority section of the response, if there
;; is one.
;;
;; 3. ``The query name has one or more records answering the query.''
;; This applies if the answer section of the response contains one or
;; more records under the query name matching the query type, and #1
;; doesn't apply, and #2 doesn't apply.
;;
;; 4. ``The query was not answered because the server does not have
;; the answer. I need to contact other servers.'' This applies if the
;; authority section of the response contains NS records, and the
;; authority section of the response does not contain SOA records, and
;; #1 doesn't apply, and #2 doesn't apply, and #3 doesn't apply. The
;; ``other servers'' are named in the NS records in the authority
;; section.
;;
;; 5. ``The query name has no records answering the query, but it may
;; have records of another type.'' This applies if #1 doesn't apply,
;; and #2 doesn't apply, and #3 doesn't apply, and #4 doesn't
;; apply. The amount of time that this information can be cached
;; depends on the contents of the SOA record in the authority section,
;; if there is one.
;;
;; This procedure requires an incredible amount of bug-prone parsing
;; for a very small amount of information. The underlying problem is
;; that DNS was designed to declare information in a human-oriented
;; format, rather than to support crucial operations in the simplest
;; possible way.
;; </blockquote>

;; A NetworkQueryResult is a ListOf<Action>, some actions to take:
;; either involved in or resulting from completion of the network
;; query.

(define first-timeout 3) ;; seconds

;; A NetworkRequest is a (network-request UdpAddress Question
;; DomainName NEListOf<DomainName> UniqueID) representing the
;; parameters used to start and process a network query.
(struct network-request (client-socket
			 question
			 zone-origin
			 server-names
			 unique-id)
	#:transparent)

;; A NetworkReply is a (network-reply UniqueID Maybe<CompleteAnswer>)
;; representing the final result of a network query.
(struct network-reply (unique-id answer) #:transparent)

;; A NetworkQueryState is a (network-query-state NetworkRequest
;; Integer Map<DomainName,ListOf<UdpAddress>> ListOf<UdpAddress>
;; ListOf<DomainName>), representing an in-progress DNS network query.
(struct network-query-state (request
			     timeout
			     known-addresses
			     remaining-addresses
			     remaining-names)
	#:transparent)

;; seconds -> Maybe<seconds>
(define (next-timeout timeout)
  (case timeout
    ((3) 11)
    ((11) 45)
    ((45) #f)))

(define (make-dns-query-message q query-id)
  (dns-message query-id
	       'request
	       'query
	       'non-authoritative
	       'not-truncated
	       'no-recursion-desired
	       'no-recursion-available
	       'no-error
	       (list q)
	       '()
	       '()
	       '()))

;; filter-dns-reply : DNSMessage DomainName -> (or Maybe<CompleteAnswer> 'bad-answer)
;;
;; Filters RRs from the answer, authorities, and additional sections
;; of the passed-in `message`, returning the set of RRs surviving the
;; filter. RRs are only accepted if their `rr-name` falls in the
;; bailiwick of the given `zone-origin`. All of this only happens if
;; the passed-in message's `dns-message-response-code` is `'no-error`:
;; if it's `'name-error`, then `#f` is returned, and if it's any other
;; code, `'bad-answer` is returned.
(define (filter-dns-reply message zone-origin)
  (case (dns-message-response-code message)
    [(no-error)
     (define (f l)
       (list->set (filter (lambda (claim-rr) (in-bailiwick? (rr-name claim-rr) zone-origin)) l)))
     (complete-answer (f (dns-message-answers message))
		      (f (dns-message-authorities message))
		      (f (dns-message-additional message)))]
    [(name-error) #f]
    [else
     (log-info (format "Abnormal response-code ~v in response to questions ~v"
		       (dns-message-response-code message)
		       (dns-message-questions message)))
     'bad-answer]))

;; IPv4 -> String
(define (ip->host-name ip-address)
  (match-define (vector a b c d) ip-address)
  (format "~a.~a.~a.~a" a b c d))

(define (make-dns-address ip-address)
  (udp-address (ip->host-name ip-address) 53))

;; network-query : UdpAddress Question DomainName NEListOf<DomainName> UniqueId -> BootK
(define (network-query s q zone-origin server-names unique-id)
  (os-big-bang/transition
   (try-next-server (network-query-state (network-request s q zone-origin server-names unique-id)
					 first-timeout
					 (hash)
					 '()
					 server-names))))

(define (try-next-server w)
  (match w
    [(network-query-state _ #f _ _ _)
     ;; No more timeouts to try, so give up.
     (on-answer w (empty-complete-answer))]
    [(network-query-state req timeout _ '() '())
     ;; No more addresses to try with this timeout. Refill the list
     ;; and bump the timeout and retry.
     ;; TODO: randomize ordering of servers in list. (Don't forget the initial fill.)
     (try-next-server (struct-copy network-query-state w
				   [timeout (next-timeout timeout)]
				   [remaining-addresses '()]
				   [remaining-names (network-request-server-names req)]))]
    [(network-query-state req _ known-addresses '() (cons current-name remaining-names))
     (if (hash-has-key? known-addresses current-name)
	 (try-next-server (struct-copy network-query-state w
				       [remaining-addresses (hash-ref known-addresses
								      current-name)]
				       [remaining-names remaining-names]))
	 (let ((subq (ns-question current-name (network-request-question req))))
	   (transition (struct-copy network-query-state w [remaining-names remaining-names])
	     (send-message subq)
	     (subscribe/fresh subscription-id
	       (message-handlers w
		 [(answered-question (== subq) ans)
		  (define ips
		    (map make-dns-address (set->list (extract-addresses current-name ans))))
		  (extend-transition
		      (try-next-server (struct-copy network-query-state w
						    [known-addresses (hash-set known-addresses
									       current-name
									       ips)]
						    [remaining-addresses ips]))
		    (unsubscribe subscription-id))])))))]
    [(network-query-state req timeout _ (cons current-ip remaining-ips) _)
     (define rpc-id (gensym 'network-query/allocate-query-id))
     (transition w
       (send-message `(request ,rpc-id allocate-query-id))
       (subscribe rpc-id
	 (message-handlers w
	   [`(reply ,(== rpc-id) ,id)
	    (extend-transition (send-request (struct-copy network-query-state w
							  [remaining-addresses remaining-ips])
					     id
					     timeout
					     current-ip)
	      (unsubscribe rpc-id))])))]))

(define (on-answer w ans)
  (if (eq? ans 'bad-answer) ;; can come from filter-dns-reply
      (try-next-server w)
      (transition w
	(send-message (network-reply (network-request-unique-id (network-query-state-request w))
				     ans)))))

(define (send-request w query-id timeout server-ip)
  (match-define (network-request s q zone-origin _ _) (network-query-state-request w))
  (define query (make-dns-query-message q query-id))
  (define subscription-id (list s query-id))
  (define start-time (current-inexact-milliseconds))
  (log-debug (format "Sending ~v ~v to ~v ~v with ~v seconds of timeout"
		     q query-id
		     zone-origin server-ip
		     timeout))
  (transition w
    (send-message (dns-request query s server-ip))
    (send-message (set-timer subscription-id (* timeout 1000) 'relative))
    (subscribe subscription-id
      (message-handlers w
	[(timer-expired (== subscription-id) _)
	 (log-debug (format "Timed out ~v ~v to ~v ~v after ~v seconds"
			    q query-id
			    zone-origin server-ip
			    timeout))
	 (extend-transition (try-next-server w)
	   (unsubscribe subscription-id)
	   (send-message (list 'release-query-id query-id)))]
	[(dns-reply reply-message source (== s))
	 ;; TODO: maybe receive only specifically from the queried IP address?
	 (log-debug
	  (format
	   "Answer to ~v from ~v ~v in ~v ms~n-- Answers: ~v~n-- Authorities: ~v~n-- Additional: ~v"
	   q zone-origin server-ip
	   (inexact->exact (round (- (current-inexact-milliseconds) start-time)))
	   (dns-message-answers reply-message)
	   (dns-message-authorities reply-message)
	   (dns-message-additional reply-message)))
	 (if (not (= (dns-message-id reply-message) (dns-message-id query)))
	     w
	     (extend-transition (on-answer w (filter-dns-reply reply-message zone-origin))
	       (unsubscribe subscription-id)
	       (send-message (list 'release-query-id query-id))))]))))