examples/flink: implement task delegation roles in terms of abstract

templates
2019-04-02 16:18:57 -04:00 · 2019-04-02 16:18:57 -04:00 · 7815fad415
parent e16db164df
commit 7815fad415
1 changed files with 123 additions and 88 deletions
--- a/racket/syndicate/examples/actor/flink.rkt
+++ b/racket/syndicate/examples/actor/flink.rkt
@ -5,7 +5,8 @@
                  set-count
                  set-empty?
                  set-first
-                  set-remove))
+                  set-remove
                  set-add))
 (require (only-in racket/list
                  partition
                  empty?
@ -38,14 +39,14 @@ of each TaskManager (TM) is contingent on the presence of a job manager.
 #|
 In turn, TaskManagers advertise their presence with (task-manager ID slots),
 where ID is a unique id, and slots is a natural number. The number of slots
-dictates how many tasks the TM can take on. To reduce contention, we the JM
+dictates how many tasks the TM can take on. To reduce contention, the JM
 should only assign a task to a TM if the TM actually has the resources to
 perform a task.
 |#
 (assertion-struct task-manager (id slots))
 ;; an ID is a symbol or a natural number.
 ;; Any -> Bool
-;; recognize ids
+;; recognize IDs
 (define (id? x)
  (or (symbol? x) (exact-nonnegative-integer? x)))
 #|
@ -127,6 +128,50 @@ The JobManager then performs the job and, when finished, asserts (job-finished I
 (define (log fmt . args)
  (displayln (apply format fmt args)))
 ;; ---------------------------------------------------------------------------------------------------
 ;; Generic Implementation of Task Delegation Protocol
 ;; a TaskFun is a
 ;; (Task ID (TaskResults -> Void) ((U ACCEPTED OVERLOAD RUNNING) -> Void) -> Void)
 ;; ID (-> Bool) TaskFun -> TaskPerformer
 ;; doesn't really account for long-running tasks
 ;; gonna need some effect polymorphism to type uses of this
 (define (task-performer my-id can-accept? perform-task)
  (react
   (during (task-assignment my-id $job-id $task)
     (field [status #f])
     (assert (task-state my-id job-id (task-id task) (status)))
     (cond
       [(can-accept?)
        (status RUNNING)
        (define (on-complete results)
          (status (finished results)))
        (perform-task task job-id on-complete status)]
       [else
        (status OVERLOAD)]))))
 ;; Task
 ;; ID
 ;; ID
 ;; (-> Void)
 ;; (TaskResults -> Void)
 ;; -> TaskAssigner
 (define (task-assigner tsk job-id performer
                       on-overload!
                       on-complete!)
  (react
   (assert (task-assignment performer job-id tsk))
   (on (asserted (task-state performer job-id (task-id tsk) $status))
       (match status
         [(or (== ACCEPTED)
              (== RUNNING))
          (void)]
         [(== OVERLOAD)
          (stop-current-facet (on-overload!))]
         [(finished results)
          (stop-current-facet (on-complete! results))]))))
 ;; ---------------------------------------------------------------------------------------------------
 ;; TaskRunner
@ -138,25 +183,24 @@ The JobManager then performs the job and, when finished, asserts (job-finished I
   (assert (task-runner id (status)))
   (begin/dataflow
     (log "task-runner ~v state is: ~a" id (status)))
-   (during (task-assignment id $job-id (task $tid $desc))
+   ;; Task (TaskStateDesc -> Void) -> Void
-     (field [execution-state (if (idle?) RUNNING OVERLOAD)]
+   (define (perform-task tsk job-id on-complete! update-status!)
-            [word-count (hash)])
+     (unless (idle?)
-     (assert (task-state id job-id tid (execution-state)))
+       (error "tried to perform a task when not idle"))
-     ;; we have to avoid asking a non-idle runner to do anything
+     ;; since we currently finish everything in one turn, these changes to status aren't
-     (when (idle?)
+     ;; actually visible.
-       (on-stop (status IDLE))
+     (status RUNNING)
-       (on-start
+     (match-define (task tid desc) tsk)
-        (status (executing tid))
+     (match desc
-        ;; since we currently finish everything in one turn, allow other actors to observe the changes in our
+       [(map-work data)
-        ;; task-runner state by flushing pending actions.
+        (define wc (count-new-words (hash) (string->words data)))
-        (flush!)
+        (on-complete! wc)]
-        (match desc
+       [(reduce-work left right)
-          [(map-work data)
+        (define wc (hash-union left right #:combine +))
-           (word-count (count-new-words (word-count) (string->words data)))
+        (on-complete! wc)])
-           (execution-state (finished (word-count)))]
+     (status IDLE))
-          [(reduce-work left right)
+   (on-start
-           (word-count (hash-union left right #:combine +))
+    (task-performer id idle? perform-task))))
           (execution-state (finished (word-count)))]))))))
 ;; (Hash String Nat) String -> (Hash String Nat)
 (define (word-count-increment h word)
@ -206,40 +250,25 @@ The JobManager then performs the job and, when finished, asserts (job-finished I
       #:on-remove (log "TM learns that task-runner ~a is NOT IDLE" id))
     (assert (task-manager id (set-count (idle-runners))))
     (field [busy-runners (list)])
-     (during (task-assignment id $job-id $t)
+     (define (can-accept?)
-       (match-define (task task-id desc) t)
+       (not (set-empty? (idle-runners))))
-       #;(on-start (log "TM receives task ~a" task-id))
+     (define (perform-task tsk job-id on-complete! update-status!)
-       (log "TM receives task ~a" task-id)
+       (match-define (task task-id desc) tsk)
-       (on-stop (log "TM finished with task ~a" task-id))
+       (define runner (set-first (idle-runners)))
-       (field [status ACCEPTED])
+       ;; n.b. modifying a query set is questionable
-       ;; TODO - could delegate this assertion, in the non-overloaded case, to TaskRunner
+       ;; but if we wait for the IDLE assertion to be retracted, we might assign multiple tasks to the same runner.
-       ;; (also removing the first id from task-state)
+       ;; Could use the busy-runners field to avoid that
-       (assert (task-state id job-id task-id (status)))
+       (idle-runners (set-remove (idle-runners) runner))
-       (cond
+       (log "TM assigns task ~a to runner ~a" task-id runner)
-         [(set-empty? (idle-runners))
+       ;; TODO - since we're both adding and removing from this set I'm not sure TRs
-          (log "TM can't run ~a right now" task-id)
+       ;; need to be making assertions about their idleness
-          (status OVERLOAD)]
+       (on-stop (idle-runners (set-add (idle-runners) runner)))
-         [else
+       (on-start
-          (define runner (set-first (idle-runners)))
+        (task-assigner tsk job-id runner
-          ;; n.b. modifying a query set is questionable
+                      (lambda () (update-status! OVERLOAD))
-          ;; but if we wait for the IDLE assertion to be retracted, we might assign multiple tasks to the same runner.
+                      (lambda (results) (on-complete! results)))))
-          ;; Could use the busy-runners field to avoid that
+     (on-start
-          (idle-runners (set-remove (idle-runners) runner))
+      (task-performer id can-accept? perform-task)))))
          (log "TM assigns task ~a to runner ~a" task-id runner)
          (assert (task-assignment runner job-id t))
          (status RUNNING)
          (on (asserted (task-state runner job-id task-id $state))
              (match state
                [(or (== ACCEPTED)
                     (== RUNNING))
                 ;; nothing to do
                 (void)]
                [(== OVERLOAD)
                 (log "TM overloaded TR with task ~a" task-id)
                 (status OVERLOAD)]
                [(finished results)
                 (log "TM receives the results of task ~a" task-id)
                 (status state)]))])))))
 ;; ---------------------------------------------------------------------------------------------------
 ;; JobManager
@ -297,43 +326,49 @@ The JobManager then performs the job and, when finished, asserts (job-finished I
      ;; Requires (task-ready? t)
      (define (perform-task t k)
        (react
         (define task-facet (current-facet-id))
         (on-start (tasks-in-progress (add1 (tasks-in-progress))))
         (on-stop (tasks-in-progress (sub1 (tasks-in-progress))))
         (match-define (task this-id desc) t)
         (log "JM begins on task ~a" this-id)
-         (field [task-mngr #f])
+         (define (select-a-task-manager)
-         (begin/dataflow
+           (react
-           ;; n.b. cyclic data-flow dependency
+            (begin/dataflow
-           (unless (task-mngr)
+              (define mngr
-             (define mngr
+                (for/first ([(id slots) (in-hash (task-managers))]
-               (for/first ([(id slots) (in-hash (task-managers))]
+                            #:when (positive? (- slots (hash-ref (requests-in-flight) id 0))))
-                           #:when (positive? (- slots (hash-ref (requests-in-flight) id 0))))
+                  id))
-                 id))
+              (when mngr
-             (when mngr
+                (take-slot! mngr)
-               (take-slot! mngr)
+                (stop-current-facet (assign-task mngr))))))
-               (react (stop-when (asserted (task-state mngr job-id this-id _))
+
-                                 (received-answer! mngr)))
+         ;; ID -> ...
-               (task-mngr mngr))))
+         (define (assign-task mngr)
-         ;; TODO - should respond if task manager dies
+           (react
-         (assert #:when (task-mngr)
+            (define this-facet (current-facet-id))
-                 (task-assignment (task-mngr) job-id t))
+            (on (retracted (task-manager mngr _))
-         (on #:when (task-mngr)
+                ;; our task manager has crashed
-             (asserted (task-state (task-mngr) job-id this-id $state))
+                (stop-current-facet (select-a-task-manager)))
-             (match state
+            (on-start
-               [(or (== ACCEPTED)
+             ;; N.B. when this line was here, and not after `(when mngr ...)` above,
-                    (== RUNNING))
+             ;; things didn't work. I think that due to script scheduling, all ready
-                ;; nothing to do
+             ;; tasks were being assigned to the manager
-                (void)]
+             #;(take-slot! mngr)
-               [(== OVERLOAD)
+             (react (stop-when (asserted (task-state mngr job-id this-id _))
-                ;; need to find a new task manager
+                               (received-answer! mngr)))
-                ;; don't think we need a release-slot! here, because if we've heard back from a task manager,
+             (task-assigner t job-id mngr
-                ;; they should have told us a different slot count since we tried to give them work
+                            (lambda ()
-                (log "JM overloaded manager ~a with task ~a" (task-mngr) this-id)
+                              ;; need to find a new task manager
-                (task-mngr #f)]
+                              ;; don't think we need a release-slot! here, because if we've heard back from a task manager,
-               [(finished results)
+                              ;; they should have told us a different slot count since we tried to give them work
-                (log "JM receives the results of task ~a" this-id)
+                              (log "JM overloaded manager ~a with task ~a" mngr this-id)
-                (stop-current-facet (k this-id results))]))))
+                              (stop-facet this-facet (select-a-task-manager)))
                            (lambda (results)
                              (log "JM receives the results of task ~a" this-id)
                              (stop-facet task-facet (k this-id results)))))))
         (on-start (select-a-task-manager))))
      ;; ID Data -> Void
      ;; Update any dependent tasks with the results of the given task, moving