; Parallelism primitives designed for ACL2 functions evaluating in the ; raw LISP environment. Modified to run in LISP without ACL2. ; Copyright (C) 2008 David L. Rager ; This program is free software; you can redistribute it and/or modify ; it under the terms of the GNU General Public License as published by ; the Free Software Foundation; either version 2 of the License, or ; (at your option) any later version. ; This program is distributed in the hope that it will be useful, ; but WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ; GNU General Public License for more details. ; You should have received a copy of the GNU General Public License ; along with this program; if not, write to the Free Software ; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ; Written by: David L. Rager ; Matt Kaufmann has spent many days/weeks improving this library, has ; my gratitude, and deserves credit for his efforts. A more thorough ; explanation of those who have contributed to this library can be ; found in Rager's master's thesis. ; This library has many LISP-only problems. The library was designed for use ; within ACL2, and as such, some user "bugs" occur when used in raw LISP. ; These issues are thought to be quite safe when this library is used in ACL2 ; and have therefore been unaddressed thus far. This file is created more as a ; curiousity and not actively maintained, but if enough people find it useful, ; Rager's further attention might be diverted to it. ; This file is divided into the following sections. ; Section: Enabling and Disabling Interrupts ; Section: Threading Interface ; Section: ACL2 Utility Definitions ; Section: Parallelism Basis ; Section: Work Consumer Code ; Section: Work Producer Code ; Section: Parallelism Primitives ; In particular, see the Essay on Parallelism Definitions and the Essay on ; Parallelism Strategy for overviews on this implementation of parallel ; evaluation. #+(or (and sbcl sb-thread) openmcl) (push :acl2-par *features*) (make-package "PAR" :use '(cl)) (in-package "PAR") ;--------------------------------------------------------------------- ; Section: Enabling and Disabling Interrupts ; "Without-interrupts" means that there will be no interrupt from the Lisp ; system, including ctrl+c from the user or an interrupt from another ; thread/process. For example, if *thread1* is running (progn ; (without-interrupts (process0)) (process1)), then execution of ; (interrupt-thread *thread1* (lambda () (break))) will not interrupt ; (process0). ; But note that "without-interrupts" does not guarantee atomicity; for example, ; it does not mean "without-setq". (defmacro without-interrupts (&rest forms) ; This macro prevents interrupting evaluation of any of the indicated forms in ; a parallel lisp. In a non-parallel environment (#-acl2-par), we simply ; evaluate the forms. This behavior takes priority over any enclosing call of ; with-interrupts. See also with-interrupts. #+(and openmcl acl2-par) `(ccl:without-interrupts ,@forms) #+(and sbcl sb-thread acl2-par) `(sb-sys:without-interrupts ,@forms) #-acl2-par `(progn ,@forms)) (defmacro with-interrupts (&rest forms) ; This macro allows interrupting evaluation of any of the indicated forms in ; a parallel lisp. In a non-parallel environment (#-acl2-par), we simply ; evaluate the forms. This behavior takes priority over any enclosing call of ; without-interrupts. See also without-interrupts. #+(and openmcl acl2-par) `(ccl:with-interrupts-enabled ,@forms) #+(and sbcl sb-thread acl2-par) `(sb-sys:with-interrupts ,@forms) #-acl2-par `(progn ,@forms)) (defmacro unwind-protect-disable-interrupts-during-cleanup (body-form &rest cleanup-forms) ; As the name suggests, this is unwind-protect but with a guarantee that ; cleanup-form cannot be interrupted. Note that OpenMCL's implementation ; already disables interrupts during cleanup (1.1pre and later). #+(and openmcl acl2-par) `(unwind-protect ,body-form ,@cleanup-forms) #+(and sbcl sb-thread acl2-par) `(unwind-protect ,body-form (without-interrupts ,@cleanup-forms)) #-acl2-par `(unwind-protect ,body-form ,@cleanup-forms)) ;--------------------------------------------------------------------- ; Section: Threading Interface ; ; The threading interface is intended for system level programmers. It is not ; intended for the ACL2 user. When writing system-level multi-threaded code, ; we use implementation-independent interfaces. If you need a function not ; covered in this interface, create the interface! ; Many of the functions in this interface (lockp, make-lock, and so on) are not ; used elsewhere, but are included here in case we find a use for them later. ; We take a conservative approach for implementations that do not support ; parallelism. For example, if the programmer asks for a semaphore or lock in ; an unsupported Lisp, then nil is returned. ; We employ counting semaphores. For details, including a discussion of ; ordering, see comments in the definition of function make-semaphore. ; Note: We use parts of the threading interface for our implementation of the ; parallelism primitives. (defun lockp (x) #+(and openmcl acl2-par) (cl:typep x 'ccl::recursive-lock) #+(and sbcl sb-thread acl2-par) (cl:typep x 'sb-thread::mutex) #-acl2-par ; We return nil in the uni-threaded case in order to stay in sync with ; make-lock, which returns nil in this case. In a sense, we want (lockp ; (make-lock x)) to be a theorem if there is no error. (null x)) (defun make-lock (&optional lock-name) ; See also deflock. ; Even though OpenMCL nearly always uses a FIFO for threads blocking on a lock, ; it does not guarantee so: no such promise is made by the OpenMCL ; documentation or implementor (in fact, we are aware of a race condition that ; would violate FIFO properties for locks). Thus, we make absolutely no ; guarantees about ordering; for example, we do not guarantee that the ; longest-blocked thread for a given lock is the one that would enter a ; lock-guarded section first. However, we suspect that this is usually the ; case for most implementations, so assuming such an ordering property is ; probably a reasonable heuristic. We would be somewhat surprised to find ; significant performance problems in our own application to ACL2's parallelism ; primitives due to the ordering provided by the underlying system. #-acl2-par (declare (ignore lock-name)) #+(and openmcl acl2-par) (ccl:make-lock lock-name) #+(and sbcl sb-thread acl2-par) (sb-thread:make-mutex :name lock-name) #-acl2-par ; We return nil in the uni-threaded case in order to stay in sync with lockp. nil) (defmacro deflock (lock-symbol) ; Deflock defines what some Lisps call a "recursive lock", namely a lock that ; can be grabbed more than once by the same thread, but such that if a thread ; outside the owner tries to grab it, that thread will block. ; Note that if lock-symbol is already bound, then deflock will not re-bind ; lock-symbol. `(defvar ,lock-symbol (make-lock (symbol-name ',lock-symbol)))) (defmacro reset-lock (bound-symbol) ; This macro binds the given global (but not necessarily special) variable to a ; lock that is new, at least from a programmer's perspective. ; Reset-lock should only be applied to bound-symbol if deflock has previously ; been applied to bound-symbol. `(setq ,bound-symbol (make-lock ,(symbol-name bound-symbol)))) (defmacro with-lock (lock-name &rest forms) ; Grab a lock, blocking until it is acquired; evaluate forms; and then release ; the lock. This macro guarantees mutual exclusion. #-acl2-par (declare (ignore lock-name)) (let ((forms ; We ensure that forms is not empty because otherwise, in OpenMCL alone, ; (with-lock some-lock) evaluates to t. We keep the code simple and consistent ; by modifying forms here for all cases, not just OpenMCL. (or forms '(nil)))) #+(and openmcl acl2-par) `(ccl:with-lock-grabbed (,lock-name) nil ,@forms) #+(and sbcl sb-thread acl2-par) `(sb-thread:with-recursive-lock (,lock-name) nil ,@forms) #-acl2-par `(progn ,@forms))) ; A future possibility, also mentioned in parallel.lisp: ; Recycle locks, perhaps for example in wait-on-condition-variable-lockless. ; Here is code, not heavily tested, that may be useful in that regard. #|| (progn ; We declare some variables here that are necessary for the threading ; interface. These support semaphore recycling via functions allocate-lock and ; free-lock, which are useful for efficient implementation of ; wait-on-condition-variable-lockless. (defvar *lock-allocation-lock* (make-lock)) (defvar *lock-freelist* #+openmcl (ccl::%cons-pool) #-openmcl nil) ) (defun allocate-lock () (with-lock *lock-allocation-lock* (or (pop #+openmcl (ccl::pool.data *lock-freelist*) #-openmcl *lock-freelist*) (make-lock)))) (defun free-lock (lock) ; Warning: This function requires that lock is unacquired. ; We considered creating a user-settable limit on the length of the ; *lock-freelist*. In favor of simplicity, we have not implemented this, but ; if OS lock resources become an issue, this is worth further consideration. ; We test that the lock is not currently acquired. Note that we do not test ; that the lock isn't already on the *lock-freelist*. It's questionable ; whether this check is worth it, since the user can get into trouble in many ; other ways. (without-interrupts #+(and openmcl acl2-par) (when (not (ccl:try-lock lock)) (error "A lock was freed while still being held.")) #+(and sbcl sb-thread acl2-par) (if (sb-thread:get-mutex lock nil nil) (sb-thread:release-mutex lock) (error "A lock was freed while still being held."))) (with-lock *lock-allocation-lock* (push lock #+openmcl (ccl::pool.data *lock-freelist*) #-openmcl *lock-freelist*))) (defun reset-lock-free-list () ; We provide the user the ability to clear the locks stored for recycling. If ; an ACL2 programmer wants to free OS resources for garbage collection, they ; can use this method to free the locks stored in our system. (with-lock *lock-allocation-lock* (setf *lock-freelist* #+openmcl (ccl::%cons-pool) #-openmcl nil))) ||# (defun run-thread (name fn-symbol &rest args) ; Apply fn-symbol to args. We follow the precedent set by LISP machines (and ; in turn OpenMCL), which allowed the user to spawn a thread whose initial ; function receives an arbitrary number of arguments. ; We expect this application to occur in a fresh thread with the given name. ; When a call of this function returns, we imagine that this fresh thread can ; be garbage collected; at any rate, we don't hang on to it! ; Note that run-thread returns different types in different Lisps. #-acl2-par (declare (ignore name)) #+(and openmcl acl2-par) (ccl:process-run-function name (lambda () (apply fn-symbol args))) #+(and sbcl sb-thread acl2-par) (sb-thread:make-thread (lambda () (apply fn-symbol args)) :name name) ; We're going to be nice and let the user's function still run, even though ; it's not split off. #-acl2-par (apply fn-symbol args)) (defun interrupt-thread (thread function &rest args) ; Interrupt the indicated thread and then, in that thread, apply function to ; args. Note that function and args are all evaluated. When this function ; application returns, the thread resumes from the interrupt (from where it ; left off). #-acl2-par (declare (ignore thread function args)) #+(and openmcl acl2-par) (apply #'ccl:process-interrupt thread function args) #+(and sbcl sb-thread acl2-par) (if args (error "Passing arguments to interrupt-thread not supported in SBCL.") (sb-thread:interrupt-thread thread function)) #-acl2-par nil) (defun kill-thread (thread) #-acl2-par (declare (ignore thread)) #+(and openmcl acl2-par) (ccl:process-kill thread) #+(and sbcl sb-thread acl2-par) (sb-ext:process-kill thread) #-acl2-par nil) (defun all-threads () #+(and openmcl acl2-par) (ccl:all-processes) #+(and sbcl sb-thread acl2-par) (sb-thread:list-all-threads) #-acl2-par (error "We don't know how to list threads in this Lisp (or acl2-par is not ~ on the features list.")) (defun current-thread () #+(and openmcl acl2-par) ccl:*current-process* #+(and sbcl sb-thread acl2-par) sb-thread:*current-thread* #-acl2-par nil) (defun thread-wait (fn &rest args) ; Thread-wait provides an inefficient mechanism for the current thread to wait ; until a given condition, defined by the application of fn to args, is true. ; When performance matters, we advise using a signaling mechanism over this ; hacker's function. #+openmcl (apply #'ccl:process-wait "Busy-waiting on condition for thread" fn args) #-openmcl (loop while (not (apply fn args)) do (sleep 0.05))) #+(and sbcl sb-thread acl2-par (not acl2-loop-only)) (defstruct sbcl-semaphore (lock (sb-thread:make-mutex)) (cv (sb-thread:make-waitqueue)) ; condition variable (count 0)) (defun make-semaphore (&optional name) ; Make-semaphore, signal-semaphore, and semaphorep work together to implement ; counting semaphores for the threading interface. ; This function creates "counting semaphores", which are data structures that ; include a "count" field, which is a natural number. A thread can "wait on" a ; counting semaphore, and it will block in the case that the semaphore's count ; is 0. To "signal" such a semaphore means to increment that field and to ; notify a unique waiting thread (we will discuss a relaxation of this ; uniqueness shortly) that the semaphore's count has been incremented. Then ; this thread, which is said to "receive" the signal, decrements the ; semaphore's count and is then unblocked. This mechanism is typically much ; faster than busy waiting. ; In principle more than one waiting thread could be notified (though this ; seems rare in practice). In this case, only one would be the receiving ; thread, i.e., the one that decrements the semaphore's count and is then ; unblocked. ; If semaphore usage seems to perform inefficiently, could this be due to ; ordering issues? For example, even though OpenMCL nearly always uses a FIFO ; for blocked threads, it does not guarantee so: no such promise is made by the ; OpenMCL documentation or implementor. Thus, we make absolutely no guarantees ; about ordering; for example, we do not guarantee that the longest-blocked ; thread for a given semaphore is the one that would receive a signal. ; However, we suspect that this will usually be the case for most ; implementations, so assuming such an ordering property is probably a ; reasonable heuristic. We would be somewhat surprised to find significant ; performance problems in our own application to ACL2's parallelism primitives ; due to the ordering provided by the underlying system. ; OpenMCL provides us with semaphores for signaling. SBCL provides ; condition variables for signaling. Since we want to code for one ; type of signaling between parents and children, we create a ; semaphore wrapper for SBCL's condition variables. The structure ; sbcl-semaphore implements the data for this wrapper. ; Followup: SBCL has recently implemented sempahores, and the ; parallelism code should maybe be changed to reflect this. It ; probably depends on whether their implementation provides ; previously discussed semaphore-nofication-object's. (declare (ignore name)) #+(and openmcl acl2-par) (ccl:make-semaphore) #+(and sbcl sb-thread acl2-par) (make-sbcl-semaphore) #-acl2-par ; We return nil in the uni-threaded case in order to stay in sync with ; semaphorep. nil) (defun semaphorep (semaphore) ; Make-semaphore, signal-semaphore, and semaphorep work together to implement ; counting semaphores for our threading interface. ; This function recognizes our notion of semaphore structures. #+(and openmcl acl2-par) (typep semaphore 'ccl::semaphore) #+(and sbcl sb-thread acl2-par) (and (sbcl-semaphore-p semaphore) (typep (sbcl-semaphore-lock semaphore) 'sb-thread::mutex) (typep (sbcl-semaphore-cv semaphore) 'sb-thread::waitqueue) (integerp (sbcl-semaphore-count semaphore))) #-acl2-par ; We return nil in the uni-threaded case in order to stay in sync with ; make-semaphore, which returns nil in this case. In a sense, we want ; (semaphorep (make-semaphore x)) to be a theorem if there is no error. (null semaphore)) (defun signal-semaphore (semaphore) ; Make-semaphore, signal-semaphore, and semaphorep work together to implement ; counting semaphores for our threading interface. ; This function is executed for side effect; the value returned is irrelevant. #-acl2-par (declare (ignore semaphore)) #+(and openmcl acl2-par) (ccl:signal-semaphore semaphore) #+(and sbcl sb-thread acl2-par) (sb-thread:with-recursive-lock ((sbcl-semaphore-lock semaphore)) (without-interrupts (incf (sbcl-semaphore-count semaphore)) (sb-thread:condition-notify (sbcl-semaphore-cv semaphore)))) #-acl2-par nil) (progn ; We declare some variables here that are necessary for the threading ; interface. These support semaphore recycling via functions ; allocate-semaphore and free-semaphore, which is useful for efficiency ; (especially when there are thousands of sempaphores) and for avoiding errors ; in early versions (at least) of OpenMCL. (defvar *semaphore-allocation-lock* (make-lock)) (defvar *semaphore-freelist* #+openmcl (ccl::%cons-pool) #-openmcl nil) ) (defun allocate-semaphore () (without-interrupts (with-lock *semaphore-allocation-lock* (or (pop #+openmcl (ccl::pool.data *semaphore-freelist*) #-openmcl *semaphore-freelist*) (make-semaphore))))) (defun free-semaphore (s) ; Warning: This function assumes that s is properly initialized. In ; particular, it must have a count field of 0, and for SBCL, the lock field ; must be free (which is guaranteed if we only use the threading interface). ; We test that the semaphore is properly initialized. Note that we do not test ; that the semaphore isn't already on the *semaphore-freelist*. It's ; questionable whether this check is worth it, since the user can get into ; trouble in many other ways. ; We considered creating a user-settable limit on the length of the ; *semaphore-freelist*. In favor of simplicity, we have not ; implemented this, but if OS semaphore resources again become an ; issue, this is worth further consideration. If this limit is ; implemented, once reached, probably all semaphores should be ; discarded and a full gc called. This full gc is required to truly ; free the OS-level semaphores underneath the LISP implementation. (without-interrupts #+openmcl (loop while (timed-wait-on-semaphore s 0)) #+(and sbcl sb-thread) (let ((lock (sbcl-semaphore-lock s))) ; Note that we must test the count before the mutex, so that we don't acquire ; the mutex and then not release it. (if (or (not (equal (sbcl-semaphore-count s) 0)) (not (sb-thread:get-mutex lock nil nil))) (error "It is a design violation to free a semaphore while its lock ~ is acquired.") (sb-thread:release-mutex lock)))) (with-lock *semaphore-allocation-lock* (push s #+openmcl (ccl::pool.data *semaphore-freelist*) #-openmcl *semaphore-freelist*))) (defun reset-semaphore-free-list () ; We provide the user the ability to clear the semaphores stored for recycling. ; If an ACL2 programmer wants to free OS resources for garbage collection, they ; can use this method to free the semaphores stored in our system. (with-lock *semaphore-allocation-lock* (setf *semaphore-freelist* #+openmcl (ccl::%cons-pool) #-openmcl nil))) (defun wait-on-semaphore (semaphore &optional semaphore-notification-object) ; This function always returns t. It only returns normally after receiving a ; signal for the given semaphore, setting the notification status of ; semaphore-notification-object (if supplied and not nil) to true; see ; semaphore-notification-status. But control can leave this function ; abnormally, for example if the thread executing a call of this function is ; interrupted (e.g., with interface function interrupt-thread) with code that ; does a throw, in which case semaphore-notification-object is unmodified. ; We need the ability to know whether we received a signal or not. OpenMCL ; provides this through a semaphore-notification-object. As it turns out, SBCL ; does not provide this mechanism currently, so we modify our wrapper to ; "unreceive the signal" in the semaphore. We do this by not decrementing the ; count of it unless we also modify the semaphore-notification object. This ; means we have to resignal the semaphore if we were interrupted while ; signaling, but we would have to do this anyway. #-acl2-par (declare (ignore semaphore semaphore-notification-object)) #+(and openmcl acl2-par) (ccl:wait-on-semaphore semaphore semaphore-notification-object) #+(and sbcl sb-thread acl2-par) (let ((supposedly-did-not-receive-signal-p t)) (sb-thread:with-recursive-lock ((sbcl-semaphore-lock semaphore)) (unwind-protect-disable-interrupts-during-cleanup (progn (loop while (<= (sbcl-semaphore-count semaphore) 0) do ; The current thread missed the chance to decrement and must rewait. (sb-thread:condition-wait (sbcl-semaphore-cv semaphore) (sbcl-semaphore-lock semaphore))) (setq supposedly-did-not-receive-signal-p nil)) (if supposedly-did-not-receive-signal-p ; The current thread may have received the signal but been unable to record it. ; In this case, the current thread will signal the condition variable again, so ; that any other thread waiting on the semaphore can have a chance at acquiring ; the said semaphore. (sb-thread:condition-notify (sbcl-semaphore-cv semaphore) (sbcl-semaphore-lock semaphore)) ; The current thread was able to record the reception of the signal. The ; current thread will decrement the count of the semaphore and set the ; semaphore-notification-object. (progn (decf (sbcl-semaphore-count semaphore)) (when semaphore-notification-object (set-semaphore-notification-status semaphore-notification-object))))))) #-acl2-par t) ; default is to receive a semaphore/lock (defun timed-wait-on-semaphore (semaphore length-in-seconds) ; It would be possible to manually implement a timed-wait-on-semaphore in SBCL, ; but it requires an extra thread every time a current thread waits. This ; extra thread could be given a pointer to the current thread, sleep for 60 ; seconds, and throw the thread if the current thread hadn't set a variable in ; a shared array. This variable would be set whenever the current thread had ; successfully received the signal to wake up. The problem with this idea is ; that it requires double the number of threads. As a result, we leave the ; spawned worker threads to forever exist in SBCL, with the hope that SBCL ; will one day implement a timed-condition-wait. ; We do not simply reduce timed-wait-on-semaphore to wait-on-semaphore outside ; of OpenMCL, because the timeout version has different behavior than the ; non-timeout version. We want the hard run-time error here. #+openmcl (ccl:timed-wait-on-semaphore semaphore length-in-seconds) #-openmcl (error "Timed waiting not supported outside openmcl" nil)) (defun make-semaphore-notification () ; This function returns an object that records when a corresponding semaphore ; has been signaled (for use when wait-on-semaphore is called with that ; semaphore and that object). #+(and openmcl acl2-par) (ccl:make-semaphore-notification) #+(and sbcl sb-thread acl2-par) (make-array 1 :initial-element nil) #-acl2-par nil) (defun semaphore-notification-status (semaphore-notification-object) (declare (ignorable semaphore-notification-object)) #+(and openmcl acl2-par) (ccl:semaphore-notification-status semaphore-notification-object) #+(and sbcl sb-thread acl2-par) (aref semaphore-notification-object 0) #-acl2-par ; t may be the wrong default, but we don't have a use case for this return ; value yet, so we postpone thinking about the "right" value until we are aware ; of a need. t) (defun clear-semaphore-notification-status (semaphore-notification-object) (declare (ignorable semaphore-notification-object)) #+(and openmcl acl2-par) (ccl:clear-semaphore-notification-status semaphore-notification-object) #+(and sbcl sb-thread acl2-par) (setf (aref semaphore-notification-object 0) nil) #-acl2-par nil) ; We implement this only for SBCL, because even a system-level programmer is ; not expected to use this function. We use it only from within the threading ; interface to implement wait-on-semaphore for SBCL. (defun set-semaphore-notification-status (semaphore-notification-object) (declare (ignorable semaphore-notification-object)) #+(and sbcl sb-thread) (setf (aref semaphore-notification-object 0) t) #-(and sbcl sb-thread) (error "Set-semaphore-notification-status not supported outside SBCL" nil)) ; Essay on Condition Variables ; A condition variable is a data structure that can be passed to corresponding ; "wait" and "signal" functions. When a thread calls the wait function on a ; condition variable, c, the thread blocks until "receiving a signal" from the ; application of the signal function to c. Only one signal is sent per call of ; the signal function; so, at most one thread will unblock. (There is a third ; notion for condition variable, namely the broadcast function, which is like ; the signal function except that all threads blocking on the given condition ; variable will unblock. But we do not support broadcast functions in this ; interface, in part because we use semaphores for OpenMCL, and there's no way ; to broadcast when you're really using a semaphore.) ; The design of our parallelism library is simpler when using condition ; variables for the following reason: Since a worker must wait for two ; conditions before consuming work, it is better to use a condition variable ; and test those two conditions upon waking, rather than try and use two ; semaphores. ; Implementation Note: As of March 2007, our OpenMCL implementation does not ; yield true condition variables. A condition variable degrades to a ; semaphore, so if one thread first signals a condition variable, then that ; signal has been stored. Then later (perhaps much later), when another thread ; waits for that signal, that thread will be able to proceed by decrementing ; the count. As a result the later thread will "receive" the signal, even ; though that signal occurred in the past. Fortunately, this isn't a ; contradiction of the semantics of condition variables, since with condition ; variables there is no specification of how far into the future the waiting ; thread will receive a signal from the signalling thread. ; Note: Condition variables should not be used to store state. They are only a ; signaling mechanism, and any state update implied by receiving a condition ; variable's signal should be checked. This usage is believed to be consistent ; with traditional condition variable semantics. (defun make-condition-variable () ; If OpenMCL implements condition variables, we will want to change the OpenMCL ; expansion and remove the implementation note above. #+(and openmcl acl2-par) (ccl:make-semaphore) #+(and sbcl sb-thread acl2-par) (sb-thread:make-waitqueue) #-acl2-par ; We may wish to have assertions that evaluation of (make-condition-variable) ; is non-nil. So we return t, even though as of this writing there are no such ; assertions. t) (defmacro signal-condition-variable (cv) #-acl2-par (declare (ignore cv)) #+(and openmcl acl2-par) `(ccl:signal-semaphore ,cv) #+(and sbcl sb-thread acl2-par) ; According to an email sent by Gabor Melis, of SBCL help, on 2007-02-25, if ; there are two threads waiting on a condition variable, and a third thread ; signals the condition variable twice before either can receive the signal, ; then both threads should receive the signal. If only one thread unblocks, it ; is considered a bug. `(sb-thread:condition-notify ,cv) #-acl2-par t) (defun wait-on-condition-variable-lockless (cv &optional s) ; Wait-on-condition-variable-lockless takes a required condition variable and ; an optional amount of timeout value. Since timeout-bound waiting is ; unsupported in SBCL, an error occurs when the user tries to include a timeout ; value. ; Here, s is the number of seconds allowed to elapse before unblocking occurs ; (in essence a timeout). This function returns t if we acquire the semaphore ; (i.e., we don't time out), and otherwise returns nil. #-acl2-par (declare (ignore cv s)) #+(and openmcl acl2-par) (if s (ccl:timed-wait-on-semaphore cv s) (ccl:wait-on-semaphore cv)) #+(and sbcl sb-thread acl2-par) (if s (error "Timed waiting on condition variables unsupported in SBCL") ; Since we do not want lock acquisition to be a bottleneck, we create a new ; lock each time we wait. It's too bad that SBCL doesn't provide a lock-free ; signaling mechanism. But since it doesn't, we do the inefficient thing of ; creating a fresh lock each time that becomes garbage when we return. (let ((lock (make-lock))) (with-lock lock (sb-thread:condition-wait cv lock)) t)) #-acl2-par nil) ; the default is to never receive a signal ; End of threading interface ;--------------------------------------------------------------------- ; Section: ACL2 Utility Definitions ; These definitions are taken straight from ACL2. Think of them as ; short-hand for their raw-lisp equivalents. (defmacro mv (&rest l) (return-from mv (cons 'values l))) (defmacro mv-let (&rest rst) (return-from mv-let (cons 'multiple-value-bind rst))) (defun true-listp (x) (if (consp x) (true-listp (cdr x)) (eq x nil))) (defun caar-is-declarep (x) ; Recognizer for expressions x for which (car x) is of the form (declare ...). (and (consp x) (consp (car x)) (eq (caar x) 'declare))) (defun declare-granularity-p (x) ; We return true when x is of the form (declare (granularity )). (and (princ "0") (true-listp x) (eql (length x) 2) (eq (car x) 'declare) (let ((gran-form (cadr x))) (and (true-listp gran-form) (eql (length gran-form) 2) (equal (car gran-form) 'granularity))))) (defun check-and-parse-for-granularity-form (x) ; X is a list of forms that may begin with a granularity declaration such as ; (declare (granularity (< depth 5))). The return signature is (erp msg ; granularity-form-exists granularity-form remainder-forms). If there is no ; declaration then we return (mv nil nil nil nil x). If there is error then we ; return (mv t an-error-message nil nil x). Otherwise we return (mv nil nil t ; granularity-form (cdr x)). ; It is necessary to return whether the granularity form exists. If we did not ; do so, there would be no mechanism for distinguishing between a non-existent ; granularity form and one that was nil. ; A granularity form declaration is the only acceptable form of declaration. ; Some examples of unaccepted declarations are type and ignore declarations. ; We use this function in both the raw and acl2-loop definitions of plet to ; macroexpand away our granularity form, as part of our effort to ensure that ; pargs is logically the identity function. (cond ((not (caar-is-declarep x)) (mv nil nil nil nil x)) ((declare-granularity-p (car x)) (let* ((granularity-declaration (cadar x)) (granularity-form (cadr granularity-declaration))) (mv nil nil t granularity-form (cdr x)))) (t (mv t "Within a parallelism primitive, a granularity form declaration ~ is the only acceptable form of declaration. Some examples of ~ unaccepted declarations are type and ignore declarations. See ~ :DOC granularity." nil nil x)))) ; End of ACL2 Utility Definitions ;--------------------------------------------------------------------- ; Section: Parallelism Basis ; In this section we outline definitions and strategies for parallel evaluation ; and define constants, structures, variables, and other basic parallelism ; infrastructure. ; Essay on Parallelism Definitions ; Core ; ; A core is a unit inside a computer that can do useful work. It has its own ; instruction pointers and usually accesses shared memory. In the old days, we ; had "dual processors." This is an example of a two core system. A ; 2006-vintage example of a four core system is "dual sockets" with "dual core ; technology." ; Process ; ; We generally use the term "process" as a verb, meaning: run a set of ; instructions. For example, the system can process a closure. ; Thread ; ; We use the OS definition of a thread as a lightweight process that shares ; memory with other threads in the same process. A thread in our system is in ; one of the following three states. ; ; 1. Idle - The thread is waiting until both a piece of work (see below) and ; a core are available. ; ; 2. Active - The thread has been allocated a core and is processing some ; work. ; ; 3. Pending - This state occurs iff the thread in this state is associated ; with a parent piece of work, and it is waiting for the children of that ; piece of work to complete and for sufficient CPU core resources. A thread ; in this state is often waiting on a signaling mechanism. ; Closure ; ; We use the term "closure" in the Lisp sense: a function that captures values ; of variables from the lexical environment in which it is formed. A closure ; thus contains enough information to be applied to a list of arguments. We ; create closures in the process of saving work to be performed. ; Work ; ; A piece of work contains all the data necessary for a worker thread to ; process one closure, save its result somewhere that a parent can read it, and ; communicate that it is finished. It also contains some data necessary to ; implement features like the early termination of parallel and/or. Comments ; at parallelism-piece give implementation details. ; Roughly, work can be in any of four states: unassigned, starting, pending, or ; resumed. A piece of work will be processed by a single worker thread (not ; including, of course, child work, which will be processed by other worker ; threads). When a core becomes available, a thread can grab an unassigned ; piece of work, at which time the thread and the piece of work leave their ; initial states together. From that point forward until the piece of work is ; complete, the piece of work and its associated worker thread are considered ; to be in corresponding states (active/started,resumed or pending). Initially ; they are in their active/started states. Later, if child work is created, ; then at that time the thread and its associated piece of work both enter the ; pending state. When all child work terminates and either a CPU core becomes ; available or a heuristic allows an exception to that requirement, the piece ; of work enteres the resumed state and its associated worker thread re-enters ; the active state. This heuristic (implemented in ; wait-for-resumptive-parallelism-resources) gives priority to such resumptions ; over starting new pieces of work. ; Parallelism Primitive ; ; A macro that enables the user to introduce parallelism into a computation: ; one of plet, pargs, pand, and por. ; End of Essay on Parallelism Definitions ; Essay on Parallelism Strategy ; Whenever a parallelism primitive is used, the following steps occur. The ; text between the < and > describes the state after the previous step ; finishes. ; 1. If there is a granularity form, the form is evaluated. If the form ; returns nil, the parallelism primitive expands to the serial equivalent; ; otherwise we continue. ; < granularity form has returned true or was omitted - the system was given a ; "large" amount of work > ; 2. If we heuristically determine that the system is already overwhelmed with ; too much work (see parallelism-resources-available for details), then the ; primitive expands to its serial equivalent; otherwise we continue. ; 3. Create closures for each primitive's arguments, as follows. ; - Plet: one closure for each form assigned to a bound variable ; - Pargs: one closure for each argument to the function call ; - Pand/Por: one closure for each argument ; < have closures in memory representing computation to parallelize > ; 4. Create the data structures for pieces of work that worker threads are to ; process. One such data structure (documented in *work-queue* below) is ; created for each computation to be spawned. Among the fields of each ; such structure is a closure that represents that computation. Siblings ; have data structures that share some fields, such as a result-array that ; is to contain the values returned by the sibling computations. ; ; The system then adds these pieces of work to the global *work-queue* for ; worker threads to pop off the queue and process. ; ; Note that Step 2 avoids creating undesirably many pieces of work. ; (Actually the heuristics used in Step 2 don't provide exact guarantees, ; since two computations that reach Step 2 simultaneously might both ; receive the go-ahead even though together, they create work that exceeds ; the heuristic work limit). ; < now have unassigned work in the work-queue > ; 5. After the parent thread adds the work to the queue, it will check to see ; if more worker threads are needed and spawn them if necessary. Note ; however that if there are more threads than cores, then any newly spawned ; thread will wait on a semaphore, and only begins evaluating the work when ; a core becomes available. Each core is assigned to at most one thread at ; any time (but if this decision is revisited, then it should be documented ; here and in the Parallelism Variables section. Note that this decision ; is implemented by setting *idle-core-count* to (1- *core-count*) in ; reset-parallelism-variables). ; Note that by limiting the amount of work in the system at Step 2, we ; avoid creating more threads than the system can handle. ; < now have enough worker threads to process the work > ; 6. The parent thread waits for its children to signal their completion. It ; is crucial for efficiency that this waiting be implemented using a ; signaling mechanism rather than as busy waiting. ; < the parent is waiting for the worker threads to process the work > ; 7. At this point, the child threads begin processing the work on the queue. ; As they are allocated resources, they each pull off a piece of work from ; the work queue and save their results in the associated result-array. ; After a child thread finishes a piece of work, it will check to see if ; its siblings' computations are still necessary. If not, the child will ; remove these computations from the work queue and interrupt each of its ; running sibling threads with a primitive that supplies a function for ; that thread to execute. This function throws to the tag ; :result-no-longer-needed, causing the interrupted sibling to abort ; evaluation of that piece of work, signal the parent (in an ; unwind-protect's cleanup form on the way out to the catch), catch that ; tag, and finally reenter the stalled state (where the controlling loop ; will find it something new to do). We take care to guarantee that this ; mechanism works even if a child receives more than one interrupt. Note ; that when a child is interrupted in this manner, the value stored for the ; child is a don't-care. ; < all of the children are done computing, the required results are in the ; results-array, and the parent has been signaled a number of times equal to ; the number of children > ; 8. The parent thread (from steps 1-6) resumes. It finds the results stored ; in its results array. If the primitive is a: ; - Plet: it executes the body of the plet with the calculated bindings ; - Pargs: it applies the called function to the calculated arguments ; - Pand, Por: it applies a functionalized "and" or "or" to the calculated ; arguments. The result is Booleanized. ; End of Essay on Parallelism Strategy ; Parallelism Constants (progn (defun core-count-raw (&optional (ctx nil) default) ; If ctx is supplied, then we cause an error using the given ctx. Otherwise we ; return a suitable default value (see below). #+openmcl (declare (ignore ctx default)) #+openmcl (ccl:cpu-count) #-openmcl (if ctx (er hard ctx "It is illegal to call cpu-core-count in this Common Lisp ~ implementation.") ; If the host Lisp does not provide a means for obtaining the number of cores, ; then we simply estimate on the high side. A high estimate is desired in ; order to make it unlikely that we have needlessly idle cores. We thus ; believe that 8 cores is a reasonable estimate for early 2007; but we may well ; want to increase this number later. (or default 8))) ; *Core-count* is the total number of cores in the system. (defvar *core-count* (core-count-raw)) (defvar *unassigned-and-active-work-count-limit* ; The *unassigned-and-active-work-count-limit* limits work on the *work-queue* ; to what we think the system will be able to process in a reasonable amount of ; time. Suppose we have 8 CPU cores. This means that there can be 8 active ; work consumers, and that generally not many more than 24 pieces of ; paralellism work are stored in the *work-queue* to be processed. This ; provides us the guarantee that if all worker threads were immediately to ; finish their piece of parallelism work, that each of them would immediately ; be able to grab another piece from the work queue. ; We could increase the following coefficient from 4 and further guarantee that ; consumers have parallelism work to process, but this would come at the ; expense of backlogging the *work-queue". We prefer simply to avoid the ; otherwise parallelized computations in favor of their serial equivalents. (* 4 *core-count*)) (defvar *total-work-limit* ; unassigned, started, resumed AND pending ; The number of pieces of work in the system, *parallelism-work-count*, must be ; less than *total-work-limit* in order to enable creation of new pieces of ; work. (However, we could go from 49 to 69 pieces of work when encountering a ; pand; just not from 50 to 52.) ; Why limit the amount of work in the system? :Doc parallelism-how-to ; (subtopic "Another Granularity Issue Related to Thread Limitations") provides ; an example showing how cdr recursion can rapidly create threads. That ; example shows that if there is no limit on the amount of work we may create, ; then eventually, many successive cdrs starting at the top will correspond to ; waiting threads. If we do not limit the amount of work that can be created, ; this can exhaust the supply of Lisp threads available to process the elements ; of the list. (let ((val ; Warning: It is possible, in principle to create (+ val ; *max-idle-thread-count*) threads. Presumably you'll get a hard Lisp error ; (or seg fault!) if your Lisp cannot create that many threads. 50) (bound (* 2 *core-count*))) (when (< val bound) (error "The variable *total-work-limit* needs to be at least ~s, i.e., ~%~ at least double the *core-count*. Please redefine ~%~ *total-work-limit* so that it is not ~s." bound val)) val)) ; We don't want to spawn more worker threads (which are initially idle) when we ; already have sufficiently many idle worker threads. We use ; *max-idle-thread-count* to limit this spawning in function ; spawn-worker-threads-if-needed. (defvar *max-idle-thread-count* (* 2 *core-count*)) ; *intial-threads* stores a list of threads that are considered to be part of ; the non-threaded part of ACL2. When terminating parallelism threads, only ; those not appearing in this list will be terminated. Warning: If ACL2 uses ; parallelism during the build process, that this variable could incorrectly ; record parallelism threads as initial threads. (defvar *initial-threads* (all-threads)) ) ; end of constant list ; Parallelism Structures ; If the shape of parallelism-piece changes, update the *work-queue* ; documentation in the section "Parallelism Variables." (defstruct parallelism-piece ; piece of work ; A data item in the work queue has the following contents, and we often call ; each a "piece of work." ; thread-array - the array that holds the threads spawned for that closure's ; particular parent ; result-array - the array that holds the results for that closure's particular ; parent, where each value is either nil (no result yet) or a cons whose cdr is ; the result ; array-index - the index into the above two arrays for this particular closure ; semaphore-to-signal-as-last-act - the semaphore to signal right before the ; spawned thread dies ; closure - the closure to process by spawning a thread ; throw-siblings-when-function - the function to funcall on the current ; thread's result to see if its siblings should be terminated. The function ; will also remove work from the work-queue and throw the siblings if ; termination should occur. (thread-array nil) (result-array nil) (array-index -1) (semaphore-to-signal-as-last-act nil) (closure nil) (throw-siblings-when-function nil)) ; Parallelism Variables (progn ; Keep this progn in sync with reset-parallelism-variables, which resets the ; variables defined here. Note that none of the variables are initialized ; here, so reset-parallelism-variables must be called before evaluating ; parallelism primitives (an exception is *throwable-worker-thread* since it is ; first called in reset-parallelism-variables). ; *Idle-thread-count* is updated both when a thread is created and right before ; it expires. It is also updated when a worker thread gets some work to do and ; after it is done with that work. (defvar *idle-thread-count*) (deflock *idle-thread-count-lock*) ; *Idle-core-count* is only used to estimate resource availability. The number ; itself is always kept accurate using a lock, but because we read it without a ; lock when calculating resource availability, the value actually read is only ; an estimate. It defaults to (1- *core-count*), because the current thread is ; considered active. ; There are two pairs of places that *idle-core-count* is updated. First, ; whenever a worker thread begins processing work, the count is decremented. ; This decrement is paired with the increment that occurs after a worker thread ; finishes work. It is also incremented and decremented in ; eval-and-save-result, before and after a parent waits for its children. ; Note: At different stages of development we have contemplated having a ; "*virtual-core-count*", exceeding the number of CPU cores, that bounds the ; number of active threads. Since our initial tests did not show a performance ; improvement by using this trick, we have not employed a *virtual-core-count*. ; If we later do employ this trick, the documentation in step 5 of the Essay on ; Parallelism Strategy will need to be updated. (defvar *idle-core-count*) (deflock *idle-core-count-lock*) ; *Unassigned-and-active-work-count* tracks the amount of parallelism work in ; the system, other than pending work. It is increased when a parallelism ; primitive adds work to the system. This increase is paired with the final ; decrease in consume-work-on-work-queue-when-its-there, which occurs when a ; piece of work finishes. It is decremented and incremented (respectively) ; when a parent waits on children and when it resumes after waiting on ; children. (defvar *unassigned-and-active-work-count*) (deflock *unassigned-and-active-work-count-lock*) ; *Total-work-count* tracks the total amount of parallelism work. This ; includes unassigned, started, pending, and resumed work. (defvar *total-work-count*) (deflock *total-work-count-lock*) ; We maintain a queue of work to process. See parallelism-piece for ; documentation on pieces of work. Even though *work-queue* is a list, we ; think of it as a structure that can be destructively modified -- so beware ; sharing any structure with *work-queue*! (defvar *work-queue*) (deflock *work-queue-lock*) ; An idle thread waits for the condition variable ; *check-work-and-core-availability-cv* to be signaled, at which time it looks ; for work on the *work-queue* and an idle core to use. This condition can be ; signaled by the addition of new work or by the availabilty of a CPU core. ; Warning: In the former case, a parent thread must always signal this ; semaphore *after* it has already added the work to the queue. Otherwise, a ; child can attempt to acquire work, fail, and then go wait on the semaphore ; again. Since the parent has already signaled, there is no guarantee that the ; work they place on the queue will ever be processed. (The latter case also ; requires analogous care.) ; Why are there two condition variables, one for idle threads and one for ; resuming threads? Suppose that idle and resuming threads waited on the same ; condition variable. We would then have no guarantee that resuming threads ; would be signaled before the idle threads (which is necessary to establish ; the priority explained in wait-for-resumptive-parallelism-resources). Using ; separate condition variables allows both an idle and resuming thread to be ; signaled. Then whichever thread's heuristics allow it to execute will claim ; access to the CPU core. There is no problem if both their heuritistics allow ; them to continue. (defvar *check-work-and-core-availability-cv*) (defvar *check-core-availability-for-resuming-cv*) ; When we terminate threads due to a break and abort, we need a way to ; terminate all threads. We implement this by having them throw the ; :worker-thread-no-longer-needed tag. Unfortunately, sometimes the threads ; are outside the scope of the associated catch, when throwing the tag would ; cause a warning. We avoid this warning by maintaining the dynamically-bound ; variable *throwable-worker-thread*. When the throwable context is entered, ; we let a new copy of the variable into existence and set it to T. Now, when ; we throw :worker-thread-no-longer-needed, we only throw it if ; *throwable-worker-thread* is non-nil. (defvar *throwable-worker-thread* nil) ; *total-parallelism-piece-historical-count* tracks the total number of pieces ; of parallelism work processed over the lifetime of the ACL2 session. It is ; ; reset whenever the parallelism variables are reset. It is only used for ; ; informational purposes, and the system does not depend on its accuracy in any ; way. It therefore does not have an associated lock. (defvar *total-parallelism-piece-historical-count*) ; *parallel-evaluation-enabled* enables and disables parallel ; *evaluation. By default it is set to t. (defvar *parallel-evaluation-enabled* t) ) ; end of parallelism variables ; Following are definitions of functions that help us restore the ; parallelism system to a stable state after an interrupt occurs. (defun throw-all-threads-in-list (thread-list) ; We interrupt each of the given threads with a throw to the catch at the top ; of consume-work-on-work-queue-when-its-there, which is the function called ; by run-thread in spawn-worker-threads-if-needed. ; Compare with kill-all-threads-in-list, which kills all of the given threads ; (typically all user-produced threads), not just those self-identified as ; being within the associated catch block. (if (endp thread-list) nil (progn (interrupt-thread (car thread-list) #'(lambda () (when *throwable-worker-thread* (throw :worker-thread-no-longer-needed nil)))) (throw-all-threads-in-list (cdr thread-list))))) (defun kill-all-threads-in-list (thread-list) ; Compare with throw-all-threads-in-list, which uses throw instead of killing ; threads directly, but only affects threads self identified as being within an ; associated catch block. (if (endp thread-list) nil (progn (kill-thread (car thread-list)) (kill-all-threads-in-list (cdr thread-list))))) (defun all-threads-except-initial-threads-are-dead () #+sbcl (<= (length (all-threads)) 1) #-sbcl (null (set-difference (all-threads) *initial-threads*))) (defun send-die-to-all-except-initial-threads () ; This function is evaluated only for side effect. (let ((target-threads #+sbcl (cdr (all-threads)) #-sbcl (set-difference (all-threads) *initial-threads*))) #+acl2-par (throw-all-threads-in-list target-threads) #-acl2-par nil) (thread-wait 'all-threads-except-initial-threads-are-dead)) (defun kill-all-except-initial-threads () ; This function is evaluated only for side effect. (let ((target-threads #+sbcl (cdr (all-threads)) #-sbcl (set-difference (all-threads) *initial-threads*))) (kill-all-threads-in-list target-threads)) (thread-wait 'all-threads-except-initial-threads-are-dead)) (defun reset-parallelism-variables () ; We use this function (a) to kill all worker threads, (b) to reset "most" of ; the parallelism variables, and (c) to reset the lock and semaphore recycling ; systems. Keep (b) in sync with the progn above that declares the variables ; reset here, in the sense that this function assigns values to exactly those ; variables. ; If a user kills threads directly from raw Lisp, for example using functions ; above, then they should call reset-parallelism-variables. Note that ; reset-parallelism-variables is called automatically on any call of LD, ; including the case that an ACL2 user interrupts with control-c and then ; aborts to get back to the ACL2 top-level loop. ; (a) Kill all worker threads. (send-die-to-all-except-initial-threads) ; (b) Reset "most" of the parallelism variables. (setf *idle-thread-count* 0) (reset-lock *idle-thread-count-lock*) (setf *idle-core-count* (1- *core-count*)) (reset-lock *idle-core-count-lock*) (setf *unassigned-and-active-work-count* 1) (reset-lock *unassigned-and-active-work-count-lock*) (setf *total-work-count* 1) (reset-lock *total-work-count-lock*) (setf *work-queue* nil) (reset-lock *work-queue-lock*) (setf *check-work-and-core-availability-cv* (make-condition-variable)) (setf *check-core-availability-for-resuming-cv* (make-condition-variable)) (setf *throwable-worker-thread* nil) (setf *total-parallelism-piece-historical-count* 0) (setf *initial-threads* (all-threads)) (setf *parallel-evaluation-enabled* t) ; (c) Reset the lock and semaphore recycling system. ; The definitions for the following two are commented out above. ; (setf *lock-allocation-lock* (make-lock)) ; (reset-lock-free-list) (setf *semaphore-allocation-lock* (make-lock)) (reset-semaphore-free-list) ) ; We reset parallelism variables as a standard part of compilation to make ; sure the code behind declaring variables and resetting variables is ; consistent and that we are in a stable state. (reset-parallelism-variables) ;--------------------------------------------------------------------- ; Section: Work Consumer Code ; We develop functions that assign threads to process work. (defun eval-and-save-result (work) ; Work is a piece of parallelism work. Among its fields are a closure and an ; array. We evaluate this closure and save the result into this array. No ; lock is required because no other thread will be writing to the same position ; in the array. ; Keep this in sync with the comment in parallelism-piece, where we explain ; that the result is the cdr of the cons stored in the result array at the ; appropriate position. (assert work) (let ((result-array (parallelism-piece-result-array work)) (array-index (parallelism-piece-array-index work)) (closure (parallelism-piece-closure work))) (setf (aref result-array array-index) (cons t (funcall closure))))) (defun pop-work-and-set-thread () ; Once we exit the without-interrupts that must enclose a call to ; pop-work-and-set-thread, our siblings can interrupt us so that we execute a ; throw to the tag :result-no-longer-needed. The reason they can access us is ; that they will have a pointer to us in the thread array. ; There is a race condition between when work is popped from the *work-queue* ; and when the current thread is stored in the thread-array. This race ; condition could be eliminated by holding *work-queue-lock* during the ; function's entire execution. Since (1) we want to minimize the duration ; locks are held, (2) the chance of this race condition occuring is small and ; (3) there is no safety penalty when this race condition occurs (instead an ; opportunity for early termination is missed), we only hold the lock for the ; amount of time it takes to ready and modify the *work-queue* (let ((work (with-lock *work-queue-lock* (when (consp *work-queue*) (pop *work-queue*)))) (thread (current-thread))) (when work (assert thread) (assert (parallelism-piece-thread-array work)) ; Record that the current thread is the one assigned to do this piece of work: (setf (aref (parallelism-piece-thread-array work) (parallelism-piece-array-index work)) thread)) work)) (defun consume-work-on-work-queue-when-its-there () ; This function is an infinite loop. However, the thread running it can be ; waiting on a condition variable and will expire if it waits too long. ; Each iteration through the main loop will start by trying to grab a piece of ; work to process. When it succeeds, then it will process that piece of work ; and wait again on a condition variable before starting the next iteration. ; But ideally, if it has to wait too long for a piece of work to grab then we ; return from this function (with expiration of the current thread); see below. (catch :worker-thread-no-longer-needed (let* ((*throwable-worker-thread* t)) (loop ; "forever" - really until :worker-thread-no-longer-needed thrown ; Wait until there are both a piece of work and an idle core. In openmcl, if ; the thread waits too long, it throws to the catch above and returns from this ; function. (loop while (not (and *work-queue* (< 0 *idle-core-count*))) ; We can't grab work yet, so we wait until somebody signals us to try again, by ; returning a non-nil value to the call of not, just below. If however nobody ; signals us then ideally (and in OpenMCL but not SBCL) a timeout occurs that ; returns nil to this call of not, so we give up with a throw. do (when (not #+(and openmcl acl2-par) (wait-on-condition-variable-lockless *check-work-and-core-availability-cv* 15) #+(and sbcl sb-thread acl2-par) (wait-on-condition-variable-lockless *check-work-and-core-availability-cv*) #-acl2-par t) (throw :worker-thread-no-longer-needed nil))) ; Now very likely there are both a piece of work and an idle core to process ; it. But a race condition allows either of these to disappear before we can ; claim a piece of work and a CPU core, which explains the use of `when' ; below. (unwind-protect-disable-interrupts-during-cleanup (when (<= 0 (with-lock *idle-core-count-lock* ; allocate CPU core ; We will do a corresponding increment of *idle-core-count* in the cleanup form ; of this unwind-protect. Note that the current thread cannot be interrupted ; (except by direct user intervention, for which we may provide only minimal ; protection) until the call of pop-work-and-set-thread below (see long comment ; above that call), because no other thread has a pointer to this one until ; that time. (decf *idle-core-count*))) (catch :result-no-longer-needed (let ((work nil)) (unwind-protect-disable-interrupts-during-cleanup (progn (without-interrupts (setq work ; The following call has the side effect of putting the current thread into a ; thread array, such that this presence allows the current thread to be ; interrupted by another (via interrupt-thread, in throw-threads-in-array). So ; until this point, the current thread will not be told to do a throw. ; We rely on the following claim: If any state has been changed by this call of ; pop-work-and-set-thread, then that call completes and work is set to a ; non-nil value. This claim guarantees that if any state has been changed, ; then the cleanup form just below will be executed and will clean up properly. ; For example, we would have a problem if pop-work-and-set-thread were ; interrupted after the decrement of *idle-thread-count*, but before work is ; set, since then the matching increment in the cleanup form below would be ; skipped. For another example, if we complete the call of ; pop-work-and-set-thread but not the enclosing setq for work, then we miss the ; semaphore signaling in the cleanup form below. (pop-work-and-set-thread)) (when work (with-lock *idle-thread-count-lock* (decf *idle-thread-count*)))) (when work ; The consumer now has a core (see the <= test above) and a piece of work. (eval-and-save-result work) (let* ((thread-array (parallelism-piece-thread-array work)) (result-array (parallelism-piece-result-array work)) (array-index (parallelism-piece-array-index work)) (throw-siblings-when-function (parallelism-piece-throw-siblings-when-function work))) (setf (aref thread-array array-index) nil) ; The nil setting just above guarantees that the current thread doesn't ; interrupt itself by way of the early termination function. (when throw-siblings-when-function (funcall throw-siblings-when-function (aref result-array array-index)))))) (when work ; process this cleanup form if we acquired work (let* ((semaphore-to-signal-as-last-act (parallelism-piece-semaphore-to-signal-as-last-act work)) (thread-array (parallelism-piece-thread-array work)) (array-index (parallelism-piece-array-index work))) (incf *total-parallelism-piece-historical-count*) (setf (aref thread-array array-index) nil) (with-lock *idle-thread-count-lock* ; Above we argued that if *idle-thread-count* is decremented, then work is set ; and hence we get to this point so that we can do the corresponding ; increment. In the other direction, if we get here, then how do we know that ; *idle-thread-count* was decremented? We know because if we get here, then ; work is non-nil and hence pop-work-and-set-thread must have completed. (incf *idle-thread-count*)) ; Each of the following two decrements undoes the corresponding increment done ; when the piece of work was first created and queued. (with-lock *total-work-count-lock* (decf *total-work-count*)) (with-lock *unassigned-and-active-work-count-lock* (decf *unassigned-and-active-work-count*)) (assert (semaphorep semaphore-to-signal-as-last-act)) (signal-semaphore semaphore-to-signal-as-last-act))))) ) ; end catch :result-no-longer-needed ) ; end when CPU core allocation (with-lock *idle-core-count-lock* (incf *idle-core-count*)) (signal-condition-variable *check-work-and-core-availability-cv*) (signal-condition-variable *check-core-availability-for-resuming-cv*)))) ) ; end catch :worker-thread-no-longer-needed ; The current thread is about to expire because all it was given to do was to ; run this function. (with-lock *idle-thread-count-lock* (decf *idle-thread-count*))) (defun spawn-worker-threads-if-needed () ; This function must be called with interrupts disabled. Otherwise it is ; possible for the *idle-thread-count* to be incremented even though no new ; worker thread is spawned. (loop while (< *idle-thread-count* *max-idle-thread-count*) ; Note that the above test could be true, yet *idle-thread-count* could be ; incremented before we get to the lock just below. But we want as little ; bottleneck as possible for scaling later, and the practical worst consequence is ; that we spawn extra threads here. ; Another possibility is that we spawn too few threads here, because the final ; decrement of *idle-thread-count* in consume-work-on-work-queue-when-its-there ; has not occurred even though a worker thread has decided to expire. If this ; occurs, then we may not have the expected allotment of idle threads for ; awhile, but we expect the other idle threads (if any) and the active threads ; to suffice. Eventually a new parallelism primitive call will invoke this ; function again, at a time when the about-to-expire threads have already ; updated *idle-thread-count*, which will allow this function to create the ; expected number of threads. The chance of any of this kind of issue arising ; is probably extremely small. ; NOTE: Consider coming up with a design that's easier to understand. do (progn (with-lock *idle-thread-count-lock* (incf *idle-thread-count*)) ;(format t "param parent thread ~a: ~s~%" (current-thread) acl2::*param*) (run-thread "Worker thread" 'consume-work-on-work-queue-when-its-there)))) ;--------------------------------------------------------------------- ; Section: Work Producer Code ; We develop functions that create work, to be later processed by threads. Our ; main concern is to keep the work queue sufficiently populated so as to keep ; CPU cores busy, while limiting the total amount of work so that the number of ; threads necessary to evaluate that work does not execede the number of ; threads that the underlying Lisp supports creating. (See also comments in ; *total-work-limit*.) (defun add-work-list-to-queue (work-list) ; Call this function inside without-interrupts, in order to maintain the ; invariant that when this function exits, the counts are accurate. ; WARNING! This function destructively modifies *work-queue*. (let ((work-list-length (length work-list))) (with-lock *work-queue-lock* ; In naive performance tests using a parallel version of Fibonacci, we found ; that (pfib 45) took about 19.35 seconds with (nconc *work-queue* work-list), ; as opposed to 19.7 seconds when we reversed the argument order. We have ; other evidence that suggests switching the argument order. But we follow ; Halstead's 1989 paper "New Ideas in Parallel Lisp: Language Design, ; Implementation, and Programming Tools", by doing the oldest work first. (setf *work-queue* (nconc *work-queue* work-list))) (with-lock *total-work-count-lock* (incf *total-work-count* work-list-length)) (with-lock *unassigned-and-active-work-count-lock* (incf *unassigned-and-active-work-count* work-list-length)) (dotimes (i work-list-length) (signal-condition-variable *check-work-and-core-availability-cv*)))) (defun combine-array-results-into-list (result-array current-position acc) (if (< current-position 0) acc (combine-array-results-into-list result-array (1- current-position) (cons (cdr ; entry is a cons whose cdr is the result (aref result-array current-position)) acc)))) (defun remove-thread-array-from-work-queue-rec (work-queue thread-array array-positions-left) ; The function calling remove-thread-array-from-work-queue must hold the lock ; *work-queue-lock*. ; This function must be called with interrupts disabled. (cond ((equal array-positions-left 0) work-queue) ((atom work-queue) nil) ((equal thread-array (parallelism-piece-thread-array (car work-queue))) (progn (with-lock *total-work-count-lock* (decf *total-work-count*)) (with-lock *unassigned-and-active-work-count-lock* (decf *unassigned-and-active-work-count*)) ; we must signal the parent (assert (semaphorep (parallelism-piece-semaphore-to-signal-as-last-act (car work-queue)))) (signal-semaphore (parallelism-piece-semaphore-to-signal-as-last-act (car work-queue))) (remove-thread-array-from-work-queue-rec (cdr work-queue) thread-array (1- array-positions-left)))) (t (cons (car work-queue) (remove-thread-array-from-work-queue-rec (cdr work-queue) thread-array (1- array-positions-left)))))) (defun remove-thread-array-from-work-queue (thread-array) (without-interrupts (with-lock *work-queue-lock* (setf *work-queue* (remove-thread-array-from-work-queue-rec *work-queue* thread-array (length thread-array)))))) (defun terminate-siblings (thread-array) ; This function supports early termination by eliminating further computation ; by siblings. Siblings not yet assigned a thread are removed from the work ; queue. Siblings that are already active are interrupted to throw with tag ; :result-no-longer-needed. The order of these two operations is important: if ; we do them in the other order, then we could miss a sibling that is assigned ; a thread (and removed from the work queue) just inbetween the two ; operations. (remove-thread-array-from-work-queue thread-array) (throw-threads-in-array thread-array (1- (length thread-array)))) (defun generate-work-list-from-closure-list-rec (thread-array result-array children-done-semaphore closure-list current-position &optional throw-siblings-when-function) (if (atom closure-list) (assert (equal current-position (length thread-array))) ; returns nil (cons (make-parallelism-piece :thread-array thread-array :result-array result-array :array-index current-position :semaphore-to-signal-as-last-act children-done-semaphore :closure (car closure-list) :throw-siblings-when-function throw-siblings-when-function) (generate-work-list-from-closure-list-rec thread-array result-array children-done-semaphore (cdr closure-list) (1+ current-position) throw-siblings-when-function)))) (defun generate-work-list-from-closure-list (closure-list &optional terminate-early-function) ; Given a list of closures, we need to generate a list of work data structures ; that are in a format ready for the work queue. Via mv, we also return the ; pointers to the thread, result, and semaphore arrays. (let* ((closure-count (length closure-list)) (thread-array (make-array closure-count :initial-element nil)) (result-array (make-array closure-count :initial-element nil)) (children-done-semaphore (allocate-semaphore))) (progn ; warning: avoid prog2 as we need to return multiple value (assert (semaphorep children-done-semaphore)) (mv (generate-work-list-from-closure-list-rec thread-array result-array children-done-semaphore closure-list 0 (if terminate-early-function (lambda (x) ; x is (t . result) (when (funcall terminate-early-function (cdr x)) (terminate-siblings thread-array))) nil)) thread-array result-array children-done-semaphore)))) (defun parallelism-resources-available () ; This function is our attempt to guess when resources are available. When ; this function returns true, then resources are probably available, and a ; parallelism primitive call will opt to parallelize. We say "probably" ; because correctness does not depend on our answering exactly. For ; performance, we prefer that this function is reasonably close to an accurate ; implementation that would use locks. Perhaps even more important for ; performance, however, is that we avoid the cost of locks to try to remove ; bottlenecks. ; In summary, it is unneccessary to acquire a lock, because we just don't care ; if we miss a few chances to parallelize, or parallelize a few extra times. (and *parallel-evaluation-enabled* (< *unassigned-and-active-work-count* *unassigned-and-active-work-count-limit*) (< *total-work-count* *total-work-limit*))) (defun throw-threads-in-array (thread-array current-position) ; Call this function to terminate computation for every thread in the given ; thread-array from position current-position down to position 0. We expect ; that thread-array was either created by the current thread's parent or was ; created by the current thread (for its children). ; We require that the current thread not be in thread-array. This requirement ; prevents the current thread from interrupting itself, which could conceivably ; abort remaining recursive calls of this function, or cause a hang in some ; Lisps since we may be operating with interrupts disabled (for example, inside ; the cleanup form of an unwind-protect in OpenMCL 1.1pre or later). (assert thread-array) (when (<= 0 current-position) (let ((current-thread (aref thread-array current-position))) (when current-thread (interrupt-thread current-thread ; The delayed evaluation of (aref thread-array...) below is crucial to keep a ; thread from throwing :result-no-longer-needed outside of the catch for that tag. ; Consume-work-on-work-queue-when-its-there will set the (aref thread-array...) ; to nil when the thread should not be thrown. (lambda () (when (aref thread-array current-position) (throw :result-no-longer-needed nil)))))) (throw-threads-in-array thread-array (1- current-position)))) (defun decrement-children-left (children-left-ptr semaphore-notification-obj) ; This function should be called with interrupts disabled. (when (semaphore-notification-status semaphore-notification-obj) (decf (aref children-left-ptr 0)) (clear-semaphore-notification-status semaphore-notification-obj))) (defun wait-for-children-to-finish (semaphore children-left-ptr semaphore-notification-obj) ; This function is called both in the normal case and in the early-termination ; case. (assert children-left-ptr) (when (<= 1 (aref children-left-ptr 0)) (assert (not (semaphore-notification-status semaphore-notification-obj))) (unwind-protect-disable-interrupts-during-cleanup (wait-on-semaphore semaphore semaphore-notification-obj) (decrement-children-left children-left-ptr semaphore-notification-obj)) (wait-for-children-to-finish semaphore children-left-ptr semaphore-notification-obj))) (defun wait-for-resumptive-parallelism-resources () ; A thread resuming execution after its children finish has a higher priority ; than a thread just beginning execution. As such, resuming threads are ; allowed to "borrow" up to *core-count* CPU cores. That is implemented by ; waiting until *idle-core-count* is greater than the negation of the ; *core-count*. This is different from a thread just beginning execution, ; which waits for *idle-core-count* to be greater than 0. (loop while (<= *idle-core-count* (- *core-count*)) ; So, *idle-core-count* is running a deficit that is at least the number of ; cores: there are already *core-count* additional active threads beyond the ; normal limit of *core-count*. do (wait-on-condition-variable-lockless *check-core-availability-for-resuming-cv*)) (with-lock *unassigned-and-active-work-count-lock* (incf *unassigned-and-active-work-count*)) (with-lock *idle-core-count-lock* (decf *idle-core-count*))) (defun early-terminate-children-and-rewait (children-done-semaphore children-left-ptr semaphore-notification-obj thread-array) ; This function performs three kinds of actions. ; A. It signals children-done-semaphore once for each child that is unassigned ; (i.e. still on the work queue) and removes that child from the work queue. ; B. It interrups each assigned child's thread with a throw that terminates ; processing of its work. Note that we must do Step B after Step A: otherwise ; threads might grab work after Step B but before Step A, resulting in child ; work that is no longer available to terminate unless we call this function ; again. ; C. The above throw from Step B eventually causes the interrupted threads to ; signal children-done-semaphore. The current thread waits for those remaining ; signals. (when (< 0 (aref children-left-ptr 0)) (remove-thread-array-from-work-queue ; A ; Signal children-done-semaphore, which is in each piece of work in ; closure-list. thread-array) (throw-threads-in-array thread-array ; B (1- (length thread-array))) (wait-for-children-to-finish ; C children-done-semaphore children-left-ptr semaphore-notification-obj))) #+acl2-par (defun prepare-to-wait-for-children () ; This function should be executed with interrupts disabled, after all child ; work is added to the work queue but before the current thread waits on such ; work to finish. ; First, since we are about to enter the pending state, we must free CPU core ; resources and notify other threads. (with-lock *idle-core-count-lock* (incf *idle-core-count*)) (signal-condition-variable *check-work-and-core-availability-cv*) (signal-condition-variable *check-core-availability-for-resuming-cv*) ; Second, record that we are no longer active. (Note: We could avoid the ; following form (thus saving a lock) by incrementing ; *unassigned-and-active-work-count* by one less in add-work-list-to-queue.) (with-lock *unassigned-and-active-work-count-lock* (decf *unassigned-and-active-work-count*))) #+acl2-par (defun parallelize-closure-list (closure-list &optional terminate-early-function) ; Given a list of closures, we: ; 1. Create a list of pieces of work (see defstruct parallelism-piece). ; 2. If there aren't enough idle worker threads, we spawn a reasonably ; sufficient number of new worker threads, so that CPU cores are kept busy but ; without the needless overhead of useless threads. Note that when a thread ; isn't already assigned work, it is waiting for notification to look for work ; to do. ; 3. Add the work to the work queue, which notifies the worker threads of the ; additional work. ; 4. Free parallelism resources (specifically, a CPU core), since we are about ; to become idle as we wait children to finish. Issue the proper notifications ; (via condition variables) so that other threads are aware of the freed ; resources. ; 5. Wait for the children to finish. In the event of receiving an early ; termination from our parent (a.k.a. the grandparent of our children) or our ; sibling (a.k.a. the uncle of our children), we signal our children to ; terminate early, and we wait again. ; Note that if the current thread's children decide the remaining child results ; are irrelevant, that the current thread will never know it. The children ; will terminate early amongst themselves without any parent intervention. ; 6. Resume when resources become available, reclaiming parallelism resources ; (see wait-for-resumptive-parallelism-resources). ; 7. Return the result. ; It's silly to parallelize just 1 (or no!) thing. The definitions of pargs, ; plet, pand, and por should prevent this assertion from failing, but we have ; it here as a check that this is true. (assert (and (consp closure-list) (cdr closure-list))) (let ((work-list-setup-p nil) (semaphore-notification-obj (make-semaphore-notification)) (children-left-ptr (make-array 1 :initial-element (length closure-list)))) ; 1. Create a list of pieces of work. (mv-let (work-list thread-array result-array children-done-semaphore) (generate-work-list-from-closure-list closure-list terminate-early-function) (assert (semaphorep children-done-semaphore)) (unwind-protect-disable-interrupts-during-cleanup (progn (without-interrupts ; 2. Spawn worker threads so that CPU cores are kept busy. (spawn-worker-threads-if-needed) ; 3. Add the work to the work queue. (setq work-list-setup-p (progn (add-work-list-to-queue work-list) t)) ; 4. Free parallelism resources. (prepare-to-wait-for-children)) ; 5a. Wait for children to finish. But note that we may be interrupted by our ; sibling or our parent before this wait is completed. ; Now that the two operations under the above without-interrupts are complete, ; it is once again OK to be interrupted with a function that throws to the tag ; :results-no-longer-needed. Note that wait-for-children-to-finish is called ; again in the cleanup form below, so we don't have to worry about dangling ; child threads even if we don't complete evaluation of the following form. (wait-for-children-to-finish children-done-semaphore children-left-ptr semaphore-notification-obj)) ; We are entering the cleanup form, which we always need to run (in particular, ; so that we can resume and return a result). But why must we run without ; interrupts? Suppose for example we have been interrupted (to do a throw) by ; the terminate-early-function of one of our siblings or by our parent. Then ; we must wait for all child pieces of work to terminate (see ; early-terminate-children-and-rewait) before we return. And this waiting must ; be non-abortable; otherwise, for example, we could be left (after Control-c ; and an abort) with orphaned child threads. (progn (when work-list-setup-p ; children were added to *work-queue* ; If we were thrown by a sibling or parent, it's possible that our children ; didn't finish. We now throw our children and wait for them. ; 5b. Complete processing of our children in case we were interrupted when we ; were waiting the first time. (early-terminate-children-and-rewait children-done-semaphore children-left-ptr semaphore-notification-obj thread-array) ; AS OF *HERE*, ALL OF THIS PARENT'S CHILD WORK IS "DONE" ; 6. Resume when resources become available. (wait-for-resumptive-parallelism-resources) (assert (eq (aref children-left-ptr 0) 0)) ; Free semaphore for semaphore recycling. (free-semaphore children-done-semaphore)))) ; 7. Return the result. (combine-array-results-into-list result-array (1- (length result-array)) nil)))) ; Parallelize-fn Booleanizes the results from pand/por. #+acl2-par (defun parallelize-fn (parent-fun-name arg-closures &optional terminate-early-function) ; It's inefficient to parallelize just one (or no!) computation. The ; definitions of pargs, plet, pand, and por should prevent this assertion from ; failing, but we have it here as a check that this is true. (assert (cdr arg-closures)) (let ((parallelize-closures-res (parallelize-closure-list arg-closures terminate-early-function))) (if (or (equal parent-fun-name 'and-list) (equal parent-fun-name 'or-list)) (funcall parent-fun-name parallelize-closures-res) (apply parent-fun-name parallelize-closures-res)))) (defmacro closure-for-expression (x) `(function (lambda () ,x))) (defmacro closure-list-for-expression-list (x) (if (atom x) nil `(cons (closure-for-expression ,(car x)) (closure-list-for-expression-list ,(cdr x))))) ;--------------------------------------------------------------------- ; Section: Parallelism Primitives #+acl2-par (defmacro pargs (&rest forms) ; This is the raw lisp version for threaded Lisps. (mv-let (erp msg gran-form-exists gran-form remainder-forms) (check-and-parse-for-granularity-form forms) (declare (ignore msg)) (assert (not erp)) (let ((function-call (car remainder-forms))) (if (null (cddr function-call)) ; whether there are two or more arguments function-call (list 'if (if gran-form-exists `(and (parallelism-resources-available) ; We check availability of resources before checking the granularity form, ; since the latter can be arbitrarily expensive. ,gran-form) '(parallelism-resources-available)) (list 'parallelize-fn (list 'quote (car function-call)) (list 'closure-list-for-expression-list (cdr function-call))) function-call))))) (defun plet-doublets (bindings bsym n) (cond ((endp bindings) nil) (t (cons (list (caar bindings) (list 'nth n bsym)) (plet-doublets (cdr bindings) bsym (1+ n)))))) (defun make-closures (bindings) ; We return a list of forms (function (lambda () )), each of which ; evaluates to a closure, as ranges over the expression components of ; the given plet bindings. Note that this function is only called on the ; bindings of a plet expression that has passed translate -- so we know that ; bindings has the proper shape. (if (endp bindings) nil (cons `(function (lambda () ,(cadar bindings))) (make-closures (cdr bindings))))) (defun do-let (results body-closure) (apply body-closure results)) (defun identity-list (&rest rst) rst) (defun make-variable-list (x) (if (atom x) nil (cons (caar x) (make-variable-list (cdr x))))) (defun make-list-until-non-declare (remaining-list acc) (if (not (caar-is-declarep remaining-list)) (mv (reverse acc) remaining-list) (make-list-until-non-declare (cdr remaining-list) (cons (car remaining-list) acc)))) (defun parse-additional-declare-forms-for-let (x) ; X is a list of forms from a well-formed plet, with the plet and optional ; granularity form removed. It thus starts with bindings and is followed by ; any finite number of valid declare forms, and finally a body. (mv-let (declare-forms body) (make-list-until-non-declare (cdr x) nil) (mv (car x) declare-forms body))) #+acl2-par (defmacro plet (&rest forms) ; This is the raw Lisp version for threaded Lisps. (mv-let (erp msg gran-form-exists gran-form remainder-forms) (check-and-parse-for-granularity-form forms) (declare (ignore msg)) (assert (not erp)) (mv-let (bindings declare-forms body) (parse-additional-declare-forms-for-let remainder-forms) (cond ((null (cdr bindings)) ; at most one binding `(let ,bindings ,@declare-forms ,@body)) (t (list 'if (if gran-form-exists `(and (parallelism-resources-available) ,gran-form) '(parallelism-resources-available)) (let ((bsym (gensym))) `(let ((,bsym (parallelize-fn 'identity-list (list ,@(make-closures bindings))))) (let ,(plet-doublets bindings bsym 0) ,@declare-forms ,@body))) `(let ,bindings ,@declare-forms ,@body))))))) (defun and-list (x) (declare (xargs :guard (true-listp x))) (if (endp x) t (and (car x) (and-list (cdr x))))) #+acl2-par (defmacro pand (&rest forms) ; This is the raw Lisp version for threaded Lisps. (mv-let (erp msg gran-form-exists gran-form remainder-forms) (check-and-parse-for-granularity-form forms) (declare (ignore msg)) (assert (not erp)) (if (null (cdr remainder-forms)) ; whether pand has only one argument (list 'if (car remainder-forms) t nil) (let ((and-early-termination-function '(lambda (x) (null x)))) (list 'if (if gran-form-exists `(and (parallelism-resources-available) ,gran-form) '(parallelism-resources-available)) (list 'parallelize-fn ''and-list (list 'closure-list-for-expression-list remainder-forms) and-early-termination-function) (list 'if (cons 'and remainder-forms) t nil)))))) (defun or-list (x) (declare (xargs :guard (true-listp x))) (if (endp x) nil (if (car x) t (or-list (cdr x))))) #+acl2-par (defmacro por (&rest forms) ; This is the raw Lisp version for threaded Lisps. (mv-let (erp msg gran-form-exists gran-form remainder-forms) (check-and-parse-for-granularity-form forms) (declare (ignore msg)) (assert (not erp)) (if (null (cdr remainder-forms)) ; whether por has one argument (list 'if (car remainder-forms) t nil) (let ((or-early-termination-function '(lambda (x) x))) (list 'if (if gran-form-exists `(and (parallelism-resources-available) ,gran-form) '(parallelism-resources-available)) (list 'parallelize-fn ''or-list (list 'closure-list-for-expression-list remainder-forms) or-early-termination-function) (list 'if (cons 'or remainder-forms) t nil)))))) #| ; Some very basic examples follow: ; Serial version of Fibonacci (defun fib (x) (cond ((<= x 0) 0) ((= x 1) 1) (t (let ((a (fib (- x 1))) (b (fib (- x 2)))) (+ a b))))) ; Parallelized version of Fibonacci, using plet (defun pfib (x) (cond ((<= x 0) 0) ((= x 1) 1) ; note the use of PAR:: before granularity (t (PAR::plet (declare (PAR::granularity (> x 33))) ((a (pfib (- x 1))) (b (pfib (- x 2)))) (+ a b))))) (assert (equal (fib 35) (pfib 35))) (defun binary-+ (x y) (+ x y)) ; Parallel version of Fibonacci, using pargs (defun pfib-with-pargs (x) (cond ((<= x 0) 0) ((= x 1) 1) ; note the use of PAR:: before granularity (t (PAR::pargs (declare (PAR::granularity (> x 33))) (binary-+ (pfib-with-pargs (- x 1)) (pfib-with-pargs (- x 2))))))) (assert (equal (fib 35) (pfib-with-pargs 35))) |#