; Simple Test Suite for
; Rewriting for Symbolic Execution of State Machine Models
; by
; J Strother Moore
; The problems in this test suite are MUCH smaller and MUCH more
; regular than the proprietary industrial examples for which the
; nu-rewriter was developed.
; The ACL2 forms in this file were used to generate Figure 1 of the
; above paper.
; -------------------------------------------------------
; Theorem standard ACL2 nu-ACL2
; b-phase1 0.48 0.01
; b-phase1-phase1 128.76 0.01
; a-phase1 0.41 0.04
; a-machine 139.39 0.02
; b-machine 143.91 0.02
; -------------------------------------------------------
; Fig. 1: Seconds to Prove Theorem on 731 MHz Pentium III
; After discussing how I used this file to generate the data in the
; figure, I discuss the test suite itself, from the perspective of
; someone who might want to benchmark another prover on the analogous
; problem in that other logic.
; To Generate the Data in Figure 1
; The data in Figure 1 was generated with a pre-released version of
; ACL2 Version 2.6 running under Allegro Common Lisp on a 731 MHz
; Pentinum III with Debian GNU Linux.
; I fed this file into that ACL2 and then used grep to recover the
; data:
; % acl2 < test-suite.lisp > test-suite.out1
; % grep -A 2 Time test-suite.out1
; Grep prints some irrelevant lines (like how long it takes to process
; the definitions) and I extract the relevant times and enter them
; into the raw table below. I repeated the test two times. The
; table above is produced by averaging the two tests.
(value :q)
(lp)
(set-inhibit-output-lst '(proof-tree prove))
; To benchmark standard ACL2, include these two lines and comment out the
; ones below.
; (set-nu-rewriter-mode nil)
; (in-theory (disable nth update-nth))
; To benchmark the nu-rewriter, include these two and comment out the ones
; above.
(set-nu-rewriter-mode :literals)
(in-theory (disable nth update-nth nth-update-nth))
; The in-theory disables the functions nth and update-nth so
; that they are not expanded. Otherwise, standard ACL2 expands them
; into list-processing primitives (car, cdr, consp and cons) and thus
; tackles a different problem than the nu-rewriter does.
; The Test Suite
; It is anticipated that some readers will be interested in trying out
; the analogues of these problems with other mechanical provers. Such
; a reader must, of course, translate the problem in a fair way to
; that other system. Most of our comments below explain informally
; what the more mysterious ACL2 commands mean.
; Readers unfamiliar with ACL2 can learn more about the forms below by
; reading the ACL2 online documentation, available from the ACL2 home
; page http://www.cs.utexas.edu/users/moore/acl2.
; The next form declares s to be a ``single-threaded object.''
; Single-threadedness is not important to the rewriting test. It is
; only relevant to the efficient execution of these functions on
; concrete states, as would be necessary if the functions were to be
; used as a simulator. This test suite is set up also to illustrate
; how efficiently executed state-machines are defined in ACL2.
(defstobj s a b)
; The above defstobj command adds four definitions relevant to the
; nu-rewriter.
; (defun a (s) (nth 0 s))
; (defun b (s) (nth 1 s))
; (defun update-a (v s) (update-nth 0 v s))
; (defun update-b (v s) (update-nth 1 v s))
; The only logically relevant fact about nth and update-nth is the
; equality expressed in nth-update-nth! above. In a syntactically
; typed logic, the rewrite rule would simply be:
; (equal (nth i (update-nth j v s))
; (if (equal i j)
; v
; (nth i s)))
; But our version employs the mysterious function nfix. The
; definition of nfix in ACL2 is
; (defun nfix (x) (if (and (integerp x) (>= x 0)) x 0))
; The function can be thought of as ``coercing'' its argument to a
; natural number (0 by default). In ACL2, Both nth and update-nth
; treat their index arguments as naturals.
; Next, we introduce three undefined functions, each taking two
; arguments and returning one result.
(defstub v0 (i x) t)
(defstub v1 (i x) t)
(defstub v2 (i x) t)
; We define phase1, as described in the paper. The declare form notes
; that s is used in a single-threaded way, which affects the
; compilation of phase1 but not the nu-rewriter. So it is irrelevant
; for our purposes.
(defun phase1 (s)
(declare (xargs :stobjs (s)))
(let ((s (update-a (if (v0 1 (a s))
(v1 1 (a s))
(v2 1 (a s)))
s)))
(let ((s (update-a (if (v0 2 (a s))
(v1 2 (a s))
(v2 2 (a s)))
s)))
(let ((s (update-a (if (v0 3 (a s))
(v1 3 (a s))
(v2 3 (a s)))
s)))
(let ((s (update-a (if (v0 4 (a s))
(v1 4 (a s))
(v2 4 (a s)))
s)))
(let ((s (update-a (if (v0 5 (a s))
(v1 5 (a s))
(v2 5 (a s)))
s)))
(let ((s (update-a (if (v0 6 (a s))
(v1 6 (a s))
(v2 6 (a s)))
s)))
s)))))))
; Here are the first three test theorems. Each is proved with
; rule-classes nil, which means that the theorem is not stored in the
; rewrite-rule data base and consequently is not available for future
; use as a rule. To do otherwise would allow b-phase1-phase1 to be
; proved trivially by two applications of b-phase1.
(defthm b-phase1
(equal (b (phase1 s)) (b s))
:rule-classes nil)
(defthm b-phase1-phase1
(equal (b (phase1 (phase1 s))) (b s))
:rule-classes nil)
(defun next-a (a)
(let ((a (if (v0 1 a) (v1 1 a) (v2 1 a))))
(let ((a (if (v0 2 a) (v1 2 a) (v2 2 a))))
(let ((a (if (v0 3 a) (v1 3 a) (v2 3 a))))
(let ((a (if (v0 4 a) (v1 4 a) (v2 4 a))))
(let ((a (if (v0 5 a) (v1 5 a) (v2 5 a))))
(let ((a (if (v0 6 a) (v1 6 a) (v2 6 a))))
a)))))))
(defthm a-phase1
(equal (a (phase1 s)) (next-a (a s)))
:rule-classes nil)
; Note: The theorem that might be called a-phase1-phase1 involves a
; very big IF expression. The IF (rather than the recovery of the
; a components) causes explosions. This test suite exploits that in
; the next set of theorems...
; Now we define the ``more complicated'' machine as noted in the paper.
(defun phase0 (s)
(declare (xargs :stobjs (s)))
(let ((s (update-b (a s) s)))
s))
(defun phase2 (s)
(declare (xargs :stobjs (s)))
(let ((s (update-a (b s) s)))
s))
(defun machine (s)
(declare (xargs :stobjs (s)))
(let ((s (phase0 s)))
(let ((s (phase1 s)))
(let ((s (phase1 s)))
(let ((s (phase2 s)))
s)))))
; And here are the last two test theorems. Note that an intermediate result
; is comparable to a-phase1-phase1, but it is not ever simplified.
(defthm a-machine
(equal (a (machine s)) (a s))
:rule-classes nil)
(defthm b-machine
(equal (b (machine s)) (a s))
:rule-classes nil)
(good-bye)
; Here is the raw table from which averages were computed.
; -------------------------------------------------------
; Theorem standard ACL2 nu-ACL2
; b-phase1 0.49 <0.01
; 0.48 0.01
; b-phase1-phase1 127.48 0.01
; 130.05 0.01
; a-phase1 0.42 0.04
; 0.41 0.04
; a-machine 139.31 0.02
; 139.47 0.01
; b-machine 144.05 0.02
; 143.78 0.02
; -------------------------------------------------------
; For what it is worth, the standard ACL2 proof of a-machine contains
; 1,305,961 lines and 42,251,010 characters. The entire test-suite
; proof log for standard ACL2 is 129,192,953 bytes long and takes
; about 326 seconds. The entire test-suite proof log for nu-ACL2 is
; 8436 bytes long and takes .23 seconds.