Recursion Continued

An Extended Example: Satisfiability of Boolean Expressions

A Boolean expression is an expression where all the operators are logical (NOT, AND, and OR) and connect variables that are truth values. In C, Boolean expressions look like this:
v1 || v2 && v3 || v4 || !v5
or this:
(v1 || v3) && (!v4 || !(v6 && v7))
A satisfying assignment to a Boolean expression is a set of assignments to the variables that will cause the expression to evaluate to true. For example, a satisfying assignment to v1 && v2 is v1 = 1, v2 = 1.

Some Boolean expressions have no satisfying assignment. For example, v1 && !v1 && v2 is unsatisfiable, since no assignment to the variables will cause it to be true. (By contrast, v1 && !v1 || v2 is satisfiable: && binds more tightly than ||, so setting v2 = 1 makes it true.)

Part of the job of a compiler is to eliminate dead code, code that can't possibly be executed during a program. If a compiler comes upon some code that looks like this:

	if (v1 && !v1 && v2) {
		printf ("hello!");
	}
it should eliminate it and possibly give a warning, since the printf statement can't ever be executed because of the unsatisfiable expression.

We would like to write a program that reads in a Boolean expression and checks to see if it is satisfiable. If it is, we should print an assignment that satisfies it (as proof). Otherwise, we should print out "unsatisfiable."

Reading Expressions with a Recursive Descent Parser

First, we will have to read in the Boolean expressions. Expressions can be arbitrarily parenthesized, e.g. ((!v1) || (v2 && !v3)) || v2, and the operators have different precedences (priority over each other), so we will have to be careful how we do this.

The highest precedence is given to parenthesized expressions, then the NOT operator, then AND, then OR. We can write a grammar for expressions that shows this precedence. The following grammar is in BNF format. A BNF grammar shows the structure of a language, in our case, the language of expressions. Curly braces indicate optional items that are repeated; everything else is pretty much intuitive. Here is the grammar:

expr : orexpr
orexpr : andexpr { "||" andexpr }
andexpr : notexpr { "&&" notexpr }
notexpr : { "!" } parenexpr
parenexpr : "v"{ digit } | "(" expr ")"
digit : 0 - 9
This grammar can be almost mechanically transformed into a kind of C program called a recursive descent parser. But first, we need a way of reading in the "atomic" (indivisible) elements of the grammar: &&, ||, !, (, ), and variable names beginning with v. To do this, we'll write up a simple lexical analyzer returning these types of items as tokens, or structures containing lexical information (lexical just means related to text):
/*
 * lex.h
 *
 * Definitions for the lexical analyzer (reading in stuff)
 */

/* token types: the kind of lexical item a token holds */

enum {
	AND_TOK = 0,	/* "&&" */
	OR_TOK  = 1,	/* "||" */
	NOT_TOK = 2,	/* "!" */
	VAR_TOK = 3,	/* a variable, 'v' followed by digits */
	LP_TOK  = 4,	/* "(" */
	RP_TOK  = 5,	/* ")" */
	EOF_TOK = 6	/* end of input */
};

/* token structure: one lexical item read from the input */

typedef struct _token {
	int	type;	/* kind of token (AND_TOK, OR_TOK, etc.) */
	int	varno;	/* variable number, used by VAR_TOK tokens */
} token;

/* get a single token from standard input
 *
 * Returns the next token.  Its type is one of the token types above;
 * EOF_TOK is returned once standard input is exhausted.
 */

token get_token (void);

/*
 * lex.c
 *
 * Lexical analyzer.  That just means it reads in stuff and packages it
 * in neat little "tokens" 
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lex.h"

/* get a character from standard input, eliding whitespace
 *
 * Skips newlines, tabs and spaces and returns the first other character
 * read.  When getchar() reports end of file or a read error, the EOF
 * value is returned converted to char; callers should test
 * feof (stdin) rather than inspect that value.
 */

char get_nonwhite_char (void) {
	int	ch;	/* int, not char: getchar() returns an int */

	/* get a character, repeat while it is white.  the comparisons are
	 * spelled out because strchr() also matches the terminating NUL
	 * of its string, which made NUL bytes count as whitespace
	 */

	do {
		ch = getchar ();
	} while (ch == '\n' || ch == '\t' || ch == ' ');
	return (char) ch;
}

/* get and return a token
 *
 * Reads the next lexical item from standard input, keeping a
 * one-character lookahead between calls.  The returned token's type is
 * one of the *_TOK codes, or -1 when the input character starts no
 * known token (a message is printed and the character is discarded).
 * For VAR_TOK tokens, varno is the decimal number following the 'v'
 * (0 when no digits follow); numbers above VARNO_LIMIT are reported
 * and clamped to VARNO_LIMIT.
 *
 * Digits are fetched with get_nonwhite_char(), so whitespace between
 * the digits of a variable number is ignored.
 */

token get_token (void) {
	/* largest variable number reported.  32767 is the smallest INT_MAX
	 * the C standard permits, so the arithmetic below cannot overflow
	 */
	enum { VARNO_LIMIT = 32767 };

	token	t;
	static int init = 1;	/* true until the lookahead is primed */
	static char lk;		/* lookahead character */
	int	digit;
	int	too_big = 0;	/* set once the number has been clamped */

	/* put nonsense value in for debugging */

	t.type = -1;
	t.varno = 0;

	/* if uninitialized, initialize the lookahead character.  this is
	 * done before the end-of-input test so that empty input produces
	 * an EOF token instead of a complaint about the EOF value
	 */

	if (init) {
		init = 0;
		lk = get_nonwhite_char();
	}

	/* if input is exhausted or unreadable, return an EOF token */

	if (feof (stdin) || ferror (stdin)) {
		t.type = EOF_TOK;
		return t;
	}

	/* based on the next character in input stream... */

	switch (lk) {
		/* left paren token */
		case '(': t.type = LP_TOK; lk = get_nonwhite_char(); break;

		/* right paren token */
		case ')': t.type = RP_TOK; lk = get_nonwhite_char(); break;

		/* NOT token */
		case '!': t.type = NOT_TOK; lk = get_nonwhite_char(); break;

		/* variable token */
		case 'v':
			t.type = VAR_TOK;
			lk = get_nonwhite_char();

			/* accumulate the digits straight into varno.  no
			 * buffer is used, so a long run of digits cannot
			 * overrun anything; a range test is used because
			 * strchr() would also accept a NUL character
			 */

			while (lk >= '0' && lk <= '9') {
				digit = lk - '0';
				if (t.varno > (VARNO_LIMIT - digit) / 10) {
					if (!too_big) {
						printf ("variable number too large!\n");
					}
					too_big = 1;
					t.varno = VARNO_LIMIT;
				} else {
					t.varno = t.varno * 10 + digit;
				}
				lk = get_nonwhite_char ();
			}
			break;

		/* get an AND token */

		case '&':
			t.type = AND_TOK;
			lk = get_nonwhite_char();

			/* should be two &'s */

			if (lk != '&') {
				printf ("&& expected!\n");
			} else {
				lk = get_nonwhite_char();
			}
			break;

		/* get an OR token */

		case '|':
			t.type = OR_TOK;
			lk = get_nonwhite_char();

			/* should be two |'s */

			if (lk != '|') {
				printf ("|| expected!\n");
			} else {
				lk = get_nonwhite_char();
			}
			break;

		/* anything else: complain, then skip the character so the
		 * next call does not trip over it again
		 */

		default:
			printf ("what does '%c' mean?\n", lk);
			lk = get_nonwhite_char();
			break;
	}
	return t;
}
Ok, that was horrible. Now we can write the recursive descent parser. The name "recursive descent" means the parser recursively descends into the expression, then returns with the entire expression in a parse tree, a kind of linked list with the essential structure of the expression.

We store the parse tree in special linked list nodes. Here is expr.h, containing a definition for these nodes and some function prototypes for the parser:

/*
 * expr.h
 *
 * Definitions for the recursive descent parser for Boolean expressions
 */
/* parse tree node types */
enum {
	NOT = 0,	/* logical negation, one operand */
	AND = 1,	/* conjunction, two operands */
	OR  = 2,	/* disjunction, two operands */
	VAR = 3		/* a variable */
};

/*
 * parse tree node; an expr is a pointer to the root node of a tree
 */
typedef struct _node {
	int		type;		/* kind of node (NOT, AND, OR, VAR) */
	int		varno;		/* variable number, for a VAR node */
	struct _node	*left_op;	/* left operand, if any (NOT, AND, OR) */
	struct _node	*right_op;	/* right operand, if any (AND, OR) */
} node, *expr;

/* get an expression: read tokens until a complete expression has
 * been parsed and return its parse tree
 */
expr get_expr (void);

/* parse an OR expression: one or more AND expressions joined by "||" */
expr orexpr (void);

/* parse an AND expression: one or more NOT expressions joined by "&&" */
expr andexpr (void);

/* parse a NOT expression: any number of "!" in front of a
 * parenthesized expression
 */
expr notexpr (void);

/* parse a parenthesized expression: a variable, or an expression
 * enclosed in "(" and ")"
 */
expr parenexpr (void);

/* print a parse tree as a parsable expression on standard output */
void print_expr (expr);
So the get_expr() function returns a special linked parse tree that gives the structure of the entire expression. Here is expr.c, where all this takes place:
/*
 * expr.c
 *
 * Functions to parse a stream of tokens into Boolean expression
 * parse trees.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "expr.h"
#include "lex.h"

/*
 * This is a BNF grammar for what these functions parse.
 *
 * expr : orexpr
 * orexpr : andexpr { "||" andexpr }
 * andexpr : notexpr { "&&" notexpr }
 * notexpr : { "!" } parenexpr
 * parenexpr : var | "(" expr ")"
 */

/* "lookahead" token.  This global token variable always contains the
 * next item in the stream of tokens, so we can see what's ahead of
 * us without actually reading it (so we can decide what to do next
 * without getting rid of the next token).  get_expr() fills it in
 * first; the parsing functions replace it with the result of
 * get_token() whenever they use up a token.
 */
token lk;

/* return a new parse tree node
 *
 * The node is obtained with malloc(); no code shown here frees it.
 * If no memory is available, a message is printed on standard error
 * and the program exits with status 1, because the parsing functions
 * use the result without checking it.
 */

expr newnode (void) {
	node	*p;

	/* get some memory */

	p = malloc (sizeof *p);
	if (!p) {
		fprintf (stderr, "out of memory!\n");
		exit (1);
	}

	/* we want a node that is used uninitialized to stand out,
	 * so we pretend it's a VAR with an implausibly high number
	 */
	p->type = VAR;
	p->varno = 12345678;
	p->left_op = NULL;
	p->right_op = NULL;
	return p;
}

/* get an expression from token stream
 *
 * Primes the lookahead token, parses one OR expression and returns
 * its parse tree.  Prints "extraneous info" if tokens are left over.
 */

expr get_expr (void) {
	expr	tree;

	/* prime the lookahead, then parse: a whole expression is just
	 * an OR expression
	 */

	lk = get_token ();
	tree = orexpr ();

	/* anything other than end of file here is left-over input */

	if (lk.type != EOF_TOK) {
		printf ("extraneous info\n");
	}
	return tree;
}

/* get an OR expression: either an andexpr or andexpr || orexpr
 *
 * Returns the andexpr itself when no "||" follows it; otherwise an OR
 * node whose left operand is the andexpr and whose right operand is
 * the rest of the OR expression.
 */

expr orexpr (void) {
	expr	lhs, disj;

	lhs = andexpr ();

	/* no "||" ahead: the AND expression stands on its own */

	if (lk.type != OR_TOK) {
		return lhs;
	}

	/* use up the "||" */

	lk = get_token ();

	/* build the OR node; the right operand is parsed recursively */

	disj = newnode ();
	disj->type = OR;
	disj->left_op = lhs;
	disj->right_op = orexpr ();
	return disj;
}

/* get an AND expression: either a notexpr or notexpr && andexpr
 *
 * Returns the notexpr itself when no "&&" follows it; otherwise an
 * AND node whose left operand is the notexpr and whose right operand
 * is the rest of the AND expression.
 */

expr andexpr (void) {
	expr	lhs, conj;

	lhs = notexpr ();

	/* no "&&" ahead: nothing to combine */

	if (lk.type != AND_TOK) {
		return lhs;
	}

	/* use up the "&&", then build the AND node */

	lk = get_token ();
	conj = newnode ();
	conj->type = AND;
	conj->left_op = lhs;
	conj->right_op = andexpr ();
	return conj;
}

/* get a NOT expression: either a parenexpr or ! notexpr
 *
 * Returns the parenexpr itself when no "!" is ahead; otherwise a NOT
 * node whose left operand is the negated expression.
 */

expr notexpr (void) {
	expr	f;

	/* no "!" ahead: this is a plain parenthesized expression */

	if (lk.type != NOT_TOK) {
		return parenexpr ();
	}

	/* use up the "!", then negate whatever NOT expression follows */

	lk = get_token ();
	f = newnode ();
	f->type = NOT;
	f->left_op = notexpr ();
	return f;
}

/* get a parenthesized expression : either ( expr ) or a variable
 *
 * Returns the parse tree of the enclosed expression or a new VAR
 * node.  If the lookahead token is neither "(" nor a variable, a
 * message is printed and NULL is returned.
 */

expr parenexpr (void) {
	expr	result;

	switch (lk.type) {

		/* "(": parse what is inside, then expect ")" */

		case LP_TOK:
			lk = get_token ();
			result = orexpr ();
			if (lk.type == RP_TOK) {
				lk = get_token ();
			} else {
				printf ("parse error! ')' expected!\n");
			}
			return result;

		/* a variable: make a VAR node carrying its number */

		case VAR_TOK:
			result = newnode ();
			result->type = VAR;
			result->varno = lk.varno;
			lk = get_token ();
			return result;

		/* nothing that can start a parenthesized expression */

		default:
			printf ("parse error!\n");
			return NULL;
	}
}

/* print a parsable expression on standard output
 *
 * AND and OR nodes are printed fully parenthesized, a NOT node as
 * "! (operand)", a VAR node as 'v' followed by its number, and a
 * missing (NULL) expression as "()".
 */

void print_expr (expr e) {

	/* a missing operand; this shouldn't happen */

	if (e == NULL) {
		printf ("()");
		return;
	}

	if (e->type == AND || e->type == OR) {

		/* binary operator: ( left op right ) */

		printf ("(");
		print_expr (e->left_op);
		if (e->type == AND) {
			printf (" && ");
		} else {
			printf (" || ");
		}
		print_expr (e->right_op);
		printf (")");
	} else if (e->type == NOT) {

		/* negation: ! (left) */

		printf ("! (");
		print_expr (e->left_op);
		printf (")");
	} else if (e->type == VAR) {

		/* variable: v then the number */

		printf ("v%d", e->varno);
	} else {
		printf ("huh %d?\n", e->type);
	}
}

Recursive Search with Backtracking

Now that we know how to read in expressions, we have to be able to test them for satisfiability. To do this, we will search (recursively, of course!) for a satisfying assignment. We will start with an "empty" assignment, then set the first variable to "false" and recursively try to satisfy the expression. If that doesn't work, we'll backtrack to the first variable and try "true", and so on.

Here is sat.c, containing the main program and code to search for satisfying assignments. It contains lots of extra code for seeing whether an assignment (represented in an array) satisfies an expression etc. The important function is sat, which does the backtracking search.

#include <stdio.h>
#include "expr.h"
#include "lex.h"

/* return true if assignments to variable in t satisfies e
 *
 * e  parse tree to evaluate
 * t  truth values indexed by variable number; it must have an entry
 *    for every variable number that appears in e
 *
 * A missing operand (a NULL pointer, which the parser leaves behind
 * after a parse error) and a node of unknown type both count as false.
 */

int eval (expr e, int t[]) {

	/* don't dereference a missing operand */

	if (!e) return 0;

	switch (e->type) {
		case VAR: return t[e->varno];
		case AND: return eval (e->left_op, t) && eval (e->right_op, t);
		case OR: return eval (e->left_op, t) || eval (e->right_op, t);
		case NOT: return ! eval (e->left_op, t);

		/* always return a value, whatever the node type */

		default: return 0;
	}
}

/* return the larger of two ints (b when they are equal) */

int max2 (int a, int b) {
	return (a > b) ? a : b;
}

/* return the largest of three ints */

int max3 (int a, int b, int c) {
	int	larger;

	/* settle the first two, then compare the winner with the third */

	larger = max2 (a, b);
	return max2 (larger, c);
}

/* size of the per-variable arrays: variable numbers 0 .. MAXVARS-1
 * can be handled
 */
#define MAXVARS	100

/* find the maximum variable number and set t[i] to true only
 * if variable number i exists in the expression
 *
 * e    parse tree to scan; may be NULL
 * t    existence flags; must have room for MAXVARS entries
 * max  largest variable number seen so far
 *
 * Returns the larger of max and the largest variable number in e.
 * A variable number outside 0 .. MAXVARS-1 still counts toward the
 * return value, so the caller can detect it, but is not recorded in t
 * because that would write outside the array.
 */
int find_max_varno (expr e, int t[], int max) {

	/* shouldn't happen */

	if (!e) return max;

	/* based on type of node */

	switch (e->type) {

		/* if either AND or OR, scan the left operand, then carry
		 * its result into the scan of the right operand
		 */
		case AND:
		case OR:
			max = find_max_varno (e->left_op, t, max);
			return find_max_varno (e->right_op, t, max);

		/* if NOT, only the left operand exists */

		case NOT:
			return find_max_varno (e->left_op, t, max);

		/* if variable, max var is max of current max or this
		 * variable's number
		 */
		case VAR:
			if (e->varno >= 0 && e->varno < MAXVARS) {
				t[e->varno] = 1;
			}
			return max2 (e->varno, max);

		/* unknown node type: keep the maximum found so far */

		default:
			printf ("huh?\n");
			return max;
	}
}

/* return 1 if and only if the expression is satisfiable
 *
 * Backtracking search: variables i .. n are tried in turn, first as 0
 * and then as 1, and the expression is evaluated once every variable
 * has a value.  Variables that do not occur in the expression are
 * fixed at 0.  When 1 is returned, t holds a satisfying assignment.
 * The search should be started with i no greater than n+1.
 */

int sat (expr e, 	/* expression */
	int t[], 	/* truth assignments to variables */
	int ex[], 	/* whether a variable exists */
	int i, 		/* current variable index */
	int n) {	/* largest variable index */

	/* if we've gone past the last variable, then we have
	 * a completely specified truth assignment.  see if it
	 * satisfies the expression
	 */
	if (i == n+1) {
		return eval (e, t) ? 1 : 0;
	}

	/* if the i'th variable isn't mentioned in the expression,
	 * just go to the i+1 variable
	 */
	if (!ex[i]) {
		t[i] = 0;
		return sat (e, t, ex, i+1, n);
	}

	/* try letting i'th variable be 0, see if that works */

	t[i] = 0;
	if (sat (e, t, ex, i+1, n)) return 1;

	/* if not, try 1 */

	t[i] = 1;
	if (sat (e, t, ex, i+1, n)) return 1;

	/* oh well, must be unsatisfiable with this partial
	 * assignment to variables
	 */

	return 0;
}

/* main program
 *
 * Reads one Boolean expression from standard input, prints it back,
 * and then prints either an assignment that satisfies it or a message
 * saying there is none.  Returns 0 normally, or 1 when no expression
 * could be parsed or it uses a variable number of MAXVARS or more.
 */

int main (void) {
	expr	e;		/* expression to read in */
	int	i, 		/* loop counter */
		nvars,		/* largest variable number used */
		values[MAXVARS],/* values for each variable */
		exists[MAXVARS];/* whether a variable exists */

	/* get an expression */

	e = get_expr ();

	/* print it out */

	print_expr (e);
	printf ("\n");

	/* nothing was parsed, so there is nothing to evaluate */

	if (!e) {
		printf ("no expression!\n");
		return 1;
	}

	/* assume none of the variables exist.  index 0 is cleared too:
	 * the search starts there, and v0 is a legal variable name
	 */

	for (i=0; i<MAXVARS; i++) {
		exists[i] = 0;
		values[i] = 0;
	}

	/* find maximum variable and see which ones exist */

	nvars = find_max_varno (e, exists, 0);

	/* too many variables?  returning from main has the same effect
	 * as exit (1) and does not need <stdlib.h>
	 */

	if (nvars >= MAXVARS) {
		printf ("too many variables!\n");
		return 1;
	}
	printf ("%d variables\n", nvars);

	/* see if the expression is satisfiable */

	if (!sat (e, values, exists, 0, nvars)) {

		/* if not satisfiable, say so */

		printf ("no satisfying assignment!\n");
	} else {

		/* otherwise, print the truth assignment found, for the
		 * variables that occur in the expression (v0 included)
		 */

		printf ("this assignment satisfies:\n");
		for (i=0; i<=nvars; i++) {
			if (exists[i]) printf ("v%d = %d\n", i, values[i]);
		}
	}
	return 0;
}