"""This file implements the Wordlist abstract data type.
   A Wordlist is a structure that stores an arbitrary number of
   words.  The main function is to be able to ask whether a word is
   in the wordlist.  Below is the interface:

     Wordlist()                       --> Wordlist object
     addWord( word, f )               --> Wordlist object
     addWordsFromFile( filename, f )  --> Wordlist object
     removeWord( word )               --> Wordlist object
     findWord( word )                 --> (Boolean, comparisons)
     len( )                           --> integer

   This is Wordlist Version 1.0, which implements the wordlist
   as a flat unordered list, with linear search. 

"""

import time

######################################################################
#                                                                    #
#                       Flat Unordered Wordlist                      #
#                                                                    #
######################################################################

class Wordlist:
    """A Wordlist stores an arbitrary number of words.  The main
       function is to be able to ask whether or not a word is
       in the Wordlist.  We can also add and remove words.

       This version keeps the words in a flat, unordered Python
       list and looks them up with a linear search."""

    def __init__(self):
        """Create an empty Wordlist."""
        self._words = []
        self._wordCount = 0

    def __len__(self):
        """Return the number of words currently counted in the Wordlist."""
        return self._wordCount

    def isEmpty( self ):
        """Return True if the flat word list holds no words."""
        return not self._words

    def addWord(self, word, f):
        """Add a single word to the Wordlist.

           The word is stored only if the filter predicate f accepts
           it, i.e. f( word ) is truthy; otherwise nothing happens."""
        if f( word ):
            self._words.append( word )
            # Count each accepted word exactly once.
            self._wordCount += 1

    def addWordsFromFile( self, filename, f ):
        """Given an external file containing words, add
           each to the Wordlist.  Assume that the file
           contains one word per line, and that the words
           do not repeat.

           Each line is stripped of surrounding whitespace and passed
           to addWord together with the filter predicate f.  A progress
           line is printed after every 1000 words accepted by f."""
        count = 0
        # The with-statement closes the file even if f or addWord raises.
        with open( filename, 'r' ) as inputFile:
            for line in inputFile:
                time1 = time.time()
                word = line.strip()
                self.addWord( word, f )
                # Because the wordlist building is slow, added
                # this to give some indication of progress.
                if f( word ):
                    count += 1
                    if count % 1000 == 0:
                        # NOTE: time1 is reset on every line, so the
                        # reported time covers only the latest word.
                        time2 = time.time()
                        print(" Added", count, "words to the wordlist: (%2.5f seconds)" % (time2 - time1))

    def removeWord( self, word ):
        """This removes the first occurrence of the word.
           Assumes only one occurrence.  If the word is not
           present, a message is printed and nothing changes."""
        # findWord returns a (found, comparisons) pair; the pair itself
        # is always truthy, so only its first element may be tested.
        found, _ = self.findWord( word )
        if found:
            self._words.remove( word )
            self._wordCount -= 1
        else:
            print("Word", word, "not found in wordlist.")

    def findWord( self, word ):
        """Find a word in the Wordlist via linear search.  This
           could use the Python in operator, but that would be
           difficult to meter.  Returns a pair containing the
           boolean result and the number of comparisons made."""
        for comparisons, candidate in enumerate( self._words, start=1 ):
            if candidate == word:
                return ( True, comparisons )
        # Not found: every stored word was compared once.
        return ( False, len( self._words ) )


######################################################################
#                                                                    #
#                           Hashed Wordlist                          #
#                                                                    #
######################################################################

class WordlistHash (Wordlist):
    """For this version, we store the Wordlist as a hash table.

       Each slot of the table is either None or a list (a "bucket")
       holding the words that hash to that slot.  The hash does not
       depend on letter order, so every permutation of a word lands
       in the same bucket; findPerm relies on this."""

    # This is a list of the first 40 primes
    PRIMES = [  2,  3,  5,  7, 11, 13, 17, 19, 23, 29,
                31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
                73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
                127, 131, 137, 139, 149, 151, 157, 163, 167, 173 ]

    # This is the size of our table.  In case it turns out we need a prime,
    # we chose 10007 because it's the first prime after 10000.
    TABLESIZE = 10007

    @staticmethod
    def computeHash ( word, tableSize ):
        """For this version, compute the index i of each character
        in the range [a..z], look up the ith prime, and multiply the
        primes together modulo tableSize.  This hash has the attribute
        that it is indifferent to permutations.

        Returns an integer in the range [0, tableSize).

        NOTE: characters outside [a..z] are not validated.  Their
        offset from 'a' is used directly as an index into PRIMES, so
        some select an unrelated prime and others raise IndexError.
        """
        product = 1
        for ch in word:
            i = ord(ch) - ord('a')
            # Reducing at every step yields the same residue as reducing
            # the full product once, without building a huge integer.
            product = ( product * WordlistHash.PRIMES[i] ) % tableSize
        return product % tableSize

    def __init__(self):
        """Create an empty hashed Wordlist with TABLESIZE empty slots."""
        Wordlist.__init__( self )
        # Create a hashtable of size TABLESIZE
        self._hashtable = [None] * WordlistHash.TABLESIZE

    def isEmpty( self ):
        """Return True if no words are stored in the hash table."""
        # The inherited version inspects the flat list, which this
        # class never fills.
        return self._wordCount == 0

    def addWord( self, word, f ):
        """For this version, we insert a word into a hash table.
           The word is stored only if the filter predicate f accepts it."""
        if f( word ):
            self._wordCount += 1
            index = WordlistHash.computeHash( word, WordlistHash.TABLESIZE )
            if not self._hashtable[index]:
                self._hashtable[index] = [ word ]
            else:
                self._hashtable[index].append( word )

    def removeWord( self, word ):
        """Remove the first occurrence of the word from its bucket.
           If the word is not present, a message is printed and
           nothing changes."""
        # The inherited version removes from the flat list, which this
        # class never fills, so the bucket has to be searched here.
        index = WordlistHash.computeHash( word, WordlistHash.TABLESIZE )
        bucket = self._hashtable[index]
        if bucket and word in bucket:
            bucket.remove( word )
            self._wordCount -= 1
        else:
            print("Word", word, "not found in wordlist.")

    def findWord( self, word ):
        """Find exactly this word via hashing.  Returns a pair
           containing the boolean result and the number of
           comparisons made; an empty bucket counts as one probe,
           as in findPerm."""
        # The inherited version searches the flat list, which this
        # class never fills.
        index = WordlistHash.computeHash( word, WordlistHash.TABLESIZE )
        bucket = self._hashtable[index]
        if not bucket:
            return ( False, 1 )
        for comparisons, candidate in enumerate( bucket, start=1 ):
            if candidate == word:
                return ( True, comparisons )
        return ( False, len( bucket ) )

    def bucketCounts( self ):
        """Print how many buckets hold 0, 1, 2, ... words.  At least
           25 lines are printed; more if some bucket holds 25 or more
           words."""
        longest = max( ( len( b ) for b in self._hashtable if b ), default=0 )
        # Size the histogram to the longest bucket so that a crowded
        # bucket cannot index past the end.
        counts = [0] * max( 25, longest + 1 )
        for bucket in self._hashtable:
            counts[ len( bucket ) if bucket else 0 ] += 1
        for size, howMany in enumerate( counts ):
            print( "buckets with", size, "items:", howMany )

    def loadFactor( self ):
        """The load factor computes the average length of a bucket,
        but only counts non-empty buckets.  It returns a pair containing
        the number of empty buckets and the load factor on the non-empty
        buckets.  When every bucket is empty the load factor is 0.0."""
        countEmpties = 0
        for x in self._hashtable:
            if not x:
                countEmpties += 1
        nonEmpties = len( self._hashtable ) - countEmpties
        if nonEmpties == 0:
            # Nothing stored: avoid dividing by zero.
            return (countEmpties, 0.0)
        lf = self._wordCount / nonEmpties
        return (countEmpties, lf)

    def findPerm( self, word ):
        """Find any permutation of word in the Wordlist via hashing.  If the hash
           bucket is empty, return ( False, 1 ).  Otherwise, do a linear
           search on the bucket at that position.

           Returns a pair: the first stored word that is a permutation
           of word (or False if there is none), and the number of
           bucket entries compared."""
        # Compute the hash index for the word.
        index = WordlistHash.computeHash( word, WordlistHash.TABLESIZE )
        bucket = self._hashtable[index]

        # See if hashbucket is empty
        if not bucket:
            return ( False, 1 )

        # Two words are permutations of each other exactly when their
        # sorted letters agree.
        letterlist = sorted( word )
        comparisons = 0
        for w in bucket:
            comparisons += 1
            if sorted( w ) == letterlist:
                return ( w, comparisons )
        return ( False, comparisons )

        
