# Lecture Notes on 22 Nov 2013


def filter_string(st):
    """Return *st* with every character outside 'a'..'z' replaced by a space.

    Only lowercase ASCII letters are kept; upper-case letters, digits,
    punctuation, whitespace and non-ASCII characters each become a single
    space, so the result has the same length as the input. Callers that
    want upper-case letters preserved must lower-case the text first.
    """
    # Build the result in one pass with join instead of repeated `+=`.
    return ''.join(ch if 'a' <= ch <= 'z' else ' ' for ch in st)


def main(filename='hard_times.txt'):
    """Print word statistics for the text file *filename*.

    Reads the file line by line, lower-cases each line, blanks out every
    non-letter with filter_string() and splits the result on whitespace.
    Three lines are printed: the total number of words, the number of
    distinct words, and the ratio of distinct words to total words.

    If the file contains no words the ratio is reported as 0.0 rather
    than raising ZeroDivisionError.

    Raises:
        OSError: if *filename* cannot be opened or read.
    """
    # distinct words seen so far
    word_set = set()

    # running count of all words, duplicates included
    total_words = 0

    # `with` guarantees the file is closed even if reading fails part-way
    with open(filename, 'r') as book:
        for line in book:
            # split() already discards leading/trailing whitespace, so no
            # separate strip() is needed before filtering
            word_list = filter_string(line.lower()).split()
            word_set.update(word_list)
            total_words += len(word_list)

    # print total number of words
    print('Total words used = ', total_words)

    # print unique number of words
    num_unique_words = len(word_set)
    print('Number of unique words = ', num_unique_words)

    # ratio of unique words to total words (0.0 when there are no words)
    word_ratio = num_unique_words / total_words if total_words else 0.0
    print('Unique words / Total words = ', word_ratio)


# Only run the analysis when executed as a script, not when imported.
if __name__ == '__main__':
    main()