# Lecture Notes on 6 Aug 2014
#
# Word-frequency analysis of a book stored as a plain-text file.

from collections import Counter, defaultdict


def filter_string(st):
    """Return st with every character outside 'a'..'z' replaced by a space.

    Only lowercase ASCII letters are kept, so upper-case letters, digits and
    punctuation all become spaces; callers that want case-insensitive
    matching must lower-case the text first (main() does this).

    Args:
        st: the string to filter.

    Returns:
        A new string of the same length as st.
    """
    # A single join avoids building the result by repeated concatenation.
    return ''.join(ch if 'a' <= ch <= 'z' else ' ' for ch in st)


def _count_words(lines):
    """Count the words in an iterable of lines; return (word_counts, total)."""
    word_counts = Counter()
    total_words = 0
    for line in lines:
        # Same pipeline as before: trim, lower-case, blank out everything
        # that is not a-z, then split on whitespace.
        word_list = filter_string(line.strip().lower()).split()
        # Counter keeps first-seen order, which fixes the order of the words
        # inside each frequency group printed later.
        word_counts.update(word_list)
        total_words += len(word_list)
    return word_counts, total_words


def _group_by_frequency(word_counts):
    """Map each frequency to the list of words that occur that many times."""
    freq_dict = defaultdict(list)
    for word, freq in word_counts.items():
        freq_dict[freq].append(word)
    return freq_dict


def main(book_path='./hard_times.txt'):
    """Print word statistics for the text file at book_path.

    Prints, in order: the total number of words, the number of unique words,
    the unique/total ratio, every word with its count in alphabetical order,
    and finally the words grouped by count from most to least frequent.

    Args:
        book_path: path of the text file to analyse. Defaults to
            './hard_times.txt'.

    Raises:
        OSError: if the file cannot be opened.
    """
    # The context manager closes the file even if reading raises.
    with open(book_path, 'r') as book:
        word_dict, total_words = _count_words(book)

    # print total number of words
    print('Total words used = ', total_words)

    # print unique number of words
    num_unique = len(word_dict)
    print('Number of unique words = ', num_unique)

    # ratio of unique words to total words; a file with no words would
    # otherwise divide by zero, so report 0.0 in that case
    word_ratio = num_unique / total_words if total_words else 0.0
    print('Unique words / Total words = ', word_ratio)

    # print word frequency in alphabetical order
    for word in sorted(word_dict):
        print(word + " : " + str(word_dict[word]))

    print("\n")

    # print words grouped by frequency, most frequent first
    freq_dict = _group_by_frequency(word_dict)
    for freq in sorted(freq_dict, reverse=True):
        print(str(freq) + " : " + str(freq_dict[freq]))


# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()