#!/usr/bin/env python
# word frequency in a text
# tested with Python24 vegaseat 25aug2005
#
# NOTE: output goes through sys.stdout.write() and keys are ordered with
# sorted(), so the script runs unchanged on both Python 2 and Python 3.

import sys


def printf(format, *args):
    """Python version of printf() in C Language.

    Writes ``format % args`` to standard output exactly as formatted:
    no trailing newline and no separator between consecutive calls.
    (The Python 2 ``print x,`` statement used previously inserted a space
    between successive calls and is a syntax error on Python 3.)
    """
    sys.stdout.write(format % args)


def tokenizeString(mySentence):
    """creates a list of words separated at whitespaces"""
    return mySentence.split()


def cleanString(myWordList, myCharacter2Remove):
    """returns a new list in which every word has the given characters removed

    Words that consist only of removed characters become empty strings;
    they are kept in the list (countFrequency() ignores them).
    """
    return [removeCharacter(word, myCharacter2Remove) for word in myWordList]


def changeToLower(myWordList):
    """returns a new list with every word converted to lower case"""
    return [word.lower() for word in myWordList]


def changeToUpper(myWordList):
    """returns a new list with every word converted to upper case"""
    return [word.upper() for word in myWordList]


def removeCharacter(myWord, myCharacter2Remove):
    """removes the specified characters in each word

    myCharacter2Remove is any iterable of strings; every occurrence of
    each of them is deleted from myWord and the result is returned.
    """
    for unwanted in myCharacter2Remove:
        myWord = myWord.replace(unwanted, '')
    return myWord


def countFrequency(myList):
    """creates a wordfrequency dictionary.

    Maps each non-empty word in myList to its number of occurrences.
    Empty strings are skipped.
    """
    myDictionary = {}
    for myWord in myList:
        if myWord:
            myDictionary[myWord] = myDictionary.get(myWord, 0) + 1
    return myDictionary


def getSortedKey(myDictionary):
    """creates a list of keys and sorts the list"""
    # sorted() always returns a real list; on Python 3 dict.keys() is a
    # view object that has no sort() method.
    return sorted(myDictionary)


def _printEntries(keys, myDictionary):
    """prints the given keys with their counts as {'word':count, ...}"""
    printf("{")
    for key in keys:
        printf("'%s':%d, ", key, myDictionary[key])
    printf("}")


def printDictionary(myKeyList, myDictionary):
    """prints every key of myKeyList with its frequency, in list order"""
    printf("\n\nFrequency of each word in the sentence (sorted):\n")
    _printEntries(myKeyList, myDictionary)


def printByLength(myKeyList, myDictionary, myLengthLimit):
    """prints only the keys whose length is at least myLengthLimit"""
    printf("\n\nFrequency of each word (whose length >= %d) in the sentence (sorted):\n", myLengthLimit)
    longKeys = [key for key in myKeyList if len(key) >= myLengthLimit]
    _printEntries(longKeys, myDictionary)


def printByFrequency(myKeyList, myDictionary, myFrequencyLimit):
    """prints only the keys whose frequency is at least myFrequencyLimit"""
    printf("\n\nFrequency of each word (whose frequency >= %d) in the sentence (sorted):\n", myFrequencyLimit)
    frequentKeys = [key for key in myKeyList
                    if myDictionary[key] >= myFrequencyLimit]
    _printEntries(frequentKeys, myDictionary)


def main():
    """runs the demo: counts the words of a fixed sentence and prints them

    The three reports (all words, frequency >= 2, length >= 10) are printed
    first for the lower-cased words and then for the upper-cased words.
    """
    myCharacter2Remove = [",", ".", "!", "?", ";", "'", '"']
    mySentence = (
        'In this project, we implement a python script that computes '
        'frequency of each string in a given sentence. We will exclude '
        'some specific characters such as ",", ".", "!", "?", ";", '
        '"\'", and \'"\' in a string. We assume that a sentence is '
        'already given in the script that you are implementing'
    )

    printf("\nSentence = ")
    printf('"%s"', mySentence)

    myWordList = tokenizeString(mySentence)
    myWordList = cleanString(myWordList, myCharacter2Remove)

    # Same sequence of reports for each letter case, lower case first.
    for changeCase in (changeToLower, changeToUpper):
        myWordList = changeCase(myWordList)
        myDictionary = countFrequency(myWordList)
        myKeyList = getSortedKey(myDictionary)
        printDictionary(myKeyList, myDictionary)
        printByFrequency(myKeyList, myDictionary, 2)
        printByLength(myKeyList, myDictionary, 10)

    printf("\n\n")


#===================================
if __name__ == '__main__':
    main()