# File: Benford.py
# Description: A program that tests Benford's law
# for a given data file
#
#
# Suggested Solution
#
# Date created: 4/9/2012
# Date last modified: 4/9/2012
import math
def getData(fileName):
    """Read every line of a text file into a list.

    Parameters:
        fileName: path of the file to read.

    Returns:
        A list with one string per line, in file order. Nothing is stripped,
        so each line keeps its line terminator if it had one.

    Raises:
        IOError: if the file cannot be opened or read.
    """
    # The with-block closes the file even if reading fails part-way.
    with open(fileName, 'r') as filevar:
        return list(filevar)
def getLeadDigitCounts(rawData):
    """Count how often each digit 1-9 is the leading digit of the data.

    Every entry is split on tabs and the second field is taken as the
    number to examine.

    Parameters:
        rawData: iterable of strings of the form 'label<TAB>number', with or
            without a trailing newline. Blank entries are ignored.

    Returns:
        A list of 9 ints; element i holds the number of entries whose leading
        digit is i + 1. Numbers with a leading 0 are not counted.

    Raises:
        IndexError: if a non-blank entry contains no tab.
        ValueError: if the number field is empty or does not start with a
            digit.
    """
    baseOfNumbers = 10
    counts = [0] * (baseOfNumbers - 1)
    for entry in rawData:
        # A blank line (for example an empty last line) carries no data.
        if not entry.strip():
            continue
        parts = entry.split('\t')
        # Strip surrounding whitespace instead of chopping the last character:
        # the final line of a file may have no newline, and chopping it would
        # throw away a real digit.
        num = parts[1].strip()
        leadingDigit = int(num[0:1])
        if leadingDigit != 0:
            # subtract one because list indices are 0 to 8, but digits are 1 to 9
            counts[leadingDigit - 1] += 1
    return counts
def showResults(counts):
    """Print the leading-digit distribution as a small table.

    The first line printed is the number of data points (the sum of all
    counts). It is followed by a header and one row per digit holding the
    digit, its count and its percentage of the total to one decimal place.

    Parameters:
        counts: list of ints where element i is the count for leading
            digit i + 1.

    Returns:
        None. All output goes to standard output.
    """
    # sum the number of data points (float so the percentage division below
    # is never integer division)
    total = float(sum(counts))
    # Each print below takes one pre-built string in parentheses, which
    # produces the same text whether print is a statement or a function.
    print('number of data points: ' + str(int(total)))
    # for each leading digit print the digit, the number and percentage of
    # data points with that leading digit
    print('\ndigit\tnumber\tpercentage')
    for index, count in enumerate(counts):
        # With no data points every share is reported as 0.0 instead of
        # dividing by zero.
        if total:
            percentage = count / total * 100
        else:
            percentage = 0.0
        print(str(index + 1) + '\t' + str(count) + '\t%.1f' % percentage)
def showLeadingDigits(digit, data):
    """Print every entry whose number starts with the given digit.

    Each entry is split at its first tab into a label and a number. Matching
    entries are printed one per line as 'label number'.

    Parameters:
        digit: the leading character to look for, as a one-character string
            (processFile passes the text typed by the user).
        data: iterable of strings of the form 'label<TAB>number', with or
            without a trailing newline.

    Returns:
        None. All output goes to standard output.
    """
    print('\nShowing data with a leading ' + str(digit))
    for entry in data:
        label, tab, number = entry.partition('\t')
        # Entries with no tab or an empty number field have nothing to
        # compare, so they are skipped rather than mis-split or crashing.
        if not tab or not number:
            continue
        if number[0] == digit:
            # Drop the entry's own newline; print supplies exactly one, so
            # a last line without a newline still ends its output line.
            if number.endswith('\n'):
                number = number[:-1]
            print(label + ' ' + number)
    
def processFile(name):
    """Run the full leading-digit report for one data file.

    Reads the file, prints the leading-digit table, then asks the user for a
    digit and lists the entries whose number starts with it.

    Parameters:
        name: path of the data file to analyse.
    """
    entries = getData(name)
    showResults(getLeadDigitCounts(entries))
    # The reply is passed on as typed; it is compared as a string later.
    chosenDigit = raw_input('\nEnter leading digit: ')
    showLeadingDigits(chosenDigit, entries)
    
    
def main():
    """Process the two hard-coded data files, one after the other."""
    for dataFile in ('TexasCountyPop2010.txt', 'StudentData.txt'):
        processFile(dataFile)
       
# Script entry point: run the analysis as soon as this file is executed.
# NOTE(review): there is no `if __name__ == '__main__':` guard, so importing
# this module also runs main().
main()