"""Compute the pooled-variance two-sample t-statistic for two sets of numbers.

The input file must contain exactly two lines, each holding one
whitespace-delimited sample.  The t-statistic is printed to standard output.
"""

import math
import sys

USAGE = "USAGE: compute-t.py <filename>"


##############################################################################
def makeArray(myString):
    """Turn a whitespace-delimited string of numbers into a list of floats.

    Raises ValueError if any word cannot be converted to a float.
    """
    return [float(word) for word in myString.split()]


##############################################################################
def computeMean(myList):
    """Return the arithmetic mean of a list of numbers.

    Raises ZeroDivisionError if myList is empty.
    """
    # math.fsum always returns a float, so the division below is never an
    # integer division, and it avoids accumulating rounding error.
    return math.fsum(myList) / len(myList)


##############################################################################
def computeSS(mean, myList):
    """Given the mean, return the sum of squared deviations of myList."""
    differences = (value - mean for value in myList)
    return math.fsum(difference * difference for difference in differences)


##############################################################################
def computeTStatistic(firstSet, secondSet):
    """Return the pooled-variance t-statistic for two lists of numbers.

    The sign is kept (no absolute value is taken), so the result shows which
    sample has the larger mean.

    Raises ValueError when the statistic is undefined: an empty sample, fewer
    than three values in total, or zero pooled variance.
    """
    firstSize = len(firstSet)
    secondSize = len(secondSet)
    if firstSize == 0 or secondSize == 0:
        raise ValueError("Each line must contain at least one number.")

    degreesOfFreedom = firstSize + secondSize - 2
    if degreesOfFreedom < 1:
        raise ValueError("At least three numbers in total are required.")

    # Compute the mean and the sum-of-squares of each sample.
    firstMean = computeMean(firstSet)
    secondMean = computeMean(secondSet)
    firstSS = computeSS(firstMean, firstSet)
    secondSS = computeSS(secondMean, secondSet)

    # Compute the pooled variance.
    pooledVariance = (firstSS + secondSS) / degreesOfFreedom
    if pooledVariance == 0.0:
        # The standard error would be zero, so the ratio cannot be formed.
        raise ValueError("Pooled variance is zero; t-statistic is undefined.")

    standardError = math.sqrt((pooledVariance / firstSize)
                              + (pooledVariance / secondSize))
    return (firstMean - secondMean) / standardError


##############################################################################
def parseSamples(lines):
    """Convert the lines of the input file into two lists of floats.

    Raises ValueError if there are not exactly two lines, or if a value is
    not a valid number.
    """
    if len(lines) < 2:
        raise ValueError("Too few lines in file.")
    if len(lines) > 2:
        raise ValueError("Too many lines in file.")
    return makeArray(lines[0]), makeArray(lines[1])


##############################################################################
# MAIN PROCEDURE
##############################################################################
def main(argv):
    """Run the script on the given argument list and return its exit status."""
    # Parse the command line.
    if len(argv) != 2:
        print(USAGE)
        return 1

    # Read each line of the file; the with-block guarantees the file is closed.
    with open(argv[1], "r") as myFile:
        lines = myFile.readlines()

    try:
        firstSet, secondSet = parseSamples(lines)
        tStatistic = computeTStatistic(firstSet, secondSet)
    except ValueError as error:
        # Report malformed or degenerate input without a traceback.
        print(error)
        return 1

    print("%g" % tStatistic)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))