"""Print basic length statistics for the sequences in a FASTA file.

The script takes exactly one command-line argument, the path of the FASTA
file, and prints the number of sequences, the total number of residues, and
the average, minimum and maximum sequence length.
"""
import sys


def summarize_lengths(lengths):
    """Summarize an iterable of sequence lengths in a single pass.

    The input is consumed lazily, so a generator over a large file never has
    to be held in memory.

    Args:
        lengths: iterable of integer sequence lengths.

    Returns:
        A tuple ``(num_seqs, sum_length, min_length, max_length)``.
        ``min_length`` and ``max_length`` are ``None`` when ``lengths`` is
        empty, because no minimum or maximum exists in that case.
    """
    num_seqs = 0
    sum_length = 0
    # None instead of a magic sentinel value: any real length, however large
    # or small (including 0), replaces it on the first iteration.
    min_length = None
    max_length = None
    for length in lengths:
        if min_length is None or length < min_length:
            min_length = length
        if max_length is None or length > max_length:
            max_length = length
        sum_length += length
        num_seqs += 1
    return num_seqs, sum_length, min_length, max_length


def format_report(num_seqs, sum_length, min_length, max_length):
    """Build the report lines for the given statistics.

    Args:
        num_seqs: number of sequences read.
        sum_length: total number of residues over all sequences.
        min_length: shortest sequence length, or ``None`` if there were none.
        max_length: longest sequence length, or ``None`` if there were none.

    Returns:
        A list of five strings, one per output line. When ``num_seqs`` is 0
        the average, minimum and maximum are reported as ``n/a`` rather than
        dividing by zero.
    """
    lines = [
        "Number of sequences = %d" % num_seqs,
        "Number of residues = %d" % sum_length,
    ]
    if num_seqs == 0:
        lines.append("Average length = n/a")
        lines.append("Minimum length = n/a")
        lines.append("Maximum length = n/a")
    else:
        lines.append("Average length = %g"
                     % (float(sum_length) / float(num_seqs)))
        lines.append("Minimum length = %d" % min_length)
        lines.append("Maximum length = %d" % max_length)
    return lines


def read_fasta_stats(fasta_filename):
    """Parse a FASTA file and return its length statistics.

    Args:
        fasta_filename: path of the FASTA file to read.

    Returns:
        The tuple produced by ``summarize_lengths`` for the file's records.
    """
    # Imported here so the pure helpers above stay importable on systems
    # without Biopython; the dependency is only needed to parse a file.
    from Bio import SeqIO

    # The context manager closes the file even if parsing raises. The
    # summary must be computed inside the block because SeqIO.parse reads
    # lazily from the open handle.
    with open(fasta_filename, "r") as fasta_file:
        return summarize_lengths(
            len(seq) for seq in SeqIO.parse(fasta_file, "fasta"))


def main(argv):
    """Run the script.

    Args:
        argv: full argument vector, including the program name at index 0.

    Returns:
        Process exit status: 1 if the argument count is wrong, otherwise 0.
    """
    # Parse the command line.
    if len(argv) != 2:
        print("USAGE: read-fasta.py ")
        return 1

    # Read the file and print the statistics.
    for line in format_report(*read_fasta_stats(argv[1])):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))