import string

# One-letter codes of the twenty residues, in alphabetical order.
amino_acids = list('ACDEFGHIKLMNPQRSTVWY')

# Codon -> one-letter residue.  Codons are enumerated with each position
# running over 'ACGT' (first base slowest, third base fastest) and paired
# with the residue string below; each 16-character row covers one first
# base, each run of four characters one (first, second) base pair.
# '-' marks a stop codon.
dna_aa = dict(zip(
    [first + second + third
     for first in 'ACGT' for second in 'ACGT' for third in 'ACGT'],
    'KNKN' 'TTTT' 'RSRS' 'IIMI'    # A..
    'QHQH' 'PPPP' 'RRRR' 'LLLL'    # C..
    'EDED' 'AAAA' 'GGGG' 'VVVV'    # G..
    '-Y-Y' 'SSSS' '-CWC' 'LFLF'))  # T..

stop_codons = ['TAA', 'TAG', 'TGA']

# Any codon with a '-' (unknown nucleotide) in at least one position
# also "translates" to '-'.
for codon in [first + second + third
              for first in 'ACGT-' for second in 'ACGT-' for third in 'ACGT-']:
    if '-' in codon:
        dna_aa[codon] = '-'


# Reverse lookup: residue -> list of every codon in dna_aa that yields it
# (the '-' entry therefore collects stop codons and unknown-base codons).
aa_dna = {}
for codon, residue in dna_aa.items():
    aa_dna.setdefault(residue, []).append(codon)


# Base -> complementary base.  Lower-case bases are accepted as keys too,
# but always map to the upper-case complement.
rc_dict = dict(zip('ACGT', 'TGCA'))
# Iterate over a snapshot of the items: inserting keys while looping over
# a live dict view raises RuntimeError on Python 3.
for f, r in list(rc_dict.items()):
    rc_dict[f.lower()] = r
    
def _reverse_complement(dna_string):

    '''Pure-python reverse complement of a string of DNA.

    Each character is replaced by its entry in rc_dict (characters
    without an entry become '-') and the result is returned in reverse
    order as a single string.'''

    lookup = rc_dict.get
    complemented = [lookup(base, '-') for base in dna_string]
    return ''.join(reversed(complemented))

def _translate(seq):

    '''Pure-python translation of a string of DNA into residues.

    The sequence is read in consecutive triples starting at index 0 and
    each triple is looked up in dna_aa; one or two leftover bases at the
    end are ignored.  A triple that is not a key of dna_aa raises
    KeyError.'''

    # Length of the prefix made up of complete codons only.
    whole = len(seq) - len(seq) % 3
    return ''.join([dna_aa[seq[start:start + 3]]
                    for start in range(0, whole, 3)])

# Prefer the C implementations of translate / reverse_complement from the
# _sequence module.  If that import fails, report it on stderr and bind
# the public names to the pure-python versions instead.
try:
    from _sequence import translate, reverse_complement
except ImportError:
    import sys
    translate, reverse_complement = _translate, _reverse_complement
    sys.stderr.write('No C module for sequence manipulation.  \n'
                     'Using python stand-ins.\n')

def _test_sequence_transforms():

    '''Test the C modules against their python counterparts.

    Builds a DNA string from 100 codons drawn at random from dna_aa
    together with the matching residue string, then checks that
    translate() reproduces the residues and that reverse_complement()
    agrees with _reverse_complement().  Raises AssertionError on a
    mismatch.'''

    import random

    # random.choice needs an indexable sequence; on Python 3 dict.items()
    # is a view, so materialise it as a list (harmless on Python 2).
    _translations = list(dna_aa.items())

    codons = []
    residues = []
    for i in range(100):
        codon, residue = random.choice(_translations)
        codons.append(codon)
        residues.append(residue)
    # Join once instead of growing the strings inside the loop.
    dna = ''.join(codons)
    protein = ''.join(residues)

    assert translate(dna) == protein

    assert reverse_complement(dna) == \
           _reverse_complement(dna)

# Self-check runs every time the module is imported.
_test_sequence_transforms()
    

def translations(sequence):

    '''Translate sequence in all six reading frames.

    Returns a dict keyed by (strand, frame) tuples, where strand is 'f'
    for the sequence as given or 'r' for its reverse complement, and
    frame is the 0, 1 or 2 leading bases skipped before translating.'''

    reverse_strand = reverse_complement(sequence)
    by_frame = {}
    for offset in range(3):
        for strand, dna in (('f', sequence), ('r', reverse_strand)):
            by_frame[strand, offset] = translate(dna[offset:])
    return by_frame


# Every ASCII upper-case letter that is not one of the four bases.
# string.ascii_uppercase is used instead of string.uppercase: the latter is
# locale-dependent on Python 2 and does not exist on Python 3.
non_nucleotides = [letter for letter in string.ascii_uppercase
                   if letter not in 'ACGT']

# maketrans lives on str in Python 3 and in the string module in Python 2.
try:
    _maketrans = str.maketrans
except AttributeError:
    _maketrans = string.maketrans

# Translation table mapping each non-nucleotide letter to '-'.
clean_table = _maketrans(''.join(non_nucleotides),
                         '-' * len(non_nucleotides))


def clean(seq):

    '''Upper-case seq and replace every ASCII letter other than
    A, C, G and T with '-'.  Characters that are not ASCII letters
    (digits, spaces, '-', ...) are passed through unchanged.'''

    return seq.upper().translate(clean_table)

