"""New MSA statistical  analysis based on probabilistically reasonable
null hypothesis for nucleotide distributions.  Also, use thermodynamic
analysis to pick only the likely regions for base-pairing."""

from RNA.MSARi.MSA.Jaynes import Sequence
reload(Sequence)
from RNA.MSARi.MSA import clean_MSA, BaseMSA
reload(clean_MSA); reload(BaseMSA)

class MSA(BaseMSA.BaseMSA):
    """Multiple sequence alignment whose candidate base pairs are scored.

    Every cleaned input sequence is wrapped in a ``Sequence.Sequence``
    constructed from the sequence itself, the remaining sequences of the
    alignment, and the window length.

    Attributes set by ``__init__``:
        winlength -- window length passed on to every ``Sequence`` call.
        lengths   -- length of the first cleaned sequence (0 if there are
                     no sequences).
        sequences -- list of ``Sequence.Sequence`` wrappers, one per
                     cleaned input sequence.

    ``get_matches`` additionally sets ``matches``.
    """

    def __init__(self, sequences, winlength):
        """Clean `sequences` and wrap each one for pairing analysis.

        sequences -- raw alignment; passed through ``clean_MSA.clean_MSA``
                     first.  NOTE(review): the result is assumed to be a
                     list (it is sliced and has an element deleted below).
        winlength -- window length stored on the instance and handed to
                     each ``Sequence.Sequence``.
        """
        sequences = clean_MSA.clean_MSA(sequences)
        self.winlength = winlength
        # An empty alignment used to raise IndexError here; treat it as
        # an alignment of length zero instead.
        if sequences:
            self.lengths = len(sequences[0])
        else:
            self.lengths = 0

        self.sequences = []
        for seqidx, seq in enumerate(sequences):
            # Each sequence is paired with all the *other* sequences.
            otherseqs = sequences[:]
            del otherseqs[seqidx]
            self.sequences.append(Sequence.Sequence(seq, otherseqs, winlength))

    def get_matches(self):
        """Choose candidate pairs and store them, scored, in ``self.matches``.

        Walks ``seq.pairs`` of every wrapped sequence; each entry at index
        ``pos1`` is iterated as ``(prob, pos2)`` tuples.  A pair is chosen
        unless its ``prob`` is below 0.05, it lies in the neighbourhood of
        an already chosen pair, or it has already been chosen.

        On return ``self.matches`` is a list of
        ``(pair_likelihood(pos1, pos2), (pos1, pos2))`` tuples sorted in
        ascending order.  Nothing is returned.
        """
        # Determine which pairs in the sequences to consider.  The builtin
        # ``set`` is used because the ``sets`` module is not imported here.
        chosen_pairs = set()

        # ...pairs close to chosen pairs, which therefore do not need
        # to be considered separately.
        pair_neighbors = set()

        # Explicit floor division reproduces Python 2 integer ``/``: the
        # lower bound is floor(-winlength / 3), so for winlength == 10 the
        # offsets are -4 .. 2 (not symmetric around zero).
        offsets = range((-self.winlength) // 3, self.winlength // 3)

        for seq in self.sequences:
            for pos1, pairs in enumerate(seq.pairs):
                for prob, pos2 in pairs:
                    pair = (pos1, pos2)
                    if prob < 0.05 or pair in pair_neighbors:
                        continue
                    # For winlength < 3 the offsets above exclude 0, so a
                    # chosen pair is not its own neighbour and may show up
                    # again in a later sequence.  Skip it rather than
                    # tripping an assertion.
                    if pair in chosen_pairs:
                        continue
                    chosen_pairs.add(pair)
                    for offset in offsets:
                        pair_neighbors.add((pos1 + offset, pos2 - offset))
                        pair_neighbors.add((pos2 - offset, pos1 + offset))

        # Score the pairs in sorted order, then order the results by score
        # (ties fall back to the pair itself via tuple comparison).
        self.matches = [(self.pair_likelihood(pos1, pos2), (pos1, pos2))
                        for pos1, pos2 in sorted(chosen_pairs)]
        self.matches.sort()

    def pair_likelihood(self, pos1, pos2, display=False):
        """Return the combined pairing score for positions `pos1`/`pos2`.

        For every wrapped sequence, ``best_pairing(pos1, pos2, winlength)``
        yields ``(count, p1, p2)``.  When ``p1`` is not None the running
        product is multiplied by
        ``gapped_pairing_prob(p1, p2, winlength, count)``; sequences for
        which ``p1`` is None leave the product unchanged.

        pos1, pos2 -- positions of the candidate pair.
        display    -- unused; kept so existing callers keep working.

        Returns the product, which is 1 when no sequence contributes.
        """
        compsig = 1
        for seq in self.sequences:
            count, p1, p2 = seq.best_pairing(pos1, pos2, self.winlength)
            if p1 is not None:
                compsig *= seq.gapped_pairing_prob(
                    p1, p2, self.winlength, count)
        return compsig
                
if __name__ == '__main__':
    # Ad-hoc driver: builds an MSA from up to ten entries of the rnase
    # ``sequences`` mapping with a window length of 10 and runs
    # get_matches().  Nothing is printed here.

    from RNA.data.sequences.rnase import sequences
    from RNA.MSARi.MSA.Jaynes.Sequence import cleanseq
    # Import a module named MSA from the package and reload it, so that
    # ``MSA.MSA`` below is taken from the freshly reloaded module.
    # NOTE(review): presumably this is the file itself re-imported under
    # its package path -- confirm against the package layout.
    from RNA.MSARi.MSA.Jaynes import MSA
    reload(MSA)
    # Python 2 idioms: dict.values() returns a sliceable list and map()
    # returns a list.
    seqs = map(cleanseq, sequences.values()[:10])
    msa = MSA.MSA(seqs, 10)
    msa.get_matches()
