"""Computes  for   each  sequence  in  an  MSA   the  distribution  of
nucleotides  that  could  be  expected  at each  position,  given  the
sequence's local similarity to the other sequences."""

class Distributions:
    """Sliding-window match counts and similarities between aligned sequences.

    Parameters:
        sequences: indexable collection of equal-length aligned sequences
            (the rows of an MSA).
        winlength: number of alignment columns in each similarity window;
            must be at least 1.

    Raises:
        ValueError: if ``winlength`` is smaller than 1.

    Attributes set by the constructor:
        winlength, sequences: the constructor arguments, stored unchanged.
        seqlength: length of the first sequence (0 if there are none).
        matches: ``matches[w][i][k]`` is the number of columns in window
            ``w`` (columns ``w .. w + winlength - 1``) where sequence ``i``
            and sequence ``i + 1 + k`` carry the same nucleotide.
        similarities: same layout as ``matches``, each count divided by
            ``winlength``.
    """

    # Only these characters can form a match, so gaps and any other symbol
    # never add to a similarity count.
    _NUCLEOTIDES = frozenset('acgtuACGTU')

    def __init__(self, sequences, winlength):
        if winlength < 1:
            raise ValueError("winlength must be at least 1")

        self.winlength = winlength
        self.sequences = sequences
        self.seqlength = len(sequences[0]) if sequences else 0

        # Compute local similarities: match counts scaled by window length.
        self.get_matches()
        window = float(self.winlength)
        self.similarities = [
            [[count / window for count in match_list]
             for match_list in matches]
            for matches in self.matches
        ]

        # Compute distributions
        last_window = len(self.similarities) - 1
        half_window = self.winlength // 2  # floor division keeps an int index
        for seqposidx in range(self.seqlength):

            # Pick the window most centered on this position, clamped so
            # that columns near either end use the first or last window.
            winposidx = max(0, min(last_window, seqposidx - half_window))
            similarities = self.similarities[winposidx]

            for seqidx in range(len(self.sequences)):

                # Distribution of nucleotides for this column and sequence.
                # TODO(review): the computation was never written; nothing
                # is derived from `similarities` or stored yet.
                distribution = {}

    def get_matches(self):
        """Count pairwise nucleotide matches in every window position.

        Reads ``self.sequences``, ``self.winlength`` and ``self.seqlength``
        and stores the result in ``self.matches`` (one entry per window
        start, in column order).  The first window is counted in full; each
        later window is derived from the previous one by removing the
        column that left the window and adding the one that entered it.

        If the window is longer than the alignment, a single window
        covering every available column is recorded.
        """
        numseqs = len(self.sequences)

        # Matches in the current window: one row for each sequence but the
        # last, holding a count of matches to each subsequent sequence.
        # Row i, slot k therefore refers to the pair (i, i + 1 + k).
        current_matches = [[0] * (numseqs - seqidx)
                           for seqidx in range(1, numseqs)]

        # Get the matches in the first window.
        for column in range(min(self.winlength, self.seqlength)):
            self._count_column(current_matches, column, 1)

        # Record a snapshot; the running counts keep changing below.
        self.matches = [[row[:] for row in current_matches]]

        # Iterate over the remaining windows, applying only the change in
        # matches from one window position to the next.
        for winstart in range(1, self.seqlength - self.winlength + 1):
            # The column just preceding this window is no longer covered.
            self._count_column(current_matches, winstart - 1, -1)
            # The column at the trailing edge has just been added.
            self._count_column(
                current_matches, winstart + self.winlength - 1, 1)
            self.matches.append([row[:] for row in current_matches])

    def _count_column(self, current_matches, column, delta):
        """Add *delta* to the count of every pair that matches in *column*."""
        for seqidx1, row in enumerate(current_matches):
            char1 = self.sequences[seqidx1][column]
            if char1 not in self._NUCLEOTIDES:
                continue
            for offset, seq2 in enumerate(self.sequences[seqidx1 + 1:]):
                if seq2[column] == char1:
                    row[offset] += delta
            
                
        
