#include "modules.h"

#define MAX_EXON_COUNT 1000
#define MERGE_PARSES   0

// Writes the final accuracy report to standard output.
//
// stats holds the running counters accumulated over all sequences; the label
// printed next to each counter below is the authoritative description of what
// that slot contains. Ratios are computed in floating point, so an empty
// denominator prints as nan/inf rather than aborting.
void printResults(int stats[50]){

  // Nucleotide-level confusion matrix built from:
  //   stats[8]  nucleotides predicted to be coding
  //   stats[9]  nucleotides that are coding
  //   stats[10] nucleotides predicted to be coding that are coding
  //   stats[32] total number of nucleotides considered
  const double truePos  = stats[10];
  const double trueNeg  = stats[32]-stats[9]-stats[8]+stats[10];
  const double falsePos = stats[8]-stats[10];
  const double falseNeg = stats[9]-stats[10];

  // Approximate correlation: mean of the four conditional probabilities,
  // rescaled from [0,1] to [-1,1].
  const double approxCorrelation =
    ((truePos/(truePos+falseNeg))+(truePos/(truePos+falsePos))+(trueNeg/(trueNeg+falsePos))+(trueNeg/(trueNeg+falseNeg)))/2-1;

  // Internal exons are whatever is left once initial, terminal and
  // single-exon-gene exons are removed from the total.
  const int internalExons = stats[1]-stats[5]-stats[6]-stats[7];
  // Exons predicted exactly, summed over all four exon classes.
  const int exactExons = stats[0]+stats[13]+stats[14]+stats[15];

  cout << "Final Statistics: " << endl << endl;

  // ---- per exon class breakdown ----
  cout << "Internal Exons" << endl << endl;
  cout << "Number of internal coding exons: " << internalExons << endl
       << "Number of internal coding exons predicted correctly: " << stats[0] << endl
       << "Number of internal coding exons predicted correctly only on 5' end: " << stats[11] << endl
       << "Number of internal coding exons predicted correctly only on 3' end: " << stats[12] << endl
       << "Number of internal coding exons predicted correctly on neither end but partially covered: " << stats[28] << endl
       << endl;

  cout << "Initial Exons" << endl << endl;
  cout << "Number of initial coding exons: " << stats[5] << endl
       << "Number of initial coding exons predicted correctly: " << stats[13] << endl
       << "Number of initial coding exons predicted correctly only on 5' end: " << stats[16] << endl
       << "Number of initial coding exons predicted correctly only on 3' end: " << stats[17] << endl
       << "Number of initial coding exons predicted correctly on neither end but partially covered: " << stats[29] << endl
       << endl;

  cout << "Terminal Exons" << endl << endl;
  cout << "Number of terminal coding exons: " << stats[6] << endl
       << "Number of terminal coding exons predicted correctly: " << stats[14] << endl
       << "Number of terminal coding exons predicted correctly only on 5' end: " << stats[18] << endl
       << "Number of terminal coding exons predicted correctly only on 3' end: " << stats[19] << endl
       << "Number of terminal coding exons predicted correctly on neither end but partially covered: " << stats[30] << endl
       << endl;

  cout << "Single Exon Genes" << endl << endl;
  cout << "Number of genes with one coding exon: " << stats[7] << endl
       << "Number of single gene coding exons predicted correctly: " << stats[15] << endl
       << "Number of single gene coding exons predicted correctly only on 5' end: " << stats[20] << endl
       << "Number of single gene coding exons predicted correctly only on 3' end: " << stats[21] << endl
       << "Number of single coding exons predicted correctly on neither end but partially covered: " << stats[31] << endl
       << endl;

  // ---- global totals ----
  cout << endl;
  cout << "Number of genes: " << stats[3] << endl
       << "Number of perfect genes: " << stats[23] << endl
       << "Number of coding exons: " << stats[1] << endl
       << "Number of predicted exons: " << stats[2] << endl
       << "Number of coding exons of length greater than 50: " << stats[24] << endl
       << "Number of predicted exons of length greater than 50: " << stats[22] << endl
       << "Number of predicted exons overlapping no coding exon: " << stats[25] << endl
       << "Number of splice sites in noncoding exons that are predicted: " << stats[26] << endl
       << "Number of false negatives completely uncovered: " << stats[27] << endl
       << "Number of nucleotides predicted to be coding: " << stats[8] << endl
       << "Number of nucleotides that are coding: " << stats[9] << endl
       << "Number of nucleotides predicted to be coding that are coding: " << stats[10] << endl;

  // ---- derived ratios ----
  cout << "Wrong exons (WE): " << static_cast<double>(stats[25])/stats[2] << endl
       << "Missing exons (ME): " << static_cast<double>(stats[27])/stats[1] << endl
       << "Nucleotide sensitivity: " << static_cast<double>(stats[10])/stats[9] << endl
       << "Nucleotide specificity: " << static_cast<double>(stats[10])/stats[8] << endl
       << "Nucleotide approximate correlation (AC): " << approxCorrelation << endl
       << "Exact exon sensitivity: " << static_cast<double>(exactExons)/(stats[1]) << endl
       << "Exact exon specificity: " << static_cast<double>(exactExons)/(stats[2]) << endl
       << "Covered exon sensitivity: " << static_cast<double>(stats[1]-stats[27])/stats[1] << endl
       << "Exact internal exon sensitivity: " << static_cast<double>(stats[0])/internalExons << endl;

}

// Compares the predicted parse of one sequence with its annotated coding
// exons and accumulates the result into the shared counters.
//
// Parameters:
//   hseq            annotated sequence; its regions of type REGION_CEXON are
//                   the reference exons, REGION_NCEXON the noncoding exons.
//   seqnum          sequence number, used only to build the parse file name
//                   when mode is nonzero.
//   stats           running counters, updated in place. Slots written here:
//                     [0],[13],[14],[15]  exact internal/initial/terminal/single exons
//                     [11],[16],[18],[20] 5' end only correct (same class order)
//                     [12],[17],[19],[21] 3' end only correct
//                     [28],[29],[30],[31] overlapping, neither end correct
//                     [1] coding exons, [2] predicted exons,
//                     [4] internal exon / prediction pairs with equal frame,
//                     [5],[6],[7] initial / terminal / single-exon-gene exons,
//                     [8],[9],[10] predicted / annotated / correctly predicted nucleotides,
//                     [22],[24] predicted / annotated exons longer than 50,
//                     [23] perfect genes, [25] predictions overlapping no coding exon,
//                     [26] noncoding splice sites used, [27] uncovered coding exons,
//                     [32] total sequence length.
//   mode            nonzero: read "/data2/rosetta/output/<seqnum>_<locus>_Parse";
//                   zero:    read "<locus>_GenscanParse" from the working directory.
//   exonPredictions per-exon result flags, indexed by coding exon order. The
//                   first 100 entries are cleared here; 1 = exact, +2 = 5' end
//                   only, +4 = 3' end only, +8 (+64 for internal exons) =
//                   overlap with neither end correct.
//                   NOTE(review): entries are written at index (exon number - 1),
//                   so the caller's buffer must hold at least as many ints as
//                   the sequence has coding exons, and at least 100 — confirm.
//
// The parse file is a whitespace separated list of "begin end frame" triples;
// a triple of -1 -1 -1 stands for "no predicted exons". A human readable list
// of the annotation and of every mistake is written to standard output.
void checkParses(FilterSequence *hseq, int seqnum, int stats[50], int mode, int *exonPredictions) {

  int j;
  long int beginexon, endexon; int frame, trueframe, numberon, completemiss;
  String hlocus = hseq->get_locus();

  char houtbuf[100];
  if (mode) {
    strcpy(houtbuf, "/data2/rosetta/output/");
    strcpy(houtbuf+strlen(houtbuf), toa(seqnum)); strcpy(houtbuf+strlen(houtbuf), "_");
    strcpy(houtbuf+strlen(houtbuf), hlocus);      strcpy(houtbuf+strlen(houtbuf), "_Parse");
  }
  else {
    strcpy(houtbuf, hlocus); strcpy(houtbuf+strlen(houtbuf), "_GenscanParse");

    // Look the parse file up in the GENSCAN test-set listing. The result is
    // currently informational only: the early return below is disabled.
    ifstream gsTestSet("/cpp/main/GENSCAN/dir.txt");

    int found = 0;
    char testbuf[100];
    while (gsTestSet.good()) {
      gsTestSet.width(sizeof(testbuf)); // never read more than testbuf can hold
      gsTestSet >> testbuf;
      if (!strcmp(houtbuf, testbuf)) {
        found = 1;
        break;
      }
    }
    /*    if (!found) {
          cout << "\t ...is in GENSCAN train set" << endl;
          return;
          }*/
  }

  ifstream parseIn(houtbuf);

  int totalcexons=0;
  int perfect=0;
  int totalparseexons=0;

  // First pass over the annotation: exon and nucleotide totals.
  stats[32]+= hseq->get_length();
  for (j=1; j <= hseq->get_region_num(); j++){
    Region *r = hseq->get_region(j);
    if (r->type==REGION_CEXON){
      stats[1]++;
      if (r->stop-r->start+1>50)
        stats[24]++;
      stats[9]+=r->stop-r->start+1;
      totalcexons++;
    }
  }
  if (totalcexons==1)
    stats[7]++;
  else if (totalcexons>1){
    stats[5]++;
    stats[6]++;
  }

  vector<int> falseneg(totalcexons, 0);       // how many predictions overlap each coding exon
  vector<int> trueParseBegin(totalcexons, 0); // annotated exon starts
  vector<int> trueParseEnd(totalcexons, 0);   // annotated exon ends

  cout << "Annotated Parse: " << endl;
  int countertemp=0;
  for (j=1; j <= hseq->get_region_num(); j++){
    Region *r = hseq->get_region(j);
    if (r->type==REGION_CEXON){
      trueParseBegin[countertemp]=r->start;
      trueParseEnd[countertemp]=r->stop;
      cout << countertemp+1 << ".    (" << r->start << "," << r->stop << ")" << endl;
      countertemp++;
    }
  }
  cout << "-------------" << endl;
  cout << "Mistakes: " << endl;

  arrayZero(exonPredictions,100);

  // The extraction itself is the loop condition: testing good() before reading
  // would run the body once more after the last record whenever the file ends
  // in whitespace (counting the last exon twice), and on an empty file would
  // use values that were never read.
  while (parseIn >> beginexon >> endexon >> frame){
    assert(endexon>=beginexon);
    if (beginexon==-1 && endexon ==-1 && frame == -1) continue; // continue if there are no predicted exons
    stats[2]++; // count number of exons predicted
    totalparseexons++;
    if (endexon-beginexon+1>50)
      stats[22]++;
    stats[8]+=endexon-beginexon+1; // count number of nucleotides predicted
    numberon=0; // enumerates which coding region is being examined
    completemiss=0;

    for (j=1; j <= hseq->get_region_num(); j++){
      Region *r = hseq->get_region(j);
      long int overlap = 0; // nucleotides shared by this coding exon and the prediction
      if (r->type==REGION_CEXON){
        numberon++;
        overlap = max((long int)0,min(r->stop,endexon)-max(r->start,beginexon)+1);
        stats[10]+=overlap; // count number of true nucleotides predicted
        if (overlap==0)
          completemiss++;
        else
          falseneg[numberon-1]++;
      }

      if (r->type==REGION_NCEXON){
        if (r->start==beginexon){
          stats[26]++;
          cout << "5' Noncoding splice site used at " << beginexon <<  endl;
        }
        if (r->stop==endexon){
          stats[26]++;
          cout << "3' Noncoding splice site used at " << endexon << endl;
        }
      }

      if (r->type==REGION_CEXON && numberon !=1 && numberon != totalcexons){ // count statistics for internal exons

        if (r->start==beginexon && r->stop==endexon){ // count number of perfect exons
          stats[0]++;
          perfect++;
          exonPredictions[numberon-1] = 1;
        }
        if (r->start==beginexon && r->stop!=endexon){ // count number of exons perfect only on 5' end
          stats[11]++;
          cout << "Bad internal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(*," << endexon << ")" << endl;
          exonPredictions[numberon-1] += 2;
        }
        if (r->start!=beginexon && r->stop==endexon){ // count number of exons perfect only on 3' end
          stats[12]++;
          cout << "Bad internal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << ",*)" << endl;
          exonPredictions[numberon-1] += 4;
        }
        if (r->start!=beginexon && r->stop!=endexon && overlap>0){ // count overlapping exons, match on neither end
          stats[28]++;
          cout << "Bad internal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << "," << endexon << ")" << endl;
          exonPredictions[numberon-1] += 64;
        }

        trueframe = (3-r->frame+r->start)%3;
        if (trueframe==frame)
          stats[4]++;
      }

      if (r->type==REGION_CEXON && numberon==1 && totalcexons !=1){ // count statistics for initial exons

        if (r->start==beginexon && r->stop==endexon){ // count number of perfect exons
          stats[13]++;
          perfect++;
          exonPredictions[0] = 1;
        }
        if (r->start==beginexon && r->stop!=endexon){ // count number of exons perfect only on 5' end
          stats[16]++;
          cout << "Bad initial    " << "(" << r->start << "," << r->stop << ")" << " -> " << "(*," << endexon << ")" << endl;
          exonPredictions[0] += 2;
        }
        if (r->start!=beginexon && r->stop==endexon){ // count number of exons perfect only on 3' end
          stats[17]++;
          cout << "Bad initial    " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << ",*)" << endl;
          exonPredictions[0] += 4;
        }
        if (r->start!=beginexon && r->stop!=endexon && overlap>0){ // count overlapping exons, match on neither end
          stats[29]++;
          cout << "Bad initial    " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << "," << endexon << ")" << endl;
          exonPredictions[0] += 8;
        }

      }

      if (r->type==REGION_CEXON && numberon==totalcexons && totalcexons !=1){ // count statistics for terminal exons

        if (r->start==beginexon && r->stop==endexon){ // count number of perfect exons
          stats[14]++;
          perfect++;
          exonPredictions[numberon-1] = 1;
        }
        if (r->start==beginexon && r->stop!=endexon){ // count number of exons perfect only on 5' end
          stats[18]++;
          cout << "Bad terminal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(*," << endexon << ")" << endl;
          exonPredictions[numberon-1] += 2;
        }
        if (r->start!=beginexon && r->stop==endexon){ // count number of exons perfect only on 3' end
          stats[19]++;
          cout << "Bad terminal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << ",*)" << endl;
          exonPredictions[numberon-1] += 4;
        }
        if (r->start!=beginexon && r->stop!=endexon && overlap>0){ // count overlapping exons, match on neither end
          stats[30]++;
          cout << "Bad terminal   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << "," << endexon << ")" << endl;
          exonPredictions[numberon-1] += 8;
        }

      }

      if (r->type==REGION_CEXON && numberon == 1 && totalcexons == 1){ // count statistics for single exons

        if (r->start==beginexon && r->stop==endexon){ // count number of perfect exons
          stats[15]++;
          perfect++;
          exonPredictions[0] = 1;
        }
        if (r->start==beginexon && r->stop!=endexon){ // count number of exons perfect only on 5' end
          stats[20]++;
          cout << "Bad single     " << "(" << r->start << "," << r->stop << ")" << " -> " << "(*," << endexon << ")" << endl;
          exonPredictions[0] += 2;
        }
        if (r->start!=beginexon && r->stop==endexon){ // count number of exons perfect only on 3' end
          stats[21]++;
          cout << "Bad single     " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << ",*)" << endl;
          exonPredictions[0] += 4;
        }
        if (r->start!=beginexon && r->stop!=endexon && overlap>0){ // count overlapping exons, match on neither end
          stats[31]++;
          cout << "Bad single   " << "(" << r->start << "," << r->stop << ")" << " -> " << "(" << beginexon << "," << endexon << ")" << endl;
          exonPredictions[0] += 8;
        }
      }
    }
    if (completemiss==totalcexons){
      stats[25]++; // The predicted exon did not overlap ANY coding exons in the gene
      cout << "False positive " << "(" << beginexon << "," << endexon << ")" << endl;
    }
  }

  if (perfect==totalcexons && totalcexons==totalparseexons){ // check if gene is perfect
    stats[23]++;
    cout << "None! Perfect Gene" << endl;
  }

  // Report every coding exon that no prediction touched. The coordinates were
  // recorded while printing the annotated parse, so no rescan is needed.
  for (j=0; j<totalcexons; j++){
    if (falseneg[j]==0){
      stats[27]++;
      cout << "False Negative " << "(" << trueParseBegin[j] << "," << trueParseEnd[j] << ")" << endl;
    }
  }
  assert(stats[10]<=stats[9]); // Check that the total predicted correct so far is less than the total so far.

  cout << "---------------------------------------" << endl;
}

// Runs repeat identification on the human and the mouse sequence and stores
// each result in its own text file, unless that file can already be opened.
//
// File names are "<seqnum>_[RC_]<locus>_Repeats_H" and
// "<seqnum>_[RC_]<locus>_Repeats_M"; the "RC_" part is present when the
// registry value reverseComplementHuman / reverseComplementMouse contains
// "yes". Each file starts with the number of repeats, followed by one
// tab-separated line per repeat: begin, end, type, region, intron score,
// exon score.
void createRepeatInformation(FilterSequence *hseq, FilterSequence *mseq, int seqnum, Registry *reg) {
  cout << "creating repeat information" << endl;

  const int humanIsRevComp = reg->lookupVal("reverseComplementHuman").contains("yes");
  const int mouseIsRevComp = reg->lookupVal("reverseComplementMouse").contains("yes");
  String humanLocus = hseq->get_locus();
  String mouseLocus = mseq->get_locus();

  // Assemble both output names up front.
  char humanFile[100], mouseFile[100];
  strcpy(humanFile, toa(seqnum));
  strcat(humanFile, "_");
  strcpy(mouseFile, toa(seqnum));
  strcat(mouseFile, "_");
  if (humanIsRevComp) strcat(humanFile, "RC_");
  if (mouseIsRevComp) strcat(mouseFile, "RC_");
  strcat(humanFile, humanLocus);
  strcat(humanFile, "_Repeats_H");
  strcat(mouseFile, mouseLocus);
  strcat(mouseFile, "_Repeats_M");

  // ---- human ----
  ifstream humanProbe(humanFile);
  if (!humanProbe.good()) {
    humanProbe.close();
    ofstream humanOut(humanFile);

    vector<int> beginPos, endPos, region, intronScore, exonScore;
    vector<String> typeName;

    RepIdentify finder;
    finder.rep_find(hseq, beginPos, endPos, typeName, region, intronScore, exonScore, 0);

    humanOut << beginPos.size() << endl; // The number of repeats
    for (vector<int>::size_type k = 0; k < beginPos.size(); ++k) {
      humanOut << beginPos[k] << "\t" << endPos[k] << "\t"
               << typeName[k] << "\t" << (int)((RepeatType)(region[k])) << "\t"
               << intronScore[k] << "\t" << exonScore[k] << endl;
    }
    humanOut.close();
  }
  else {
    cout << "WARNING: repeat file " << humanFile << " for human sequence already exist: not computing" << endl;
  }

  // ---- mouse ----
  ifstream mouseProbe(mouseFile);
  if (!mouseProbe.good()) {
    mouseProbe.close();
    ofstream mouseOut(mouseFile);

    vector<int> beginPos, endPos, region, intronScore, exonScore;
    vector<String> typeName;

    RepIdentify finder;
    finder.rep_find(mseq, beginPos, endPos, typeName, region, intronScore, exonScore, 1);

    mouseOut << beginPos.size() << endl; // The number of repeats
    for (vector<int>::size_type k = 0; k < beginPos.size(); ++k) {
      mouseOut << beginPos[k] << "\t" << endPos[k] << "\t"
               << typeName[k] << "\t" << (int)((RepeatType)(region[k])) << "\t"
               << intronScore[k] << "\t" << exonScore[k] << endl;
    }
    mouseOut.close();
  }
  else {
    cout << "WARNING: repeat file " << mouseFile << " for mouse sequence already exist: not computing" << endl;
  }
}

// Writes the sequence to the file "<seqnum>_FASTA": a ">" header line with
// the locus, followed by all nucleotides (positions 1..length) on a single
// line with no trailing newline.
void createFasta(FilterSequence *hseq, int seqnum){
  String locusName = hseq->get_locus();

  char fastaName[100];
  strcpy(fastaName, toa(seqnum));
  strcat(fastaName, "_");
  strcat(fastaName, "FASTA");

  ofstream fastaOut(fastaName);
  fastaOut << ">" << locusName << endl;

  int pos = 1;
  while (pos <= hseq->get_length()) {
    fastaOut << nucl2char(hseq->get(pos));
    ++pos;
  }
}

int createAlignmentRegions(FilterSequence *hseq, FilterSequence *mseq, int seqnum, Registry *reg) {
  cout << "Creating alignment regions" << endl;
  int i,j;

  String hlocus = hseq->get_locus();
  String mlocus = mseq->get_locus();

  int revCompH = reg->lookupVal("reverseComplementHuman").contains("yes");
  int revCompM = reg->lookupVal("reverseComplementMouse").contains("yes");
  char hseqbuf[100], mseqbuf[100];
  strcpy(hseqbuf, toa(seqnum));                 strcpy(hseqbuf+strlen(hseqbuf), "_");
  strcpy(mseqbuf, toa(seqnum));                 strcpy(mseqbuf+strlen(mseqbuf), "_");
  if (revCompH)
    strcpy(hseqbuf+strlen(hseqbuf), "RC_");
  if (revCompM)
    strcpy(mseqbuf+strlen(mseqbuf), "RC_");
  strcpy(hseqbuf+strlen(hseqbuf), hlocus); strcpy(hseqbuf+strlen(hseqbuf), "_Regions_H");
  strcpy(mseqbuf+strlen(mseqbuf), mlocus); strcpy(mseqbuf+strlen(mseqbuf), "_Regions_M");
  
  char halignbuf[100], malignbuf[100];
  strcpy(halignbuf, toa(seqnum));                   strcpy(halignbuf+strlen(halignbuf), "_");
  strcpy(malignbuf, toa(seqnum));                   strcpy(malignbuf+strlen(malignbuf), "_");
  if (revCompH)
    strcpy(halignbuf+strlen(halignbuf), "RC_");
  if (revCompM)
    strcpy(malignbuf+strlen(malignbuf), "RC_");
  strcpy(halignbuf+strlen(halignbuf), hlocus); strcpy(halignbuf+strlen(halignbuf), "_Alignments_H");
  strcpy(malignbuf+strlen(malignbuf), mlocus); strcpy(malignbuf+strlen(malignbuf), "_Alignments_M");

  char hallalignbuf[100], mallalignbuf[100];
  strcpy(hallalignbuf, toa(seqnum));                 strcpy(hallalignbuf+strlen(hallalignbuf), "_");
  strcpy(mallalignbuf, toa(seqnum));                 strcpy(mallalignbuf+strlen(mallalignbuf), "_");
  if (revCompH)
    strcpy(hallalignbuf+strlen(hallalignbuf), "RC_");
  if (revCompM)
    strcpy(mallalignbuf+strlen(mallalignbuf), "RC_");
  strcpy(hallalignbuf+strlen(hallalignbuf), hlocus); strcpy(hallalignbuf+strlen(hallalignbuf), "_Global_Alignment_H");
  strcpy(mallalignbuf+strlen(mallalignbuf), mlocus); strcpy(mallalignbuf+strlen(mallalignbuf), "_Global_Alignment_M");
  
  ifstream halignFin(halignbuf);
  ifstream malignFin(malignbuf);
  if (halignFin.good() && malignFin.good()) {
    cout << "WARNING: alignment files " << halignbuf << ", " << malignbuf << " already exist. Not performing alignments" << endl;
    return 0;
  }
  if (halignFin.good()) cout << "WARNING: will overwrite " << halignbuf << " with alignment to different mouse locus !" << endl;
  if (malignFin.good()) cout << "WARNING: will overwrite " << malignbuf << " with alignment to different human locus !" << endl;

  ofstream hseqFout(hseqbuf); ofstream halignFout(halignbuf);
  ofstream mseqFout(mseqbuf); ofstream malignFout(malignbuf);

  ofstream hallalignfout(hallalignbuf);
  ofstream mallalignfout(mallalignbuf);

  int mseql = mseq->get_length(), hseql = hseq->get_length();
  assert(mseql && hseql);
  fixUnknownNuc(hseq); fixUnknownNuc(mseq);
  
  int *humSeqInt   = new int[hseql+1]; seq2int(hseq, humSeqInt);
  int *mouseSeqInt = new int[mseql+1]; seq2int(mseq, mouseSeqInt);
    
  int *humImg   = new int[hseql+1]; arrayInit(-1,humImg,  hseql+1);
  int *mouseImg = new int[mseql+1]; arrayInit(-1,mouseImg,mseql+1);


  int phaseCount = 9;
  int tupLengths[10]  = { 16, 12, 11, 10,  9,  8,  7,  6,  5 };
  int extnLengths[10] = { 12, 12, 12, 12, 12, 12, 12, 12, 12 };
  int extnCutoffs[10] = {  5, 5,  5,  3,  3,  3,  3,  3,  3 };

//   int phaseCount = 8;
//   int tupLengths[10]  = { 12, 11, 10,  9,  8,  7,  6,  5 };
//   int extnLengths[10] = { 12, 12, 12, 12, 12, 12, 12, 12 };
//   int extnCutoffs[10] = {  5,  5,  3,  3,  3,  3,  3,  3 };
  
//   int phaseCount = 9;
//   int tupLengths[10]  = { 30, 25, 20, 16, 12,  9,  7,  6,  5};
//   int extnLengths[10] = { 12, 12, 12, 12, 12, 12, 13, 13, 13};
//   int extnCutoffs[10] = {  5,  5,  5,  3,  3,  3,  3,  3,  4};
  
  cout << "to start alignments" << endl;
  phaseAlign(hseq, mseq, humSeqInt, mouseSeqInt, phaseCount, tupLengths, extnLengths, extnCutoffs, humImg, mouseImg);

  for (i=1; i<=hseql; ++i) if (humImg[i] > 0) assert(humImg[i] <= mseql && mouseImg[humImg[i]] == i);

  char mouseBuf[100];
  strcpy(mouseBuf,     toa(seqnum));         strcpy(mouseBuf+strlen(mouseBuf), "_");
  if (revCompH)
    strcpy(mouseBuf+strlen(mouseBuf), "RC_");
  strcpy(mouseBuf+strlen(mouseBuf), hlocus); strcpy(mouseBuf+strlen(mouseBuf), "_Visual_Alignment");
  ofstream alignMouse(mouseBuf);
  outputAlignment(alignMouse, hseq, mseq, humImg, mouseImg);
  
  int maxGaplen = 30, minReglen = 25;
  int maxAlignRegNum = MIN(hseql,mseql) / (maxGaplen + minReglen) + 2;
  int *hbegins = new int[maxAlignRegNum], *hends = new int[maxAlignRegNum], *mbegins = new int[maxAlignRegNum], *mends = new int[maxAlignRegNum];
  
  int alignRegcnt = extractAligningRegions(humSeqInt, mouseSeqInt, humImg, mouseImg, hseql, mseql, maxGaplen, minReglen, hbegins, hends, mbegins, mends);
  
  if (VERBOSE) cout << "There are " << alignRegcnt << " aligning regions." << endl;
  for (int areg = 0; areg<alignRegcnt; ++areg) {
    if (VERBOSE) cout << areg << ".  Region (" << hbegins[areg] << "," << hends[areg] << ")\t ->\t (" 
	 << mbegins[areg] << "," << mends[areg] << ")" << endl;
  }
  
  int *beginHreg = new int[alignRegcnt];
  int *endHreg   = new int[alignRegcnt];
  int *beginMreg = new int[alignRegcnt];
  int *endMreg   = new int[alignRegcnt];
  
  for (i=0; i<alignRegcnt; ++i) {
 
    beginHreg[i] = MAX(1,     hbegins[i] - 41);
    endHreg[i]   = MIN(hseql, hends[i]   + 40);
    beginMreg[i] = MAX(1,     mbegins[i] - 41);
    endMreg[i]   = MIN(mseql, mends[i]   + 40);

    hseqFout << ">REG_" << i << "__" << "(" << beginHreg[i] << "," << endHreg[i] << ")" << endl;
    
    int endlCounter = 0;
    for (j = beginHreg[i]; j <= endHreg[i]; ++j) {
      hseqFout << nucl2char(hseq->get(j));
      if (!((++endlCounter)%60)) hseqFout << endl;
    }
    hseqFout << endl;
    
    mseqFout << ">REG_" << i << "__" << "(" << beginMreg[i] << "," << endMreg[i] << ")" << endl;
    endlCounter = 0;
    
    for (j = beginMreg[i]; j <= endMreg[i]; ++j) {
      mseqFout << nucl2char(mseq->get(j));
      if (!((++endlCounter)%60)) mseqFout << endl;
    }
    mseqFout << endl;
  }

  for (i=0; i<alignRegcnt; ++i) {
    
    int *tempHimg = new int[endHreg[i] - beginHreg[i] + 2]; tempHimg[0] = -1;
    int *tempMimg = new int[endMreg[i] - beginMreg[i] + 2]; tempMimg[0] = -1;

    for (j=1; j <= (endHreg[i] - beginHreg[i] + 1); ++j) {
      tempHimg[j] = humImg[j + beginHreg[i] - 1] - beginMreg[i] + 1;
      if (tempHimg[j] <= 0)                            tempHimg[j] = -1;
      if (tempHimg[j] > endMreg[i] - beginMreg[i] + 1) tempHimg[j] = -1;

    }
    for (j=1; j <= (endMreg[i] - beginMreg[i] + 1); ++j) {
      tempMimg[j] = mouseImg[j + beginMreg[i] - 1] - beginHreg[i] + 1;
      if (tempMimg[j] <= 0)                            tempMimg[j] = -1;
      if (tempMimg[j] > endHreg[i] - beginHreg[i] + 1) tempMimg[j] = -1;
    }

    for (j=0; j < (endHreg[i] - beginHreg[i] + 1); ++j)
      if (tempHimg[j] > 0) {
	assert(tempHimg[j] <= endMreg[i] - beginMreg[i] + 1);
	assert(tempMimg[tempHimg[j]] == j);
      }
    halignFout << "REGION: " << beginHreg[i] << "\t" << endHreg[i] << endl;
    for (j=0; j <= (endHreg[i] - beginHreg[i] + 1); ++j) halignFout << tempHimg[j] << " ";
    halignFout << endl;

    malignFout << "REGION: " << beginMreg[i] << "\t" << endMreg[i] << endl;
    for (j=0; j <= (endMreg[i] - beginMreg[i] + 1); ++j) malignFout << tempMimg[j] << " ";
    malignFout << endl;
    
    delete[] tempHimg;
    delete[] tempMimg;
  }

  hallalignfout << "REGION: " << 1 << " " << hseql << endl;
  for (i=0; i<=hseql; ++i) hallalignfout << humImg[i] << " ";
  hallalignfout << endl;
  mallalignfout << "REGION: " << 1 << " " << mseql << endl;
  for (i=0; i<=mseql; ++i) mallalignfout << mouseImg[i] << " ";
  mallalignfout << endl;

  hallalignfout << "ALIGNREGCNT: " << alignRegcnt << endl;
  mallalignfout << "ALIGNREGCNT: " << alignRegcnt << endl;

  for (i=0; i<alignRegcnt; ++i) {
    hallalignfout << "ALIGNREGION: " << hbegins[i] << "\t" << hends[i] << endl;
    mallalignfout << "ALIGNREGION: " << mbegins[i] << "\t" << mends[i] << endl;
  }

  delete[] humSeqInt;
  delete[] mouseSeqInt;
  delete[] humImg;
  delete[] mouseImg;

  delete[] hbegins;
  delete[] hends;
  delete[] mbegins;
  delete[] mends;

  delete[] beginHreg; 
  delete[] endHreg;
  delete[] beginMreg;
  delete[] endMreg;

  return alignRegcnt;
}

// Writes sliding-window alignment statistics for the human sequence to fout.
//
// The global alignment images are read from "<seqnum>_<locus>_Global_Alignment_H"
// and "..._M" in the working directory. For window centres i = 20, 27, 34, ...
// (while i < length-20) the 39 positions i-19 .. i+19 are examined and one
// line "\t<matches>\t<gaps>\t<exonCnt>" is written, where
//   gaps    = positions whose image is -1,
//   matches = positions whose nucleotide equals the aligned mouse nucleotide,
//   exonCnt = number of REGION_CEXON regions of the human sequence.
//
// NOTE(review): unlike the other routines in this file, the file names here
// carry no "RC_" marker and no output directory — confirm this is intended.
void alignmentStatistics(FilterSequence *hseq, FilterSequence *mseq, int seqnum, Registry *reg, ofstream &fout) {
  int i;

  int hseql = hseq->get_length();

  char hallalignbuf[100], mallalignbuf[100];
  strcpy(hallalignbuf, toa(seqnum));                      strcpy(hallalignbuf+strlen(hallalignbuf), "_");
  strcpy(mallalignbuf, toa(seqnum));                      strcpy(mallalignbuf+strlen(mallalignbuf), "_");
  strcpy(hallalignbuf+strlen(hallalignbuf), hseq->get_locus()); strcpy(hallalignbuf+strlen(hallalignbuf), "_Global_Alignment_H");
  strcpy(mallalignbuf+strlen(mallalignbuf), mseq->get_locus()); strcpy(mallalignbuf+strlen(mallalignbuf), "_Global_Alignment_M");

  // Each Modules object consumes its own pair of streams, hence the copies.
  ifstream halignFin(hallalignbuf), halignFinCopy(hallalignbuf);
  ifstream malignFin(mallalignbuf), malignFinCopy(mallalignbuf);

  Modules *hmod = new Modules(hseq, reg);
  Modules *mmod = new Modules(mseq, reg);

  hmod->setImages(halignFin, malignFin);
  mmod->setImages(malignFinCopy, halignFinCopy);

  int exonCnt = 0;
  for (i=1; i<=hseq->get_region_num(); ++i) if (hseq->get_region(i)->type == REGION_CEXON) exonCnt++;

  for (i=20; i<hseql-20; i += 7) {
    int matches = 0, gaps = 0;
    for (int j = i-19; j<=i+19; ++j) {
      if (hmod->humImg[j] == -1) gaps++;
      else if (hseq->get(j) == mseq->get(hmod->humImg[j])) matches++;
    }
    //    Region *r = hseq->nuc2reg(i);
    //    if (r->start <= i-19 && r->stop >= i+19)
    fout /* << (int)hseq->whatRegion(i)*/ << "\t" << matches << "\t" << gaps << "\t" << exonCnt << endl;
  }

  // Release the helper objects; they were previously leaked on every call.
  delete hmod;
  delete mmod;
}

// Counts the positions of seq1 that are aligned to an identical symbol in seq2.
//
// Parameters:
//   seq1, seq2  symbol arrays, read at indices 1..l1 and 0..l2 respectively.
//   l1, l2      lengths of the two sequences.
//   img1        img1[i] is the index in seq2 aligned to position i of seq1,
//               or a negative value when position i is unaligned.
//   img2        the corresponding map from seq2 to seq1; read at
//               img1[i]-1 and img1[i]+1 for interior positions.
//
// Counting rules:
//   * the first and last position of seq1 count whenever they are aligned to
//     an equal symbol;
//   * an interior position mapped to index 0 or l2 counts whenever the
//     symbols are equal;
//   * any other interior position counts only if the symbols are equal and
//     both neighbours, in seq1 and in seq2, are aligned as well.
//
// Returns the count; 0 when l1 < 1.
int similarityCount(int *seq1, int *seq2, int l1, int l2, int *img1, int *img2) {

  if (l1 < 1) return 0; // nothing to inspect, and index 1 would be out of range

  int count = 0;

  if (img1[1] >= 0 && seq1[1] == seq2[img1[1]]) count++;
  // With a single position, first and last coincide: do not count it twice.
  if (l1 > 1 && img1[l1] >= 0 && seq1[l1] == seq2[img1[l1]]) count++;

  for (int i = 2; i < l1; ++i) {
    const int target = img1[i];

    if (target == 0 || target == l2) {
      // Boundary of seq2: there is no neighbour on one side, so only the
      // symbols are compared. A mismatch here simply does not count (it must
      // not reach the range assertion below, which it would fail for l2).
      if (seq1[i] == seq2[target]) count++;
    }
    else {
      assert(target < l2);
      if (target > 0 && seq1[i] == seq2[target] &&
          img1[i-1] >= 0 && img1[i+1] >= 0 &&
          img2[target-1] >= 0 && img2[target+1] >= 0) count++;
    }
  }
  return count;
}

void exonsAlignmentStatistics(FilterSequence *hseq, FilterSequence *mseq, int seqnum, Registry *reg, ofstream &fout) {
  int i,j,k;
  
  int hseql = hseq->get_length();

  int revCompH = reg->lookupVal("reverseComplementHuman").contains("yes");
  int revCompM = reg->lookupVal("reverseComplementMouse").contains("yes");
  char hallalignbuf[100], mallalignbuf[100];
  strcpy(hallalignbuf, "/data2/rosetta/output/");
  strcpy(mallalignbuf, "/data2/rosetta/output/");
  
  strcpy(hallalignbuf+strlen(hallalignbuf), toa(seqnum));  strcpy(hallalignbuf+strlen(hallalignbuf), "_");
  strcpy(mallalignbuf+strlen(mallalignbuf), toa(seqnum));  strcpy(mallalignbuf+strlen(mallalignbuf), "_");
  if (revCompH)
    strcpy(hallalignbuf+strlen(hallalignbuf), "RC_");
  if (revCompM)
    strcpy(mallalignbuf+strlen(mallalignbuf), "RC_");
 
  strcpy(hallalignbuf+strlen(hallalignbuf), hseq->get_locus()); strcpy(hallalignbuf+strlen(hallalignbuf), "_Global_Alignment_H");
  strcpy(mallalignbuf+strlen(mallalignbuf), mseq->get_locus()); strcpy(mallalignbuf+strlen(mallalignbuf), "_Global_Alignment_M");
  
  ifstream halignFin(hallalignbuf), halignFinCopy(hallalignbuf);
  ifstream malignFin(mallalignbuf), malignFinCopy(mallalignbuf); 
  
  Modules *hmod = new Modules(hseq, reg);
  Modules *mmod = new Modules(mseq, reg);
  
  hmod->setImages(halignFin, malignFin);
  mmod->setImages(malignFinCopy, halignFinCopy);

  ifstream correspondingExons("/cpp/main/correspondingExons.txt");
  char buf[100];
  int correspondingE[100]; arrayZero(correspondingE,100);
  int hexonCnt=0;

  for (i=1; i<=hseq->get_region_num(); ++i) 
    if (hseq->get_region(i)->type == REGION_CEXON) hexonCnt++;

  while (correspondingExons.good()) {
    correspondingExons >> buf;
    if (!strcmp(buf,hseq->get_locus())) break;
  }
  if (!strcmp(buf, hseq->get_locus())) {
    cout << "found !" << endl;
    for (i=0; i<hexonCnt; ++i) correspondingExons >> correspondingE[i];
  }


  currAlignPAM = new (double*)[22];
  for (i=0; i<22; ++i) currAlignPAM[i] = new double[22];
  ifstream fmatAlign("/data2/rosetta/tables/PAM/PAM20");
  readPamMatrix(currAlignPAM,fmatAlign);

  hexonCnt=0;
  int mexonCnt = 0;
  double totalNucmatches=0, totalAAmatches=0;
  double hexonLengths[1000], hexonNucmatches[1000], hexonNucmatch2[1000], hexonAAmatches[1000];
  int mexonLengths[1000];
  arrayZero(hexonLengths,    1000);
  arrayZero(hexonNucmatches, 1000);
  arrayZero(hexonNucmatch2,  1000);
  arrayZero(hexonAAmatches,  1000);
  arrayZero(mexonLengths,    1000);

  //  int nucSofar = 0;

  int *hncexon5 = new int[10000], *hncexon3 = new int[10000];
  int *mncexon5 = new int[10000], *mncexon3 = new int[10000];
  int *hncexon5Img = new int[10000], *hncexon3Img = new int[10000], *mncexon5Img = new int[10000], *mncexon3Img = new int[10000];
  int hncexon5ptr = 0, hncexon3ptr = 0, mncexon5ptr = 0, mncexon3ptr = 0;
  int ncexonflag = 0;
  for (i=1; i<=hseq->get_region_num(); ++i) {
    Region *r;
    if ((r = hseq->get_region(i))->type == REGION_CEXON) ncexonflag = 1;
    if ((r = hseq->get_region(i))->type == REGION_NCEXON)
      if (ncexonflag)
	for (k=r->start; k<=r->stop; ++k) hncexon3[++hncexon3ptr] = hseq->get(k);
      else
	for (k=r->start; k<=r->stop; ++k) hncexon5[++hncexon5ptr] = hseq->get(k);
  }
  ncexonflag = 0;
  for (i=1; i<=mseq->get_region_num(); ++i) {
    Region *r;
    if ((r = mseq->get_region(i))->type == REGION_CEXON) ncexonflag = 1;
    if ((r = mseq->get_region(i))->type== REGION_NCEXON)
      if (ncexonflag)
	for (k=r->start; k<=r->stop; ++k) mncexon3[++mncexon3ptr] = mseq->get(k);
      else
	for (k=r->start; k<=r->stop; ++k) mncexon5[++mncexon5ptr] = mseq->get(k);
  }
  if (hncexon5ptr>2 && mncexon5ptr>2) memalign(hncexon5, mncexon5, 1, hncexon5ptr, 1, mncexon5ptr, 1, -1, -3, hncexon5Img, mncexon5Img);
  if (hncexon3ptr>2 && mncexon3ptr>2) memalign(hncexon3, mncexon3, 1, hncexon3ptr, 1, mncexon3ptr, 1, -1, -3, hncexon3Img, mncexon3Img);

  double hncexon5len = MAX(0,hncexon5ptr);
  double hncexon3len = MAX(0,hncexon3ptr);
  double mncexon5len = MAX(0,mncexon5ptr);
  double mncexon3len = MAX(0,mncexon3ptr);
  /*
  double hncexon5matches=0, hncexon3matches=0;

  for (i=1; i<=hncexon5len; ++i)
    if (hncexon5Img[i] > 0 && hncexon5[i] == mncexon5[hncexon5Img[i]]) hncexon5matches+=1.0;
  for (i=1; i<=hncexon3len; ++i)
    if (hncexon3Img[i] > 0 && hncexon3[i] == mncexon3[hncexon3Img[i]]) hncexon3matches+=1.0;
  */
  delete[] hncexon5; delete[] hncexon5Img; delete[] hncexon3; delete[] hncexon3Img;
  delete[] mncexon5; delete[] mncexon5Img; delete[] mncexon3; delete[] mncexon3Img;

  int hnce5Cnt = 0, mnce5Cnt = 0, hnce3Cnt = 0, mnce3Cnt = 0;
  int hnce5lens[10], mnce5lens[10], hnce3lens[10], mnce3lens[10];
  int hnce5Regs[10], mnce5Regs[10], hnce3Regs[10], mnce3Regs[10];
  double hnce5Alignments[10], hnce3Alignments[10];
  arrayZero(hnce5Alignments, 10); arrayZero(hnce3Alignments, 10);

  ncexonflag = 0;
  for (i=1; i<=hseq->get_region_num(); ++i) {
    Region *r = hseq->get_region(i);
    switch(r->type) {
    case REGION_CEXON: ncexonflag = 1; break;
    case REGION_NCEXON:
      if (ncexonflag) {
	hnce3Regs[hnce3Cnt] = i;
	hnce3lens[hnce3Cnt++] = r->stop-r->start + 1;
      }
      else {
	hnce5Regs[hnce5Cnt] = i;
	hnce5lens[hnce5Cnt++] = r->stop-r->start + 1;
      }
      break;
    default: break;
    }
  }
  ncexonflag = 0;
  for (i=1; i<=mseq->get_region_num(); ++i) {
    Region *r = mseq->get_region(i);
    switch(r->type) {
    case REGION_CEXON: ncexonflag = 1; break;
    case REGION_NCEXON:
      if (ncexonflag) {
	mnce3Regs[mnce3Cnt] = i;
	mnce3lens[mnce3Cnt++] = r->stop-r->start + 1;
      }
      else {
	mnce5Regs[mnce5Cnt] = i;
	mnce5lens[mnce5Cnt++] = r->stop-r->start + 1;
      }
      break;
    default: break;
    }
  }

  double hncexon5matches=0, hncexon3matches=0;
  for (i=0; i<hnce5Cnt; ++i) {
    if (hnce5Cnt - i > mnce5Cnt) continue;
    Region *hr = hseq->get_region(hnce5Regs[i]);
    Region *mr = mseq->get_region(mnce5Regs[j = i + (mnce5Cnt-hnce5Cnt)]);
    int *hncexInt = new int[hnce5lens[i]+1], *hncexImg = new int[hnce5lens[i]+1];
    int *mncexInt = new int[mnce5lens[j]+1], *mncexImg = new int[mnce5lens[j]+1];
    arrayZero(hncexInt, hnce5lens[i]+1); arrayZero(hncexImg, hnce5lens[i]+1);
    arrayZero(mncexInt, mnce5lens[j]+1); arrayZero(mncexImg, mnce5lens[j]+1);

    for (k = 1; k<=hnce5lens[i]; ++k) hncexInt[k] = hseq->get(hr->start+k-1);
    for (k = 1; k<=mnce5lens[j]; ++k) mncexInt[k] = mseq->get(mr->start+k-1);

    memalign(hncexInt, mncexInt, 1, hnce5lens[i], 1, mnce5lens[j], 1, -1, -3, hncexImg, mncexImg);

    for (k=1; k<=hnce5lens[i]; ++k)
      if (hncexImg[k]>0 && hncexInt[k] == mncexInt[hncexImg[k]]) hnce5Alignments[i] += 1.0;
    hnce5Alignments[i] = similarityCount(hncexInt, mncexInt, hnce5lens[i], mnce5lens[j], hncexImg, mncexImg);

    hncexon5matches += hnce5Alignments[i];
    hnce5Alignments[i] *= (100.0 / (double) hnce5lens[i]);
    delete[] hncexInt; delete[] hncexImg;
    delete[] mncexInt; delete[] mncexImg;
  }

  for (i=0; i<hnce3Cnt; ++i) {
    if (i >= mnce3Cnt) continue;
    Region *hr = hseq->get_region(hnce3Regs[i]);
    Region *mr = mseq->get_region(mnce3Regs[j=i]);
    int *hncexInt = new int[hnce3lens[i]+1], *hncexImg = new int[hnce3lens[i]+1];
    int *mncexInt = new int[mnce3lens[j]+1], *mncexImg = new int[mnce3lens[j]+1];
    arrayZero(hncexInt, hnce3lens[i]+1); arrayZero(hncexImg, hnce3lens[i]+1);
    arrayZero(mncexInt, mnce3lens[j]+1); arrayZero(mncexImg, mnce3lens[j]+1);

    for (k = 1; k<=hnce3lens[i]; ++k) hncexInt[k] = hseq->get(hr->start+k-1);
    for (k = 1; k<=mnce3lens[j]; ++k) mncexInt[k] = mseq->get(mr->start+k-1);

    memalign(hncexInt, mncexInt, 1, hnce3lens[i], 1, mnce3lens[j], 1, -1, -3, hncexImg, mncexImg);

    for (k=1; k<=hnce3lens[i]; ++k)
      if (hncexImg[k]>0 && hncexInt[k] == mncexInt[hncexImg[k]]) hnce3Alignments[i] += 1.0;
    hnce3Alignments[i] = similarityCount(hncexInt, mncexInt, hnce3lens[i], mnce3lens[j], hncexImg, mncexImg);
	  
    hncexon3matches += hnce3Alignments[i];
    hnce3Alignments[i] *= (100.0 / (double) hnce3lens[i]);
    delete[] hncexInt; delete[] hncexImg;
    delete[] mncexInt; delete[] mncexImg;
  }

  int hnci5Cnt = 0, mnci5Cnt = 0, hnci3Cnt = 0, mnci3Cnt = 0, hintCnt = 0 , mintCnt = 0;
  int hnci5lens[10] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, mnci5lens[10] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
  int hnci3lens[10] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1}, mnci3lens[10] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
  int hintlens[100], mintlens[100]; arrayInit(-1, hintlens, 100); arrayInit(-1, mintlens, 100); 
  int hnci5Regs[10], mnci5Regs[10], hnci3Regs[10], mnci3Regs[10], hintregs[100], mintregs[100];
  double hnci5Alignments[10], hnci3Alignments[10], hintAlignments[100];
  arrayZero(hnci5Alignments, 10); arrayZero(hnci3Alignments, 10); arrayZero(hintAlignments, 100);

  int nciflag = 0;
  for (i=1; i<=hseq->get_region_num(); ++i) {
    Region *r = hseq->get_region(i);
    switch(r->type) {
    case REGION_CEXON:
      if (r == hseq->lastCExon()) nciflag = 2;
      else                        nciflag = 1; 
      break;
    case REGION_INTRON:
      switch (nciflag) {
      case 0:
	hnci5Regs[hnci5Cnt] = i;
	hnci5lens[hnci5Cnt++] = r->length();
	break;
      case 1:
	hintregs[hintCnt] = i;
	hintlens[hintCnt++] = r->length();
	break;
      case 2:
	hnci3Regs[hnci3Cnt] = i;
	hnci3lens[hnci3Cnt++] = r->length();
	break;
      default: break;
      }
    default: break;
    }
  }
  nciflag = 0;
  for (i=1; i<=mseq->get_region_num(); ++i) {
    Region *r = mseq->get_region(i);
    switch(r->type) {
    case REGION_CEXON:
      if (r == mseq->lastCExon()) nciflag = 2;
      else                        nciflag = 1; 
      break;
    case REGION_INTRON:
      switch (nciflag) {
      case 0:
	mnci5Regs[mnci5Cnt] = i;
	mnci5lens[mnci5Cnt++] = r->length();
	break;
      case 1:
	mintregs[mintCnt] = i;
	mintlens[mintCnt++] = r->length();
	break;
      case 2:
	mnci3Regs[mnci3Cnt] = i;
	mnci3lens[mnci3Cnt++] = r->length();
	break;
      default: break;
      }
    default: break;
    }
  }
  for (i=0; i<MAX(hnci5Cnt, mnci5Cnt); ++i) {
    hnci5lens[i] = MAX(0,hnci5lens[i]);
    mnci5lens[i] = MAX(0,mnci5lens[i]);
  }
  for (i=0; i<MAX(hnci3Cnt, mnci3Cnt); ++i) {
    hnci3lens[i] = MAX(0,hnci3lens[i]);
    mnci3lens[i] = MAX(0,mnci3lens[i]);
  }

  double hintrMatches=0, hintrTotlen = 0, mintrTotlen= 0;
  for (i=0; i<hnci5Cnt; ++i) {
    if (hnci5Cnt - i > mnci5Cnt) continue;
    Region *hr = hseq->get_region(hnci5Regs[i]);
    Region *mr = mseq->get_region(mnci5Regs[j = i + (mnci5Cnt-hnci5Cnt)]);
    int *hncixInt = new int[hnci5lens[i]+1], *hncixImg = new int[hnci5lens[i]+1];
    int *mncixInt = new int[mnci5lens[j]+1], *mncixImg = new int[mnci5lens[j]+1];
    arrayZero(hncixInt, hnci5lens[i]+1); arrayZero(hncixImg, hnci5lens[i]+1);
    arrayZero(mncixInt, mnci5lens[j]+1); arrayZero(mncixImg, mnci5lens[j]+1);

    for (k = 1; k<=hnci5lens[i]; ++k) hncixInt[k] = hseq->get(hr->start+k-1);
    for (k = 1; k<=mnci5lens[j]; ++k) mncixInt[k] = mseq->get(mr->start+k-1);

    memalign(hncixInt, mncixInt, 1, hnci5lens[i], 1, mnci5lens[j], 1, -1, -3, hncixImg, mncixImg);

    for (k=1; k<=hnci5lens[i]; ++k)
      if (hncixImg[k]>0 && hncixInt[k] == mncixInt[hncixImg[k]]) hnci5Alignments[i] += 1.0;
    hnci5Alignments[i] = similarityCount(hncixInt, mncixInt, hnci5lens[i], mnci5lens[j], hncixImg, mncixImg);

    hintrMatches += hnci5Alignments[i];
    hintrTotlen  += hnci5lens[i];
    mintrTotlen  += mnci5lens[j];
    hnci5Alignments[i] *= (100.0 / (double) hnci5lens[i]);
    delete[] hncixInt; delete[] hncixImg;
    delete[] mncixInt; delete[] mncixImg;
  }

  for (i=0; i<hnci3Cnt; ++i) {
    if (i >= mnci3Cnt) continue;
    Region *hr = hseq->get_region(hnci3Regs[i]);
    Region *mr = mseq->get_region(mnci3Regs[j=i]);
    int *hncixInt = new int[hnci3lens[i]+1], *hncixImg = new int[hnci3lens[i]+1];
    int *mncixInt = new int[mnci3lens[j]+1], *mncixImg = new int[mnci3lens[j]+1];
    arrayZero(hncixInt, hnci3lens[i]+1); arrayZero(hncixImg, hnci3lens[i]+1);
    arrayZero(mncixInt, mnci3lens[j]+1); arrayZero(mncixImg, mnci3lens[j]+1);

    for (k = 1; k<=hnci3lens[i]; ++k) hncixInt[k] = hseq->get(hr->start+k-1);
    for (k = 1; k<=mnci3lens[j]; ++k) mncixInt[k] = mseq->get(mr->start+k-1);

    memalign(hncixInt, mncixInt, 1, hnci3lens[i], 1, mnci3lens[j], 1, -1, -3, hncixImg, mncixImg);

    for (k=1; k<=hnci3lens[i]; ++k)
      if (hncixImg[k]>0 && hncixInt[k] == mncixInt[hncixImg[k]]) hnci3Alignments[i] += 1.0;
    hnci3Alignments[i] = similarityCount(hncixInt, mncixInt, hnci3lens[i], mnci3lens[j], hncixImg, mncixImg);

    hintrMatches += hnci3Alignments[i];
    hintrTotlen  += hnci3lens[i];
    mintrTotlen  += mnci3lens[j];
    hnci3Alignments[i] *= (100.0 / (double) hnci3lens[i]);
    delete[] hncixInt; delete[] hncixImg;
    delete[] mncixInt; delete[] mncixImg;
  }
  

  for (i=1; i<=hseq->get_region_num(); ++i) {
    if (hseq->get_region(i)->type == REGION_CEXON) {
      Region *exon = hseq->get_region(i);
      hexonLengths[hexonCnt] = exon->length();
      assert(i < hseq->get_region_num());
      
      Region *mexon;
      if (strcmp(hseq->get_locus(), "HSAPOAIA")) {
	int mexonPtr = 0;
	for (j=1; j<=mseq->get_region_num(); ++j)
	  if (mseq->get_region(j)->type == REGION_CEXON) {
	    if (mexonPtr == correspondingE[hexonCnt]) break;
	    mexonPtr++;
	  }
	assert(mexonPtr == correspondingE[hexonCnt]);
	mexon = mseq->get_region(j);
      }
      else continue;

      int hexonl = (int)hexonLengths[hexonCnt], mexonl = mexon->stop - mexon->start+1;
 
      int *hexonInt = new int[hexonl+1];
      int *mexonInt = new int[mexonl+1];
      for (k=1; k<=hexonl; ++k) hexonInt[k] = hseq->get(exon->start + k-1);
      for (k=1; k<=mexonl; ++k) mexonInt[k] = mseq->get(mexon->start + k-1);

      int *hexonImg = new int[hexonl+1], *mexonImg = new int[mexonl+1];
      arrayZero(hexonImg, hexonl+1); arrayZero(mexonImg, mexonl+1);

      memalign(hexonInt, mexonInt, 1, hexonl, 1, mexonl, 1, -1, -3, hexonImg, mexonImg);
      
      for (k = 1; k <= hexonl; ++k)
	if (hexonImg[k] > 0 && hexonInt[k] == mexonInt[hexonImg[k]])
	  hexonNucmatches[hexonCnt] += 1.0;

      hexonNucmatches[hexonCnt] = similarityCount(hexonInt, mexonInt, hexonl, mexonl, hexonImg, mexonImg);
      totalNucmatches += hexonNucmatches[hexonCnt];

      delete[] hexonInt; delete[] hexonImg;
      delete[] mexonInt; delete[] mexonImg;

      Region *rnext = hseq->get_region(i+1);
      if (rnext->type == REGION_INTRON) {
	assert(hintregs[hexonCnt] == i+1);
	assert(j < mseq->get_region_num());
	Region *mrnext = mseq->get_region(j+1);
	if (mrnext->type == REGION_INTRON) {
	  int *hintrInt = new int[rnext->length()+1];
	  int *mintrInt = new int[mrnext->length()+1];
	  for (k=1; k<=rnext->length(); ++k) hintrInt[k] = hseq->get(rnext->start + k-1);
	  for (k=1; k<=mrnext->length(); ++k) mintrInt[k] = mseq->get(mrnext->start + k-1);
	  
	  int *hintrImg = new int[rnext->length()+1], *mintrImg = new int[mrnext->length()+1];
	  arrayZero(hintrImg, rnext->length()+1); arrayZero(mintrImg, mrnext->length()+1);
	  
	  memalign(hintrInt, mintrInt, 1, rnext->length(), 1, mrnext->length(), 1, -1, -3, hintrImg, mintrImg);
	  
	  for (k = 1; k <= rnext->length(); ++k)
	    if (hintrImg[k] > 0 && hintrInt[k] == mintrInt[hintrImg[k]])
	      hintAlignments[hexonCnt] += 1.0;
	  hintAlignments[hexonCnt] = similarityCount(hintrInt, mintrInt, rnext->length(), mrnext->length(), hintrImg, mintrImg);

	  hintrMatches += hintAlignments[hexonCnt];
	  hintrTotlen  += rnext->length();
	  mintrTotlen  += mrnext->length();
	  delete[] hintrInt; delete[] hintrImg;
	  delete[] mintrInt; delete[] mintrImg;
	}
      }



//       for (k = exon->start; k <= exon->stop; ++k)
// 	if (hmod->humImg[k] > 0 && 
// 	    (j==0 || (hmod->humImg[k] >= mexon->start && hmod->humImg[k] <= mexon->stop)) &&
// 	    hseq->get(k) == mseq->get(hmod->humImg[k])) hexonNucmatches[hexonCnt] += 1.0;
//       totalNucmatches += hexonNucmatches[hexonCnt];

      int haastart, maastart, haastop, maastop;
      int haalen, maalen;
      int hstart = exon->start, hstop = exon->stop;
      int mstart = mexon->start, mstop = mexon->stop;
      char *haa    = new char[haalen = (hstop-hstart+3)/3 +1];
      char *maa    = new char[maalen = (mstop-mstart+3)/3 +1];
      int  *haaimg = new int[haalen];
      int  *maaimg = new int[maalen];
      
      int hfr = ( (3 - exon->frame)%3 + hstart )%3;
      int mfr = ( (3 - mexon->frame)%3 + mstart )%3;

      hexonAAmatches[hexonCnt] = 0;
      if (haalen <=3 || maalen <=3)
	totalAAmatches += (hexonAAmatches[hexonCnt] = haalen-1);
      else {
	alignExonPair(hseq, mseq, hstart, hstop, mstart, mstop, hfr, mfr, hmod->humImg, hmod->mouseImg, haa, maa, 
		      haastart, maastart, haastop, maastop, haaimg, maaimg);
	
	assert(!((haastop-haastart+1)%3) && (haastop-haastart+1)/3 < haalen); haalen = (haastop-haastart+1)/3;
	assert(!((maastop-maastart+1)%3) && (maastop-maastart+1)/3 < maalen); maalen = (maastop-maastart+1)/3;
      
	for (j=0; j<haalen; ++j)
	  if (haaimg[j] >=0 && haa[j] == maa[haaimg[j]]) hexonAAmatches[hexonCnt] += 1.0;
	totalAAmatches += hexonAAmatches[hexonCnt];
      }
      hexonCnt++;

      delete[] haa; delete[] maa;
      delete[] haaimg; delete[] maaimg;
    }
  }
  assert(hexonCnt>0);
  for (i=1; i<=mseq->get_region_num(); ++i) if (mseq->get_region(i)->type == REGION_CEXON) {
    mexonLengths[mexonCnt++] = mseq->get_region(i)->length();
  }
  
  // Computing the precent of sequence that aligns overall: #matches inside a "well aligning region";
  int *cumMatches = new int[hseql+1]; arrayZero(cumMatches, hseql+1);
  for (i = 1; i<=hseql; ++i)
    if (hmod->humImg[i] > 0 && hseq->get(i) == mseq->get(hmod->humImg[i]))
      cumMatches[i] = cumMatches[i-1]+1;
    else cumMatches[i] = cumMatches[i-1];
  double totalSeqNucmatches = 0;
  for (i=1; i<=hseql; ++i) {
    if (i>20 && i<hseql-20 && cumMatches[i+20] - cumMatches[i-20] <= 20) continue;
    if (cumMatches[i] > cumMatches[i-1]) totalSeqNucmatches += 1.0;
  }
  // end of computing percent of aligning sequence;
  
  int exonPredictions[100]; arrayZero(exonPredictions,100);
  int stats[50]; arrayZero(stats,50);

  checkParses(hseq, seqnum, stats, 1, exonPredictions);

  fout.precision(4);

  int infoTypeH = 0;
  if (hnce5Cnt) infoTypeH += 1;
  if (hnce3Cnt) infoTypeH += 2;

  int infoTypeM = 0;
  if (mnce5Cnt) infoTypeM += 1;
  if (mnce3Cnt) infoTypeM += 2;

  fout << hseq->get_locus() << "\t" << infoTypeH << "\t" << hseq->get_length() << "\t+" << hintrTotlen << "\t"  << hnce5Cnt << "\t" << hncexon5len;
  for (i=0; i<MAX(hnce5Cnt, mnce5Cnt); ++i) {
    fout << "\t";
    if (mnce5Cnt > hnce5Cnt)
      if (i >= mnce5Cnt - hnce5Cnt) {
	if (hnci5lens[i - mnce5Cnt + hnce5Cnt]>=0)
	  fout << hnci5lens[i - mnce5Cnt + hnce5Cnt] << "+\t";
	fout << hnce5lens[i - mnce5Cnt + hnce5Cnt];
      }
      else fout << 0;
    else {
      fout << hnce5lens[i];
      if (hnci5lens[i]>0) fout << "\t+" << hnci5lens[i];
    }
  }

  fout << "\t" << hexonCnt << "\t" << hmod->codingl << "\t";
  for (i=0; i<hexonCnt; ++i) {
    fout << hexonLengths[i] << "\t";
    if (hintlens[i]>=0) fout << hintlens[i] << "+\t";
  }
  fout << hnce3Cnt << "\t" << hncexon3len;

  for (i=0; i<MAX(hnce3Cnt, mnce3Cnt); ++i) {
    fout << "\t";
    if (i < hnce3Cnt) {
      fout << hnce3lens[i];
      if (hnci3lens[i] >= 0) fout << "\t+" << hnci3lens[i];
    }
    else fout << 0;
  }
  fout << endl;

  fout << mseq->get_locus() << "\t" << infoTypeM << "\t" << mseq->get_length() << "\t+" << mintrTotlen << "\t" << mnce5Cnt << "\t" << mncexon5len;
  for (i=0; i<MAX(hnce5Cnt, mnce5Cnt); ++i) {
    fout << "\t";
    if (hnce5Cnt > mnce5Cnt)
      if (i >= hnce5Cnt - mnce5Cnt) {
	if (mnci5lens[i - hnce5Cnt + mnce5Cnt]>=0) fout << mnci5lens[i - hnce5Cnt + mnce5Cnt] << "+\t";
	fout << mnce5lens[i - hnce5Cnt + mnce5Cnt];
      }
      else fout << 0;
    else {
      fout << mnce5lens[i];
      if (mnci5lens[i] >= 0) fout << "\t+" << mnci5lens[i];
    }
  }
  
  fout<< "\t" << mexonCnt << "\t" << mmod->codingl << "\t";
  for (i=0; i<mexonCnt; ++i) {
    fout << mexonLengths[i] << "\t";
    if (mintlens[i]>=0) fout << mintlens[i] << "+\t";
  }
  fout << mnce3Cnt << "\t" << mncexon3len;

  for (i=0; i<MAX(hnce3Cnt, mnce3Cnt); ++i) {
    fout << "\t";
    if (i < mnce3Cnt) {
      fout << mnce3lens[i];
      if (mnci3lens[i] >= 0) fout << "\t+" << mnci3lens[i];
    }
    else fout << 0;
  }
  fout << endl;

  if (hncexon5len==0) hncexon5len=1;
  if (hncexon3len==0) hncexon3len=1;
  
  fout << "\t\t" << 100*totalSeqNucmatches/MIN((double)hseq->get_length(), (double)mseq->get_length()) << "\t+" 
       << ( hintrTotlen ? (100*hintrMatches/hintrTotlen) : 0 ) << "\t\t" << 100*MAX(0.0,hncexon5matches/(hncexon5len)) << "\t";
  for (i=0; i<MAX(hnce5Cnt, mnce5Cnt); ++i) {
    if (mnce5Cnt > hnce5Cnt)
      if (i >= mnce5Cnt - hnce5Cnt) {
	if (hnci5lens[i - mnce5Cnt + hnce5Cnt]>=0) 
	  fout << ( (hnci5lens[i - mnce5Cnt + hnce5Cnt]) ? hnci5Alignments[i - mnce5Cnt + hnce5Cnt] : 0 ) << "+\t";
	fout << hnce5Alignments[i - mnce5Cnt + hnce5Cnt];
      }
      else fout << 0;
    else {
      fout << hnce5Alignments[i];
      if (hnci5lens[i] >= 0) fout << "\t+" << (hnci5lens[i] ? hnci5Alignments[i - mnce5Cnt + hnce5Cnt] : 0);
    }
    fout << "\t";
  }
  fout << "\t" << 100*totalNucmatches/(double)hmod->codingl << "\t";
  for (i=0; i<hexonCnt; ++i) {
    fout << 100*hexonNucmatches[i]/hexonLengths[i] << "\t";
    if (hintlens[i]>=0) fout << (hintlens[i] ? 100*hintAlignments[i]/hintlens[i] : 0 ) << "+\t";
  }
  fout << "\t" << 100*MAX(0.0,hncexon3matches/hncexon3len);

  for (i=0; i<MAX(hnce3Cnt, mnce3Cnt); ++i) {
    fout << "\t";
    if (i < hnce3Cnt) {
      fout << hnce3Alignments[i];
      if (hnci3lens[i] >= 0) fout << "\t+" << (hnci3lens[i] ? hnci3Alignments[i] : 0);
    }
    else fout << 0;
  }
  fout << endl;
  
  for (i=0; i<MAX(hnce5Cnt, mnce5Cnt); ++i) {
    fout << "\t";
    if (hnci5lens[i]) fout << "\t";
  }
  fout << "\t\t\t\t\t\t\t" << 300*totalAAmatches/(double)hmod->codingl << "\t";
  for (i=0; i<hexonCnt; ++i) fout << 300*hexonAAmatches[i]/hexonLengths[i] << "\t\t";
  fout << endl;

  for (i=0; i<MAX(hnce5Cnt, mnce5Cnt); ++i) {
    fout << "\t";
    if (hnci5lens[i]) fout << "\t";
  }
  fout << "\t\t\t\t\t\t\t\t";
  for (i=0; i<hexonCnt; ++i) fout << exonPredictions[i] << "\t\t";
  fout << endl << endl;

  for (i=0; i<22; ++i) delete[] currAlignPAM[i];
  delete[] currAlignPAM;

}


int parseAligningRegions(String hlocus, String mlocus, int seqnum, Registry *reg, 
			 StatEvaluator *startCodon, StatEvaluator *stopCodon, StatEvaluator *ATGCodon, LongTupleTable *ltt) {

  int i;

  int revCompH = reg->lookupVal("reverseComplementHuman").contains("yes");
  int revCompM = reg->lookupVal("reverseComplementMouse").contains("yes");

  char hseqbuf[100], mseqbuf[100];
  strcpy(hseqbuf, toa(seqnum));                 strcpy(hseqbuf+strlen(hseqbuf), "_");
  strcpy(mseqbuf, toa(seqnum));                 strcpy(mseqbuf+strlen(mseqbuf), "_");
  if (revCompH)
    strcpy(hseqbuf+strlen(hseqbuf), "RC_");
  if (revCompM)
    strcpy(mseqbuf+strlen(mseqbuf), "RC_");
 
  strcpy(hseqbuf+strlen(hseqbuf), hlocus); strcpy(hseqbuf+strlen(hseqbuf), "_Regions_H");
  strcpy(mseqbuf+strlen(mseqbuf), mlocus); strcpy(mseqbuf+strlen(mseqbuf), "_Regions_M");
  
  char halignbuf[100], malignbuf[100];
  strcpy(halignbuf, toa(seqnum));                   strcpy(halignbuf+strlen(halignbuf), "_");
  strcpy(malignbuf, toa(seqnum));                   strcpy(malignbuf+strlen(malignbuf), "_");
  if (revCompH)
    strcpy(halignbuf+strlen(halignbuf), "RC_");
  if (revCompM)
    strcpy(malignbuf+strlen(malignbuf), "RC_");

  strcpy(halignbuf+strlen(halignbuf), hlocus); strcpy(halignbuf+strlen(halignbuf), "_Alignments_H");
  strcpy(malignbuf+strlen(malignbuf), mlocus); strcpy(malignbuf+strlen(malignbuf), "_Alignments_M");

  char hrepbuf[100], mrepbuf[100];
  strcpy(hrepbuf, toa(seqnum));            strcpy(hrepbuf+strlen(hrepbuf), "_");
  strcpy(mrepbuf, toa(seqnum));            strcpy(mrepbuf+strlen(mrepbuf), "_");
  if (revCompH)
    strcpy(hrepbuf+strlen(hrepbuf), "RC_");
  if (revCompM)
    strcpy(mrepbuf+strlen(mrepbuf), "RC_");

  strcpy(hrepbuf+strlen(hrepbuf), hlocus); strcpy(hrepbuf+strlen(hrepbuf), "_Repeats_H");
  strcpy(mrepbuf+strlen(mrepbuf), mlocus); strcpy(mrepbuf+strlen(mrepbuf), "_Repeats_M");
 
  char houtbuf[100];
  strcpy(houtbuf, toa(seqnum));                 strcpy(houtbuf+strlen(houtbuf), "_");
  if (revCompH)
    strcpy(houtbuf+strlen(houtbuf), "RC_");

  strcpy(houtbuf+strlen(houtbuf), hlocus); strcpy(houtbuf+strlen(houtbuf), "_Parse");
  
  ifstream hseqFin(hseqbuf); ifstream halignFin(halignbuf), halignFinCopy(halignbuf);
  ifstream mseqFin(mseqbuf); ifstream malignFin(malignbuf), malignFinCopy(malignbuf); 
  ofstream houtFout(houtbuf);
  
  while(hseqFin.good() && mseqFin.good()) {
    FilterSequence *phseq = new FilterSequence();
    FilterSequence *pmseq = new FilterSequence();
    hseqFin >> *phseq;
    mseqFin >> *pmseq;
    
    ifstream hrepFin(hrepbuf), hrepFinCopy(hrepbuf);
    ifstream mrepFin(mrepbuf), mrepFinCopy(mrepbuf);
    
    if (VERBOSE)    cout << "Read partial hseq with length " << phseq->get_length() << ", mseq with length " << pmseq->get_length() << endl;
    
    int phl = phseq->get_length(), pml = pmseq->get_length();
    
    Modules *hmod = new Modules(phseq, reg);
    Modules *mmod = new Modules(pmseq, reg);

    if (ltt) {
      hmod->computeIntronSignals(ltt);
      mmod->computeIntronSignals(ltt);
    }
    
    hmod->setStatEvaluators(startCodon, stopCodon, ATGCodon);
    mmod->setStatEvaluators(startCodon, stopCodon, ATGCodon);

    hmod->setImages(halignFin,     malignFin);
    mmod->setImages(malignFinCopy, halignFinCopy);
    
    hmod->setRepeats(hrepFin,     mrepFin);
    mmod->setRepeats(mrepFinCopy, hrepFinCopy);

    hmod->computeStopMatrix(reg);    
    mmod->computeStopMatrix(reg);
    
    for (i=0; i<=phl; ++i)
      if (hmod->humImg[i]>0) {
	assert(hmod->humImg[i] <= pml);
	assert(hmod->mouseImg[hmod->humImg[i]] == i);
      }

    if (VERBOSE) cout << "GENERATING PARTIAL PARSE..." << endl;
    
    Parse *p = hmod->mouseGeneratePartialParse(mmod,reg);
    if (revCompH) hmod->mouseOutputParseRevComp(p, houtFout);
    else          hmod->mouseOutputParse(p,houtFout);
    
    delete phseq;
    delete pmseq;
    delete hmod;
    delete mmod;
    delete p;
  }
    
  houtFout << "-1 \t -1 \t -1" << endl;
}

/*
 * fileContains
 *
 * Reports whether the file `filename` contains a whitespace-delimited token
 * that is exactly equal to `str`.
 *
 *   str      - token to look for (compared byte for byte, case sensitive)
 *   filename - path of the file to scan
 *
 * Returns 1 when such a token exists, 0 otherwise.  A file that cannot be
 * opened, an empty file, a NULL `str` and an empty `str` all yield 0.
 *
 * The file is scanned one character at a time instead of extracting tokens
 * into a fixed-size char array: the unbounded `fin >> buf` form overruns the
 * array on any token of 100 characters or more, and compares an
 * uninitialized array when the file holds no token at all.
 */
int fileContains(const char *str, const char *filename) {

  if (!str) return 0;

  ifstream fin(filename);

  const char *expect = str;  // next character of str the current token must supply
  int inToken  = 0;          // nonzero once the current token has at least one character
  int mismatch = 0;          // nonzero once the current token is known to differ from str
  char c;

  while (fin.get(c)) {
    switch (c) {
    // The same separators formatted extraction skips in the default "C" locale.
    case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
      // End of a token: it matches only if all of str was consumed.
      if (inToken && !mismatch && *expect == '\0') return 1;
      expect   = str;
      inToken  = 0;
      mismatch = 0;
      break;

    default:
      inToken = 1;
      if (!mismatch && *expect != '\0' && *expect == c) ++expect;
      else                                              mismatch = 1;
      break;
    }
  }

  // The last token may be terminated by end of file rather than by whitespace.
  return (inToken && !mismatch && *expect == '\0') ? 1 : 0;
}


main(int argc, char *argv[]) {

  Registry *reg=new Registry();  // Read in the registry
  ifstream regfile(argv[1]);
  regfile >> *reg;
  regfile.close();
  ifstream humanFile(reg->lookupVal("geneDatabase"));
  ifstream mouseFile(reg->lookupVal("mouseGeneDatabase"));

  int firstSequence   = atoi(reg->lookupVal("firstSequence"));
  int lastSequence    = atoi(reg->lookupVal("lastSequence"));

  int i, j, k;
  int stats[50];  arrayInit(0,stats,50);   // Contains statistics on performace

  cout << "Parse Assumptions: " << atoi(reg->lookupVal("ParseAssumptions")) << endl;
  
  // stats[0] = number of predicted exons that are internal and perfect on both ends
  // stats[1] = total number of coding exons
  // stats[2] = total number of predicted exons
  // stats[3] = total number of genes
  // stats[4] = total number of frames predicted correctly
  // stats[5] = total number of initial exons that are not terminal
  // stats[6] = total number of terminal exons that are not initial
  // stats[7] = total number of coding exons that are both initial and terminal (single exon genes)
  // stats[8] = total number of nucleotides predicted to be coding
  // stats[9] = total number of nucleotides that are coding
  // stats[10] = total number of nucleotides predicted to be coding that are coding
  // stats[11] = total number of predicted exons that match on 5' end (internal)
  // stats[12] = total number of predicted exons that match on 3' end (internal)
  // stats[13] = number of predicted exons that are initial and perfect on both ends
  // stats[14] = number of predicted exons that are terminal and perfect on both ends
  // stats[15] = number of predicted exons that are in single exon genes and perfect on both ends
  // stats[16] = total number of predicted exons that match on 5' end (initial)
  // stats[17] = total number of predicted exons that match on 3' end (initial)
  // stats[18] = total number of predicted exons that match on 5' end (terminal)
  // stats[19] = total number of predicted exons that match on 3' end (terminal)
  // stats[20] = total number of predicted exons that match on 5' end (single)
  // stats[21] = total number of predicted exons that match on 3' end (single)
  // stats[22] = total number of predicted exon of length > 50 
  // stats[23] = total number of perfect genes
  // stats[24] = total number of coding exons of length > 50
  // stats[25] = total number of false positives that do not overlap any coding exon
  // stats[26] = total number of splice sites predicted that are the boundaires of noncoding exons
  // stats[27] = total number of false negatives completely uncovered
  // stats[28] = total number of predicted exon that match on neither end by overlap (internal)
  // stats[29] = total number of predicted exon that match on neither end by overlap (initial)
  // stats[30] = total number of predicted exon that match on neither end by overlap (terminal)
  // stats[31] = total number of predicted exon that match on neither end by overlap (single)
  // stats[32] = total number of nucleotides examined
  
  FilterSequence *hseq = new FilterSequence();
  FilterSequence *mseq = new FilterSequence();
  for(i=0;i<(firstSequence-1);i++) humanFile >> *hseq;
  for(i=0;i<(firstSequence-1);i++) mouseFile >> *mseq;
  
  ifstream lttfin(reg->lookupVal("IntronSignalsTable"));
  cout << " I S  file: " << reg->lookupVal("IntronSignalsTable") << endl;
  LongTupleTable *ltt = NULL;
  if (lttfin.good()) {
    ltt = new LongTupleTable();
    ltt->set_lengths(9,3);
    lttfin >> *ltt;
    cout << "INTRON SIGNALS LOADED" << endl;
  }
  
  StatEvaluator *startCodon = new StatEvaluator();
  StatEvaluator *stopCodon  = new StatEvaluator();
  StatEvaluator *ATGCodon   = new StatEvaluator();
  
  ifstream acceptorTrainFile("/data2/rosetta/tables/train_acceptor_outfile");
  ifstream donorTrainFile("/data2/rosetta/tables/train_donor_outfile");
  ifstream ATGfin("/data2/rosetta/tables/train_ATG_outfile");
  
  startCodon->initializeBurge(acceptorTrainFile, AG_S);
  stopCodon->initializeBurge(donorTrainFile,     GT_S);
  ATGCodon->initializeBurge(ATGfin,              ATG_S);

  //ofstream alignStatFout("whitemouseAlignStatistics.txt");
  ofstream exonStatFout("/data2/rosetta/output/exonStatFout.txt");
  
  int revCompH = reg->lookupVal("reverseComplementHuman").contains("yes");
  int revCompM = reg->lookupVal("reverseComplementMouse").contains("yes");

  for(i=firstSequence;i<lastSequence;i++) {

    humanFile >> *hseq;
    mouseFile >> *mseq;

    String s=hseq->get_locus();
    String sm=mseq->get_locus();
    

    if (MERGE_PARSES) {
      cout << "MERGING PARSES, sequence " << i << endl;

      char bufForward[100];
      strcpy(bufForward, toa(i)); strcpy(bufForward+strlen(bufForward), "_");
      strcpy(bufForward+strlen(bufForward), s); strcpy(bufForward+strlen(bufForward), "_Parse");

      cout << "BufForward: " << bufForward << endl;

      char bufFinal[100];
      strcpy(bufFinal, toa(i)); strcpy(bufFinal+strlen(bufFinal), "_");
      strcpy(bufFinal+strlen(bufFinal), s); strcpy(bufFinal+strlen(bufFinal), "_FinalParse");
      ofstream finalParse(bufFinal);
      
      ifstream forwardf(bufForward);
      
      char bufReverse[100];
      strcpy(bufReverse, toa(i)); strcpy(bufReverse+strlen(bufReverse), "_");
      strcpy(bufReverse+strlen(bufReverse), "RC_");
      strcpy(bufReverse+strlen(bufReverse), s); strcpy(bufReverse+strlen(bufReverse), "_Parse");
      
      cout << "BufRC: " << bufReverse << endl;

      ifstream reversef(bufReverse);
      
      int beginExon[MAX_EXON_COUNT],   endExon[MAX_EXON_COUNT],   frame[MAX_EXON_COUNT];
      int beginExonFw[MAX_EXON_COUNT], endExonFw[MAX_EXON_COUNT], frameFw[MAX_EXON_COUNT];
      int beginExonRc[MAX_EXON_COUNT], endExonRc[MAX_EXON_COUNT], frameRc[MAX_EXON_COUNT];
      int directions[MAX_EXON_COUNT];
      int exonCount = 0, exonCountFw = 0, exonCountRc = 0;
    
      while (forwardf.good()) {
	forwardf >> beginExonFw[exonCountFw]; forwardf >> endExonFw[exonCountFw]; forwardf >> frameFw[exonCountFw];
	if (beginExonFw[exonCountFw] == -1) break;
	else exonCountFw++;
      }
      while (reversef.good()) {
	reversef >> endExonRc[exonCountRc]; reversef >> beginExonRc[exonCountRc]; reversef >> frameRc[exonCountRc];
	if (endExonRc[exonCountRc] == -1) break;
	else exonCountRc++;
      }
      int WINDOW_SZ = 2000;
      int fw_cover = 0, rc_cover = 0;
      
      cout << "ExonCountFw: " << exonCountFw << "\t ExonCountRc: " << exonCountRc << endl;

      for (j=0; j<exonCountFw; ++j) {
	fw_cover = rc_cover = 0;
	
	for (k=0; k<exonCountFw; ++k)
	  if (endExonFw[k] <= endExonFw[j])
	    fw_cover += MAX(0, endExonFw[k] - MAX(beginExonFw[j]-WINDOW_SZ, beginExonFw[k]));
	  else
	    fw_cover += MAX(0, MIN(endExonFw[j]+WINDOW_SZ, endExonFw[k]) - beginExonFw[k]);

	for (k=0; k<exonCountRc; ++k)
	  if (endExonRc[k] <= endExonFw[j])
	    rc_cover += MAX(0, endExonRc[k] - MAX(beginExonFw[j]-WINDOW_SZ, beginExonRc[k]));
	  else
	    rc_cover += MAX(0, MIN(endExonFw[j]+WINDOW_SZ, endExonRc[k]) - beginExonRc[k]);
	
	if (fw_cover > rc_cover) {
	  beginExon[exonCount]    = beginExonFw[j];
	  endExon[exonCount]      = endExonFw[j];
	  frame[exonCount]        = frameFw[j];
	  directions[exonCount++] = 1;
	}
      }

      for (j=0; j<exonCountRc; ++j) {
	fw_cover = rc_cover = 0;
	
	for (k=0; k<exonCountFw; ++k)
	  if (endExonFw[k] <= endExonRc[j])
	    fw_cover += MAX(0, endExonFw[k] - MAX(beginExonRc[j]-WINDOW_SZ, beginExonFw[k]));
	  else
	    fw_cover += MAX(0, MIN(endExonRc[j]+WINDOW_SZ, endExonFw[k]) - beginExonFw[k]);

	for (k=0; k<exonCountRc; ++k)
	  if (endExonRc[k] <= endExonRc[j])
	    rc_cover += MAX(0, endExonRc[k] - MAX(beginExonRc[j]-WINDOW_SZ, beginExonRc[k]));
	  else
	    rc_cover += MAX(0, MIN(endExonRc[j]+WINDOW_SZ, endExonRc[k]) - beginExonRc[k]);
	
	if (fw_cover < rc_cover) {
	  beginExon[exonCount]    = beginExonRc[j];
	  endExon[exonCount]      = endExonRc[j];
	  frame[exonCount]        = frameRc[j];
	  directions[exonCount++] = -1;
	}
      }
      int changeSort = 0;
      do {
	changeSort = 0;
	for (j=0; j<exonCount-1; ++j) {
	  assert(beginExon[j] < endExon[j]);
	  if (beginExon[j] > beginExon[j+1]) {
	    int be = beginExon[j], ee = endExon[j], fe = frame[j], de = directions[j];
	    beginExon[j]  = beginExon[j+1];   beginExon[j+1]  = be;
	    endExon[j]    = endExon[j+1];     endExon[j+1]    = ee;
	    frame[j]      = frame[j+1];       frame[j+1]      = fe;
	    directions[j] = directions[j+1];  directions[j+1] = de;
	    changeSort = 1;
	  }
	}
      } while (changeSort);

      for (j=0; j<exonCount; ++j) {
	cout << j << ".\t" << beginExon[j] << "," << endExon[j] << "\t" 
	     << frame[j] << "\t" << directions[j] << endl;
	
	finalParse << beginExon[j] << "\t" << endExon[j] << "\t" << frame[j] << "\t" << directions[j] << endl;
      }
      finalParse << "-1\t-1\t-1\t-1\n";
      
      continue;
    }
    if (revCompH)
      hseq->reverse_complement();
    if (revCompM)
      mseq->reverse_complement();
    
    
    if (s=="HSALDOA") // skip gene with masked mouse
      continue;
    if (s=="HSU11870") // interleukin's don't match.
      continue;
    //    if (s=="HUMCAPG") // GC in mouse splice site
    //  continue;
    if (s=="HSU70065") // Screwed up annotations for splice sites
      continue;
    if (s=="HUMHEN1A") // Gene ends on last bp - artificial
      continue;
    if (s=="HSHISTN15") // Histone doesn't match
      continue;
    cout << "Processing gene " << i << ". HUMAN LOCUS: " << s << "; MOUSE LOCUS: " << sm <<  endl; stats[3]++;
    
    //    alignmentStatistics(hseq, mseq, i, reg, alignStatFout);
    //    continue;
    
    //exonsAlignmentStatistics(hseq, mseq, i, reg, exonStatFout);    
    //continue;
    
    int mseql = mseq->get_length(), hseql = hseq->get_length();
    assert(mseql && hseql);
    fixUnknownNuc(hseq); fixUnknownNuc(mseq);
    
    if (reg->lookupVal("createFasta").contains("yes"))
      createFasta(hseq,i);
    if (reg->lookupVal("createRepeatInformation").contains("yes"))   
      createRepeatInformation(hseq, mseq, i, reg);
    if (reg->lookupVal("createAlignmentRegions").contains("yes"))   
      createAlignmentRegions(hseq, mseq, i, reg);
    if (reg->lookupVal("parseAligningRegions").contains("yes"))   
      parseAligningRegions(hseq->get_locus(), mseq->get_locus(), i, reg, 
			   startCodon, stopCodon, ATGCodon, ltt);
  
    if (reg->lookupVal("parseAllSequence").contains("yes")) {
      cout << "Parsing all sequence, locuses: " << hseq->get_locus() << ", " << mseq->get_locus() << endl;
      char hallalignbuf[100], mallalignbuf[100];
      strcpy(hallalignbuf, toa(i));          strcpy(hallalignbuf+strlen(hallalignbuf), "_");
      strcpy(mallalignbuf, toa(i));          strcpy(mallalignbuf+strlen(mallalignbuf), "_");
      if (revCompH)
	strcpy(hallalignbuf+strlen(hallalignbuf), "RC_");
      if (revCompM)
	strcpy(mallalignbuf+strlen(mallalignbuf), "RC_");

      strcpy(hallalignbuf+strlen(hallalignbuf), hseq->get_locus()); 
      strcpy(hallalignbuf+strlen(hallalignbuf), "_Global_Alignment_H");
      strcpy(mallalignbuf+strlen(mallalignbuf), mseq->get_locus()); 
      strcpy(mallalignbuf+strlen(mallalignbuf), "_Global_Alignment_M");

      char hrepbuf[100], mrepbuf[100];
      strcpy(hrepbuf, toa(i));                 strcpy(hrepbuf+strlen(hrepbuf), "_");
      strcpy(mrepbuf, toa(i));                 strcpy(mrepbuf+strlen(mrepbuf), "_");
      if (revCompH)
	strcpy(hrepbuf+strlen(hrepbuf), "RC_");
      if (revCompM)
	strcpy(mrepbuf+strlen(mrepbuf), "RC_");

      strcpy(hrepbuf+strlen(hrepbuf), hseq->get_locus()); strcpy(hrepbuf+strlen(hrepbuf), "_Repeats_H");
      strcpy(mrepbuf+strlen(mrepbuf), mseq->get_locus()); strcpy(mrepbuf+strlen(mrepbuf), "_Repeats_M");

      char houtbuf[100];
      strcpy(houtbuf, toa(i));                 strcpy(houtbuf+strlen(houtbuf), "_");
      if (revCompH)
	strcpy(houtbuf+strlen(houtbuf), "RC_");
      
      strcpy(houtbuf+strlen(houtbuf), hseq->get_locus()); strcpy(houtbuf+strlen(houtbuf), "_Parse");
      
      ofstream houtfout(houtbuf);
      ifstream halignFin(hallalignbuf), halignFinCopy(hallalignbuf);
      ifstream malignFin(mallalignbuf), malignFinCopy(mallalignbuf); 
      ifstream hrepFin(hrepbuf), hrepFinCopy(hrepbuf);
      ifstream mrepFin(mrepbuf), mrepFinCopy(mrepbuf);

      cout << "Creating modules" << endl;
      Modules *hmod = new Modules(hseq, reg);
      Modules *mmod = new Modules(mseq, reg);
      cout << "created modules" << endl;

      if (ltt) {
	hmod->computeIntronSignals(ltt);
	mmod->computeIntronSignals(ltt);
      }
      hmod->setStatEvaluators(startCodon, stopCodon, ATGCodon);
      mmod->setStatEvaluators(startCodon, stopCodon, ATGCodon);

      hmod->setImages(halignFin, malignFin);
      mmod->setImages(malignFinCopy, halignFinCopy);
      
      cout << "setting human repeats from " << hrepbuf << ", " << mrepbuf << endl;
      hmod->setRepeats(hrepFin,     mrepFin);
      cout << "setting mouse repeats from " << hrepbuf << ", " << mrepbuf << endl;
      mmod->setRepeats(mrepFinCopy, hrepFinCopy);
      

      ofstream temphuman("temphuman.txt");
      ofstream tempmouse("tempmouse.txt");
      
      hmod->setAligningRegions(halignFin);
      mmod->setAligningRegions(malignFin);

      cout << "computing stop matrices" << endl;
      hmod->computeStopMatrix(reg);
      mmod->computeStopMatrix(reg);

      hmod->outputValidPositions(temphuman);
      mmod->outputValidPositions(tempmouse);
      
      for (int k=0; k<=hseql; ++k)
	if (hmod->humImg[k]>0) {
	  assert(hmod->humImg[k] <= mseql);
	  assert(hmod->mouseImg[hmod->humImg[k]] == k);
	}

      cout << "GENERATING PARSE..." << endl;
      
      Parse *p = hmod->mouseGeneratePartialParse(mmod,reg);
      if (revCompH) hmod->mouseOutputParseRevComp(p,houtfout);
      else          hmod->mouseOutputParse(p,houtfout);
      
      houtfout << "-1 \t -1 \t -1" << endl;
      
      delete hmod;
      delete mmod;
      delete p;
      cout << "***  done" << endl;
    }
    
    if (reg->lookupVal("checkParses").contains("yes")) {
      int exonPredictions[100];
      checkParses(hseq,i,stats,1,exonPredictions);
    }
  }
  printResults(stats);
  
}
















