#include "modules.h"
#include <strstream.h>

int main(int argc, char *argv[]) {

  if (argc<3) {
    cout << "USAGE: " << argv[0] << " <temp.regfile> <registry> <outputfile> [key=value...]" << endl;
    exit(1);
  }

  int i;

  Registry *tempReg=new Registry();
  ifstream tempIn(argv[1]);
  tempIn >> *tempReg;
  tempIn.close();

  Registry *reg=new Registry();  // Read in the registry
  ifstream regfile(argv[2]);
  regfile >> *reg;
  regfile.close();

  ifstream fin_seq(reg->lookupVal("geneDatabase")); // Load the gene and spit it out

  if (argc<3) {
    cout << "USAGE: <registry file> <output file> [optional key=value]" << endl;
    return 0;
  }
  String args; // Load any command line args.
  for(i=3;i<argc;args+=argv[i++]+(String)("\n")) reg->erase(((String)argv[i]).before("="));
  istrstream argstream(args);
  argstream >> *reg;
  if ((atoi(tempReg->lookupVal("firstSequence"))==0)||
      (atoi(tempReg->lookupVal("lastSequence"))==0)) {
    tempReg->change("firstSequence",reg->lookupVal("firstSequence"));
    tempReg->change("lastSequence",reg->lookupVal("lastSequence"));
  }
  ifstream seqfile(reg->lookupVal("geneDatabase")); // Load the gene and spit it out
  FilterSequence *fseq=new FilterSequence();
  if (VERBOSE) cout << "Reading " << reg->lookupVal("geneDatabase") << "... " << endl;

  ifstream in(reg->lookupVal("tupleTable").chars());
  in.close();
 
  int firstSequence   = atoi(tempReg->lookupVal("firstSequence"));
  int lastSequence    = atoi(tempReg->lookupVal("lastSequence"));
  for(i=0;i<(firstSequence-1);i++) seqfile >> *fseq;

  int totalOverlap    = atoi(tempReg->lookupVal("totalOverlap"));
  int totalNonOverlap = atoi(tempReg->lookupVal("totalNonOverlap"));
  int totalPerfect    = atoi(tempReg->lookupVal("totalPerfect"));
  int totalMatch      = atoi(tempReg->lookupVal("totalMatch"));
  int totalMiss       = atoi(tempReg->lookupVal("totalMiss"));
  int totalExons      = atoi(tempReg->lookupVal("totalExons"));
  int totalPExons     = atoi(tempReg->lookupVal("totalPExons"));

  vector<int>    *totalFrame = new vector<int>;
  vector<int>    *nucData    = new vector<int>;
  vector<double> *GCInfo     = new vector<double>;

  for(i=0;i<12;i++)
    totalFrame->push_back(atoi(tempReg->lookupVal(((String)"totalFrame")+toa(i))));
  for(i=0;i<8;i++)
    GCInfo->push_back(atoi(tempReg->lookupVal(((String)"GCInfo")+toa(i))));
  for(i=0;i<20;i++)
    nucData->push_back(atoi(tempReg->lookupVal(((String)"nucData")+toa(i))));

  int proteinMatch=0;
  int usingMethod1 = 0, usingMethod2 = 0;;
  //ofstream parsefile("parses.out");

  for(i=firstSequence;i<lastSequence;i++) {
    cout << "Processing gene: " << i << endl;

    verb("Updating registry...");    
    tempReg->change("firstSequence",toa(i));
    reg->change("sequenceNumber",toa(i));
    if (i==firstSequence) 
      tempReg->change("loopCount",toa(atoi(tempReg->lookupVal("loopCount"))+1));
    else tempReg->change("loopCount","0");
    if (atoi(tempReg->lookupVal("loopCount"))>2) {
      tempReg->change("totalSkipped",toa(atoi(tempReg->lookupVal("totalSkipped"))+1));
      verb("Loop detected - skipping sequence!!!");
      continue;
    }
    tempReg->change("totalOverlap",    toa(totalOverlap));
    tempReg->change("totalNonOverlap", toa(totalNonOverlap));
    tempReg->change("totalPerfect",    toa(totalPerfect));
    tempReg->change("totalMatch",      toa(totalMatch));
    tempReg->change("totalMiss",       toa(totalMiss));
    tempReg->change("totalExons",      toa(totalExons));
    tempReg->change("totalPExons",     toa(totalPExons));
    for(int j=0;j<12;j++) tempReg->change(((String)"totalFrame")+ toa(j), toa((*totalFrame)[j]));
    for(int j=0;j<8;j++)  tempReg->change(((String)"GCInfo")    + toa(j), toa((*GCInfo)[j]));
    for(int j=0;j<20;j++) tempReg->change(((String)"nucData")   + toa(j), toa((*nucData)[j]));

    ofstream tempOut(argv[1]);
    tempOut << *tempReg;
    tempOut.close();

    seqfile >> *fseq;
    cout << "Locus: " << fseq->get_locus() << endl;

    if (!strlen(fseq->get_locus())) reg->add("error","Bad sequence");
    
    if (reg->lookupVal("showSequence").contains("yes",0)) cout << *fseq;

    Modules* module=new Modules(fseq,reg);
    module->computeDictionaryInfo(reg);
    
    { // Start, Stop, ATG code;
      StatEvaluator *startCodon = new StatEvaluator();
      StatEvaluator *stopCodon  = new StatEvaluator();
      StatEvaluator *ATGCodon   = new StatEvaluator();
      ifstream acceptorTrainFile("/data2/rosetta/tables/train_acceptor_outfile");
      ifstream donorTrainFile("/data2/rosetta/tables/train_donor_outfile");
      ifstream ATGfin("/data2/rosetta/tables/train_ATG_outfile");
      
      startCodon->initializeBurge(acceptorTrainFile, AG_S);
      stopCodon->initializeBurge(donorTrainFile,     GT_S);
      ATGCodon->initializeBurge(ATGfin,              ATG_S);

      module->setStatEvaluators(startCodon, stopCodon, ATGCodon);
    }

    module->computeStopMatrix(reg);
    module->loadTupleTable(reg->lookupVal("tupleTable").chars(),reg); // Load tt
    module->computeWindowFrameScores(reg);
    module->computeIntronExonProb();
    module->frameScoreMatrixFind(reg); // Calculate frameScore matrix
    
    Parse* parse;

    parse=module->newGenerateParse(reg);
    
    module->outputParse(parse,reg,totalOverlap,totalNonOverlap,
			totalPerfect,totalMatch,totalMiss,totalExons,totalPExons,
			nucData,totalFrame,GCInfo);
    
    delete module;
    delete parse;
  }
  
  // DONE WITH LOOP
  
  tempReg->change("firstSequence",toa(lastSequence));
  
  ofstream tempRegFile(argv[1]);
  tempRegFile << *tempReg;
  tempRegFile.close();

  ofstream out(argv[3]);
  out << "-------------------------------------------------------------" << endl;
  out << "Total overlaps: \t" << totalOverlap << endl;
  out << "Total frame matches: \t" << totalMatch << endl;
  out << "Frames correct (overlap): ";

  for(i=0;i<6;i+=2) {
    out << "(" << (*totalFrame)[i] << "/" << (*totalFrame)[i+1] << "=";
    out << ((double)((*totalFrame)[i])/(double)((*totalFrame)[i+1]))*100 << "%) ";
  }
  out << (((double)(((*totalFrame)[0]+(*totalFrame)[2]+(*totalFrame)[4])))/
	  ((double)((*totalFrame)[1]+(*totalFrame)[3]+(*totalFrame)[5])))*100 << "% vs. ";
  out << ((double)((*totalFrame)[1]+(((double)(*totalFrame)[3])/2)+(((double)(*totalFrame)[5])/3)))/
    ((double)((*totalFrame)[1]+(*totalFrame)[3]+(*totalFrame)[5]))*100 << "%" << endl;
  out << "Frames correct (perfect): ";
  for(i=6;i<12;i+=2) {
    out << "(" << (*totalFrame)[i] << "/" << (*totalFrame)[i+1] << "=";
    out << ((double)((*totalFrame)[i])/(double)((*totalFrame)[i+1]))*100 << "%) ";
  }
  out << (((double)((*totalFrame)[6]+(*totalFrame)[8]+(*totalFrame)[10]))/
    ((double)((*totalFrame)[7]+(*totalFrame)[9]+(*totalFrame)[11])))*100 << "% vs. ";
  out << ((double)((*totalFrame)[7]+(((double)(*totalFrame)[9])/2)+
		    (((double)(*totalFrame)[11])/3)))/
       ((double)((*totalFrame)[7]+(*totalFrame)[9]+(*totalFrame)[11]))*100 << "%" << endl;
  out << "Total perfect exons: \t" << totalPerfect << endl;
  out << "Total false positives: \t" << totalNonOverlap << endl;
  out << "Total misses: \t" << totalMiss << endl;
  out << "Total exons: \t" << totalExons << endl;

  out << endl;  

  out << "Accuracy per nucleotide: Sn=";
  out << (((double)((*nucData)[4]))/((double)((*nucData)[2])));
  out << " Sp=" << (((double)((*nucData)[4]))/((double)((*nucData)[0])));
  out << " AC=" << ((((((double)((*nucData)[4]))/((double)((*nucData)[2])))+
		       (((double)((*nucData)[4]))/((double)((*nucData)[0])))+
		       (((double)((*nucData)[6]))/((double)((*nucData)[3])))+
		      (((double)((*nucData)[6]))/((double)((*nucData)[1]))))/2)-1) << endl;
  out << "PP=" << ((*nucData)[0]) << " PN=" << ((*nucData)[1]);
  out << " AP=" << ((*nucData)[2]) << " AN=" << ((*nucData)[3]);
  out << " TP=" << ((*nucData)[4]) << " FP=" << ((*nucData)[5]);
  out << " TN=" << ((*nucData)[6]) << " FN=" << ((*nucData)[7]) << endl;
  out << endl;
  out << "Accuracy per exon: Sn=" << (((double)totalPerfect)/((double)totalExons));
  out << " Sp=" << (((double)totalPerfect)/((double)totalPExons));
  out << " ME=" << (((double)totalMiss)/((double)totalExons));
  out << " WE=" << (((double)totalNonOverlap)/((double)totalPExons)) << endl;
  out << "TE=" << totalPerfect << " PE=" << totalPExons << " AE=" << totalExons << endl;
  out << endl;
  out << ((((double)((*nucData)[8]))/((double)((*nucData)[2])))*100);
  out << "% Protein correct" << endl;

  out << "Segments removed: "       << (*nucData)[9+SEG_REMOVED]      << endl;
  out << "Exon Positions Covered: " << (*nucData)[9+EXON_POS_COVERED] << endl;
  out << "Exon Positions Uncov  : " << (*nucData)[9+EXON_POS_UNCOV]   << endl;
  out << "Intron Positions Covered: " << (*nucData)[9+INTRON_POS_COVERED] << endl;
  out << "Intron Positions Uncov  : " << (*nucData)[9+INTRON_POS_UNCOV]   << endl;

  out << "Total sequences skipped: \t" << tempReg->lookupVal("totalSkipped") << endl;
  out << endl;
  double GCTotal=(*GCInfo)[0]+(*GCInfo)[1]+(*GCInfo)[2]+(*GCInfo)[3];
  out << "ACGT content: (" << (((*GCInfo)[0]/GCTotal)*100) << "% ";
  out << ((*GCInfo)[1]/GCTotal)*100 << "% " << ((*GCInfo)[2]/GCTotal)*100 << "% ";
  out << ((*GCInfo)[3]/GCTotal)*100 << "%)" << endl;
  GCTotal=(*GCInfo)[4]+(*GCInfo)[5]+(*GCInfo)[6]+(*GCInfo)[7];
  out << "(n*ACGT)/l: (" << ((*GCInfo)[4]/GCTotal)*100 << "% ";
  out << ((*GCInfo)[5]/GCTotal)*100 << "% " << ((*GCInfo)[6]/GCTotal)*100 << "% ";
  out << ((*GCInfo)[7]/GCTotal)*100 << "%)" << endl;
  out << "PERCENTAGE SEQUENCES PROTEIN USED: " << (double) 100*proteinMatch/(lastSequence-firstSequence) << endl;

  if (usingMethod1 + usingMethod2) {
    out << endl << "Used Method 1: " << usingMethod1 << "  Method 2: " << usingMethod2 << endl;
  }
  out.close();
  
  
  delete nucData;
  delete GCInfo;
  delete totalFrame;
  delete fseq;
  delete reg;
  delete tempReg;
  return 0;
}










