#include "common.h"
#include <string.h>

// Number of columns in one row of the annotation table built in main()
// (one column per ATBL enumerator below).  Declared const so that it is a
// constant expression and can legally be used as an array bound.
const int ATSZ = 7;

// Column indices into a row of the annotation table: locus index, gene
// index, exon type (an ETYPE value), strand (-1 or 1), begin and end
// coordinates of the exon on the human sequence, and the match verdict
// (a PREDICTION value).
enum ATBL { LOC = 0, GEN = 1, TYPE = 2, DIR = 3, HBEGIN = 4, HEND = 5,  PRED = 6 };

// Position of an exon within its gene; also used as the row index of the
// statistics tables printed by main().
enum ETYPE { INITIAL = 0, INTERNAL = 1, FINAL = 2, SINGLE = 3 };

// Strand constants.  NOTE(review): FW/RC are not referenced anywhere in this
// file; the annotation table stores the strand as -1/1 instead.  The type name
// DIR is hidden by the ATBL enumerator DIR and must be spelled "enum DIR".
enum DIR { FW = 0, RC = 1 };

// How an exon matches its counterpart: both ends equal, only the begin equal,
// only the end equal, overlapping without a shared end, or no match at all.
enum PREDICTION { PERFECT = 0, LEFT = 1, RIGHT = 2, OVL = 3, MISS = 4 };


// When non-zero, main() reads "<locus>_GenscanParse" files (which carry an
// extra exon-type column) instead of "<n>_<locus>_FinalParse" files.
int GENSCAN_MODE = 0;

// Returns true for the blank characters (space, tab) that separate the
// fields of an annotation line.
static bool is_field_blank(char c) {
  return c == ' ' || c == '\t';
}

// Returns the index of the first non-blank character at or after line[pos].
static int skip_blanks(const char *line, int pos) {
  while (is_field_blank(line[pos])) pos++;
  return pos;
}

// Returns the index of the first blank at or after line[pos], or of the
// terminating NUL when the field is the last one on the line.
static int skip_field(const char *line, int pos) {
  while (line[pos] != '\0' && !is_field_blank(line[pos])) pos++;
  return pos;
}

// Copies characters from line[pos] into dst until one of the characters in
// `stops` or the end of the line is reached, NUL-terminates dst and returns
// the index of the character that stopped the copy.  `cap` is the size of
// dst; a field that does not fit is reported and the program exits instead
// of overrunning the buffer.
static int copy_until(const char *line, int pos, const char *stops, char *dst, int cap) {
  int n = 0;
  // strchr() also matches the terminating NUL of `stops`, so the end of the
  // line always ends the field.
  while (strchr(stops, line[pos]) == NULL) {
    if (n >= cap - 1) {
      cout << "Error: field too long in line: " << line << endl;
      exit(1);
    }
    dst[n++] = line[pos++];
  }
  dst[n] = '\0';
  return pos;
}

// Classifies how the exon (otherBegin, otherEnd, otherDir) matches the
// reference exon (refBegin, refEnd, refDir).  Returns a PREDICTION value:
// PERFECT when both ends agree, LEFT when only the begins agree, RIGHT when
// only the ends agree, OVL when the exons overlap without sharing an end,
// and MISS otherwise (always MISS when the strands differ).
static int classify_match(int refBegin, int refEnd, int refDir,
                          int otherBegin, int otherEnd, int otherDir) {
  if (otherDir != refDir) return MISS;
  if (otherBegin == refBegin && otherEnd == refEnd) return PERFECT;
  if (otherBegin == refBegin) return LEFT;
  if (otherEnd == refEnd) return RIGHT;
  if ((otherBegin > refBegin && otherBegin < refEnd) ||
      (otherEnd   > refBegin && otherEnd   < refEnd) ||
      (otherBegin < refBegin && otherEnd   > refEnd))
    return OVL;
  return MISS;
}

// Returns the number of positions of the reference exon that are covered by
// the other exon (0 when the strands differ or the exons are disjoint).
static int overlap_length(int refBegin, int refEnd, int refDir,
                          int otherBegin, int otherEnd, int otherDir) {
  if (otherDir != refDir || otherBegin > refEnd || otherEnd < refBegin) return 0;
  if (otherBegin < refBegin) otherBegin = refBegin;
  if (otherEnd   > refEnd  ) otherEnd   = refEnd;
  return otherEnd - otherBegin + 1;
}

// Maps an exon type to its row in a 5-row statistics table.  Valid ETYPE
// values map to themselves; anything else (for example the -2 placeholder of
// predictions that carry no type) maps to row 4, the TOTAL row, so that it is
// still counted without indexing outside the table.
static int stats_row(int type) {
  return (type >= INITIAL && type <= SINGLE) ? type : 4;
}

// Program entry point.
//
//   argv[1]  input file; its "AC", "CDS" and "Sums:" lines are copied out
//   argv[2]  output file receiving those lines; it is then re-read to build
//            the annotation table
//
// After loading the annotations the program reads one parse file of
// predicted exons per locus, classifies every annotated exon against the
// predictions (printed as "Sn") and every predicted exon against the
// annotations (printed as "Sp"), and writes the tables to standard output.
// Returns 0 on success and 1 when the arguments or the files are unusable.
int main(int argc, char *argv[]) {
  const int MAX_ANNOTATIONS = 10000; // rows in the annotation table
  const int MAX_LOCI        = 100;   // loci for which predictions can be stored
  const int MAX_PREDICTIONS = 200;   // rows per locus, including the -1 terminator

  if (argc < 3) {
    cout << "usage: " << (argc > 0 ? argv[0] : "program")
         << " <input file> <output file>" << endl;
    return 1;
  }

  char buf[1010];
  ifstream fin(argv[1]);
  ofstream fout(argv[2]);
  if (!fin.good()) {
    cout << "Error: could not open input file " << argv[1] << endl;
    return 1;
  }
  if (!fout.good()) {
    cout << "Error: could not open output file " << argv[2] << endl;
    return 1;
  }

  // Pass 1: keep only the lines the annotation parser below understands.
  while (fin.good()) {
    fin.getline(buf, 1010, '\n');
    assert(strlen(buf) < 1000);

    if (!strncmp(buf, "AC", 2) || !strncmp(buf, "CDS", 3) || !strncmp(buf, "Sums:", 5))
      fout << buf << endl;
  }
  cout << "done" << endl;
  fin.close();
  fout.close();


  int genecount  = 0;
  int locuscount = 0;
  int currentry  = 0;
  int annotations[MAX_ANNOTATIONS][ATSZ];
  char locuses[1000][40], dummybuf1[10], dummybuf2[10];
  int i, j, k;

  for (i=0; i<MAX_ANNOTATIONS; ++i)
    for (j=0; j<ATSZ; ++j)
      annotations[i][j] = -1;
  for (i=0; i<1000; ++i) strcpy(locuses[i], "EM:");

  cout << "Reading annotations from " << argv[2] << endl;
  // The first pass left eofbit/failbit set; before C++11 open() does not
  // reset them, so clear explicitly or the loop below would never run.
  fin.clear();
  fin.open(argv[2]);

  // Pass 2: build the annotation table.  Loci are numbered from 1 because
  // locuscount is incremented before the name is stored.
  while (fin.good()) {
    fin.getline(buf, 1010, '\n');
    assert(strlen(buf) < 1000);

    if (!strncmp(buf, "AC", 2)) {
      if (locuscount + 1 >= MAX_LOCI) {
        cout << "Error: more than " << MAX_LOCI - 1 << " loci in " << argv[2] << endl;
        return 1;
      }
      locuscount++;
      i = skip_blanks(buf, 2); // start of mouse locus
      i = skip_field(buf, i);  // end of mouse locus
      i = skip_blanks(buf, i); // start of human locus
      // The name is appended after the "EM:" prefix already in the slot.
      copy_until(buf, i, " \t\n", locuses[locuscount] + 3, 40 - 3);
      cout << "human locus #" << locuscount << ": " << locuses[locuscount] << endl;
    }


    else if (!strncmp(buf, "Sums:", 5)) {
      // End of a gene: its last exon becomes FINAL, or SINGLE when it was
      // also the first exon of that gene.
      if (currentry>0 && annotations[currentry-1][GEN] == genecount)
        annotations[currentry-1][TYPE] =
          (int) ( (annotations[currentry-1][TYPE] == INTERNAL) ? FINAL : SINGLE);
      genecount++;
    }


    else if (!strncmp(buf, "CDS", 3)) {
      if (currentry >= MAX_ANNOTATIONS) {
        cout << "Error: more than " << MAX_ANNOTATIONS << " CDS entries in " << argv[2] << endl;
        return 1;
      }
      i = skip_blanks(buf, 3); // start of mouse region
      i = skip_field(buf, i);  // end of mouse region
      i = skip_blanks(buf, i); // start of human region begin
      i = copy_until(buf, i, ",", dummybuf1, 10);
      if (buf[i] != ',') {
        cout << "Warning: skipping malformed CDS line: " << buf << endl;
        continue;
      }
      copy_until(buf, i + 1, " \t\n", dummybuf2, 10);

      // A leading 'c' on the begin coordinate marks the reverse strand.
      const bool complement = (dummybuf1[0] == 'c');

      annotations[currentry][ LOC  ] = locuscount;
      annotations[currentry][ GEN  ] = genecount;
      annotations[currentry][ TYPE ] = (int) ( (currentry && annotations[currentry-1][GEN] == genecount) ? INTERNAL : INITIAL);
      annotations[currentry][ DIR  ] = complement ? -1 : 1;
      annotations[currentry][ HBEGIN ] = atoi(complement ? dummybuf1 + 1 : dummybuf1);
      annotations[currentry][ HEND   ] = atoi(dummybuf2);
      currentry++;
    }

  }

  for (i=0; i<currentry; ++i)
    cout << annotations[i][LOC] << '\t' << annotations[i][GEN]    << '\t' << annotations[i][TYPE] << '\t'
         << annotations[i][DIR] << '\t' << annotations[i][HBEGIN] << '\t' << annotations[i][HEND] << endl;

  // predictions[locus][n] = { begin, end, dir, type, match verdict }
  int predictions[MAX_LOCI][MAX_PREDICTIONS][5];
  for (i=0; i<MAX_LOCI; ++i)
    for (j=0; j<MAX_PREDICTIONS; ++j)
      for (k=0; k<5; ++k)
        predictions[i][j][k] = -2;

  int predicted_counts[MAX_LOCI];
  for (i=0; i<MAX_LOCI; ++i)
    predicted_counts[i] = 0;

  char parsebuf[100];
  int currpcount=0, currbegin=0, currend=0, currframe=0, currdir=0, currtype=0;
  cout << "locuscount: " << locuscount << endl;
  for (i=0; i<=locuscount; currpcount = 0, ++i) {

    if (GENSCAN_MODE)
      strcpy(parsebuf, "/data2/rosetta/genscan/77test/");
    else {
      strcpy(parsebuf, "/data2/rosetta/output/");
      strcpy(parsebuf+strlen(parsebuf), toa(i));
      strcpy(parsebuf+strlen(parsebuf), "_");
    }
    strcpy(parsebuf+strlen(parsebuf), locuses[i]);
    if (GENSCAN_MODE)
      strcpy(parsebuf+strlen(parsebuf), "_GenscanParse");
    else
      strcpy(parsebuf+strlen(parsebuf), "_FinalParse");


    ifstream parsefile(parsebuf);
    if (!parsefile.good()) {
      cout << "Warning: could not open parse file " << parsebuf << endl;
      continue;
    }

    // Rows are read up to and including the row whose begin is -1; that
    // terminator row is stored (and printed below) but not counted.
    do {
      // A file that ends without the -1 terminator is treated as if the
      // terminator had been read, instead of looping forever.
      if (!(parsefile >> currbegin)) currbegin = -1;
      parsefile >> currend;
      parsefile >> currframe;
      parsefile >> currdir;
      if (GENSCAN_MODE) parsefile >> currtype;
      if (currpcount == MAX_PREDICTIONS - 1 && currbegin != -1) {
        cout << "Warning: more than " << MAX_PREDICTIONS - 1 << " predictions in "
             << parsebuf << "; ignoring the rest" << endl;
        currbegin = -1; // the last slot is reserved for the terminator row
      }
      predictions[i][currpcount][0] = currbegin;
      predictions[i][currpcount][1] = currend;
      predictions[i][currpcount][2] = currdir;
      if (GENSCAN_MODE) predictions[i][currpcount][3] = currtype;
      currpcount++;
    } while (currbegin != -1);
    predicted_counts[i] = currpcount-1;

    cout << "Predictions for locus " << locuses[i] << endl;
    for (j=0; j<currpcount; ++j)
      cout << predictions[i][j][0] << ", " << predictions[i][j][1] << ";   " << predictions[i][j][2] << endl;
    cout << endl << endl;
  }

  // Classify every annotated exon by the first prediction of its locus that
  // matches it in any way.
  int currloc;
  for (i=0; i<currentry; ++i) {
    currloc   = annotations[i][LOC];
    currbegin = annotations[i][HBEGIN];
    currend   = annotations[i][HEND];
    currdir   = annotations[i][DIR];

    annotations[i][PRED] = (int) MISS;
    for (j=0; j<predicted_counts[currloc]; ++j) {
      const int verdict = classify_match(currbegin, currend, currdir,
                                         predictions[currloc][j][0],
                                         predictions[currloc][j][1],
                                         predictions[currloc][j][2]);
      if (verdict != MISS) {
        annotations[i][PRED] = verdict;
        break;
      }
    }
  }

  // annotation_stats[type][verdict]; row 4 is the total over all types.
  int annotation_stats[5][5];
  for (i=0; i<5; ++i)
    for (j=0; j<5; ++j)
      annotation_stats[i][j] = 0;

  for (i=0; i<currentry; ++i)
    annotation_stats[ annotations[i][TYPE] ][ annotations[i][PRED] ]++;
  for (i=0; i<4; ++i)
    for (j=0; j<5; ++j)
      annotation_stats[4][j] += annotation_stats[i][j];


  int covered_annotations[5][2]; // i:: type.  j:: 0: covered positions; 1: uncovered positions.
  for (i=0; i<5; ++i)            // row 4 (total) must start at zero as well
    covered_annotations[i][0] = covered_annotations[i][1] = 0;

  int covered = 0;
  for (i=0; i<currentry; ++i, covered = 0) {
    currloc   = annotations[i][LOC];
    currbegin = annotations[i][HBEGIN];
    currend   = annotations[i][HEND];
    currdir   = annotations[i][DIR];
    currtype  = annotations[i][TYPE];

    for (j=0; j<predicted_counts[currloc]; ++j)
      covered += overlap_length(currbegin, currend, currdir,
                                predictions[currloc][j][0],
                                predictions[currloc][j][1],
                                predictions[currloc][j][2]);

    covered_annotations[currtype][0] += covered;
    covered_annotations[currtype][1] += currend - currbegin + 1 - covered;
  }
  for (i=0; i<4; ++i)
    for (j=0; j<2; ++j)
      covered_annotations[4][j] += covered_annotations[i][j];

  cout.precision(3);

  for (i=0; i<currentry; ++i)
    cout << annotations[i][LOC]  << '\t' << annotations[i][GEN]    << '\t' << annotations[i][TYPE] << '\t'
         << annotations[i][DIR]  << '\t' << annotations[i][HBEGIN] << '\t' << annotations[i][HEND] << "\t$"
         << annotations[i][PRED] << endl;

  cout << endl << endl << " Annotation statistics: " << endl << endl;
  for (i=0; i<5; ++i) {
    switch (i) {
    case INITIAL:  cout << "Initial  exons: "; break;
    case INTERNAL: cout << "Internal exons: "; break;
    case FINAL:    cout << "Final    exons: "; break;
    case SINGLE:   cout << "Single   exons: "; break;
    case 4:        cout << "TOTAL:          "; break;
    default:       cout << "ERROR" << endl; exit(0);
    }
    int tot = 0;
    for (j=0; j<5; ++j) {
      cout << annotation_stats[i][j] << "\t";
      tot += annotation_stats[i][j];
    }
    cout << "( Sn: " << ((double)annotation_stats[i][0])/((double)tot);
    cout << " )\t\t" << covered_annotations[i][0] << ", " << covered_annotations[i][1];
    cout << ";\t( Sn: "
         << ((double)covered_annotations[i][0])/((double)(covered_annotations[i][0]+covered_annotations[i][1]));
    cout << " )" << endl;
  }
  cout << endl;

  // Classify every predicted exon against the annotations of its locus.
  // Locus indices run up to and including locuscount, so the loops below use
  // <= in order not to drop the last locus.
  for (i=0; i<=locuscount; ++i) {
    for (k=0; k<predicted_counts[i]; ++k) {
      currloc   = i;
      currbegin = predictions[i][k][0];
      currend   = predictions[i][k][1];
      currdir   = predictions[i][k][2];

      predictions[i][k][4] = MISS;

      // Annotations are stored in ascending locus order, so the scan can
      // stop as soon as a later locus is reached.
      for (j=0; j<currentry && annotations[j][LOC] <= currloc; ++j) {
        if (annotations[j][LOC] != currloc) continue;

        const int verdict = classify_match(currbegin, currend, currdir,
                                           annotations[j][HBEGIN],
                                           annotations[j][HEND],
                                           annotations[j][DIR]);
        if (verdict != MISS) {
          predictions[i][k][4] = verdict;
          break;
        }
      }
    }
  }

  // prediction_stats[type][verdict]; row 4 is the total.  Predictions without
  // a valid type are counted in the total row only (see stats_row()).
  int prediction_stats[5][5];
  for (i=0; i<5; ++i)
    for (j=0; j<5; ++j)
      prediction_stats[i][j] = 0;

  for (i=0; i<=locuscount; ++i)
    for (j=0; j<predicted_counts[i]; ++j)
      prediction_stats[ stats_row(predictions[i][j][3]) ][ predictions[i][j][4] ]++;

  for (i=0; i<4; ++i)
    for (j=0; j<5; ++j)
      prediction_stats[4][j] += prediction_stats[i][j];


  int covered_predictions[5][2]; // i:: type.  j:: 0: covered positions; 1: uncovered positions.
  for (i=0; i<5; ++i)            // row 4 (total) must start at zero as well
    covered_predictions[i][0] = covered_predictions[i][1] = 0;


  for (i=0, covered = 0; i<=locuscount; ++i) {
    for (k=0; k<predicted_counts[i]; ++k, covered = 0) {
      currloc   = i;
      currbegin = predictions[i][k][0];
      currend   = predictions[i][k][1];
      currdir   = predictions[i][k][2];
      currtype  = stats_row(predictions[i][k][3]);

      for (j=0; j<currentry && annotations[j][LOC] <= currloc; ++j) {
        if (annotations[j][LOC] != currloc) continue;

        covered += overlap_length(currbegin, currend, currdir,
                                  annotations[j][HBEGIN],
                                  annotations[j][HEND],
                                  annotations[j][DIR]);
      }
      covered_predictions[currtype][0] += covered;
      covered_predictions[currtype][1] += currend - currbegin + 1 - covered;
    }
  }
  for (i=0; i<4; ++i)
    for (j=0; j<2; ++j)
      covered_predictions[4][j] += covered_predictions[i][j];

  cout << endl << endl << "Predictions: " << endl;
  for (i=0; i<=locuscount; ++i)
    for (j=0; j<predicted_counts[i]; ++j)
      cout << i << ", " << j << ":\t"
           << predictions[i][j][0]  << '\t' << predictions[i][j][1] << '\t'
           << predictions[i][j][2]  << "\t#" << predictions[i][j][4] << '\t' << endl;


  cout << endl << endl << " Prediction statistics: " << endl << endl;
  for (i=0; i<5; ++i) {
    switch (i) {
    case INITIAL:  cout << "Initial  exons: "; break;
    case INTERNAL: cout << "Internal exons: "; break;
    case FINAL:    cout << "Final    exons: "; break;
    case SINGLE:   cout << "Single   exons: "; break;
    case 4:        cout << "TOTAL:          "; break;
    default:       cout << "ERROR" << endl; exit(0);
    }
    int tot = 0;
    for (j=0; j<5; ++j) {
      cout << prediction_stats[i][j] << "\t";
      tot += prediction_stats[i][j];
    }
    cout << "( Sp: " << ((double)prediction_stats[i][0])/((double)tot);
    cout << " )\t\t" << covered_predictions[i][0] << ", " << covered_predictions[i][1];
    cout << ";\t( Sp: "
         << ((double)covered_predictions[i][0])/((double)(covered_predictions[i][0]+covered_predictions[i][1]));
    cout << " )" << endl;
  }

  return 0;
}

  
  
    



