#include "fseq.h"
#include <fstream.h>
#include <iostream.h>
#include <math.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdinc.h>
#include "strstream.h"
#include "vector.h"

// This program creates a list of the genes in the human/mouse dataset.
// It writes its output in LaTeX format. At most 13 exons per gene are
// listed in the exon-length section.

// Reads characters from `in` into `s` until a newline or a failed read.
// The newline is consumed but not stored, and `s` is always NUL-terminated.
//
// Returns non-zero if the stream is still good after the line was read,
// and zero once a read has failed (for example at end of file).  A final
// line without a trailing newline is still copied into `s`, but the
// return value is zero in that case.
//
// No length limit is applied: the caller must supply a buffer large
// enough for the longest line plus the terminator.
int readLine(istream& in, char *s) {

  char *dst = s;
  char ch = '\0';
  for (;;) {
    in.get(ch);
    if (!in.good() || ch == '\n')
      break;
    *dst++ = ch;
  }
  *dst = '\0';
  return in.good();
}

// Writes the NUL-terminated string `s` to `out` one character at a time
// (the terminator itself is not written).
//
// Returns the number of characters written.
int putLine(ostream &out, char *s) {

  int written = 0;
  for (const char *p = s; *p != '\0'; ++p) {
    out << *p;
    ++written;
  }
  return written;
}

// Builds ericsList.tex, a LaTeX listing of the gene pairs in the
// human/mouse dataset.
//
// For every gene the program
//   - copies four descriptor lines from humfile.txt and four from
//     musfile.txt into the document,
//   - reads the next FilterSequence from humandata.seq and mousedata.seq,
//   - writes a table with the length of each coding exon (REGION_CEXON),
//     showing at most 13 exons per species, and
//   - writes the number of coding exons and the total coding length for
//     both species.
// A page break is inserted after every third gene; the other genes are
// separated by a horizontal rule.
//
// Returns 0 on success and 1 if one of the files could not be opened.
int main() {

  int i=0, j=0;

  // NOTE(review): the descriptor files are read from
  // /data2/rosetta/databases/mouse but the sequence files from
  // /data2/databases/mouse -- confirm that both prefixes are intended.
  ifstream human("/data2/rosetta/databases/mouse/humfile.txt");
  ifstream mouse("/data2/rosetta/databases/mouse/musfile.txt");

  ifstream hseqf("/data2/databases/mouse/humandata.seq");
  ifstream mseqf("/data2/databases/mouse/mousedata.seq");

  ofstream eric("ericsList.tex");

  // Fail loudly instead of silently producing an empty document.
  if (!human || !mouse || !hseqf || !mseqf || !eric) {
    cerr << "ericsList: could not open one of the input files or ericsList.tex" << endl;
    return 1;
  }

  // Maximum number of exon lengths printed per species; the table has one
  // extra leading column for the "H:" / "M:" label.
  const int maxExonsShown = 13;

  // readLine() applies no length limit, so descriptor lines must fit here.
  char buf[2000];

  FilterSequence hseq, mseq;

  // setup latex:
  eric << "\\documentstyle[11pt]{article}" << endl;
  eric << "\\begin{document}" << endl;
  eric << "\\footnotesize" << endl;

  int k=1;
  while (human.good() && mouse.good() && hseqf.good() && mseqf.good()) {

    // The streams are still good() right after the last record has been
    // read, so look ahead: without this check an extra, empty gene entry
    // (with the previous gene's exon data) is written at end of file.
    if (human.peek() == EOF || mouse.peek() == EOF)
      break;

    eric << "\\noindent Gene number: " << k++ << "\\\\"  << endl;
    eric << "Human ";
    for (i=0; i<4; ++i) {
      readLine(human, buf);
      putLine(eric, buf);
      eric << "\\\\" << endl;
    }
    eric << "Mouse ";
    for (i=0; i<4; ++i) {
      readLine(mouse, buf);
      putLine(eric, buf);
      eric << "\\\\" << endl;
    }

    hseqf >> hseq; mseqf >> mseq;

    // First pass: count the coding exons of each species so the table can
    // be declared with the right number of columns.
    int tothexons=0;
    int totmexons=0;
    for (j=1; j <= hseq.get_region_num(); ++j) {
      Region *r = hseq.get_region(j);
      if (r->type == REGION_CEXON)
        tothexons++;
    }
    for (j=1; j <= mseq.get_region_num(); ++j) {
      Region *r = mseq.get_region(j);
      if (r->type == REGION_CEXON)
        totmexons++;
    }

    eric << "Exon Lengths: \\\\" << endl;
    eric << "\\begin{tabular}{";
    int columns = min(max(tothexons,totmexons)+1, maxExonsShown+1);
    for (j=1; j <= columns; ++j)
      eric << "l ";
    eric << "}" << endl;

    // Second pass: print the exon lengths (only the first maxExonsShown)
    // while accumulating the exon count and total coding length.
    int hexons=0, mexons=0;
    int hclen=0, mclen=0;

    eric << "H:";
    for (j=1; j <= hseq.get_region_num(); ++j) {
      Region *r = hseq.get_region(j);
      if (r->type == REGION_CEXON) {
        hexons++;
        hclen += r->stop - r->start + 1;
        if (hexons <= maxExonsShown)
          eric << "&" <<  r->stop - r->start + 1;
      }
    }
    eric << "\\\\" << endl;

    eric << "M:";
    for (j=1; j <= mseq.get_region_num(); ++j) {
      Region *r = mseq.get_region(j);
      if (r->type == REGION_CEXON) {
        mexons++;
        mclen += r->stop - r->start + 1;
        if (mexons <= maxExonsShown)
          eric << "&" << r->stop - r->start + 1;
      }
    }
    eric << "\\end{tabular}" << endl;
    eric << "\\\\" << " \\\\" << endl;

    // Per-gene summary table.
    eric << "\\begin{tabular}{ l l l}" << endl;
    eric << "                       &" << "Human &" << "\t\t" << "Mouse" << "\\\\" <<  endl;
    eric << "Number of coding exons &" << hexons << "&"  << mexons << "\\\\" << endl;
    eric << "Total coding length    &" << hclen  << "&" << mclen  << "\\\\" << endl;

    eric << "\\end{tabular}" << endl << endl;

    // k was already incremented, so k-1 is the gene just written:
    // rule between genes, new page after every third one.
    if ((k-1)%3!=0)
      eric << "\\noindent ----------------------------------------------------------------------------------------------------------------\\\\" << endl;
    else
      eric << "\\newpage" << endl;
  }

  // finish latex
  eric << "\\end{document}" << endl;
  return 0;
}
    











