#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>


#include "/data2/rosetta/cpp/vector.h"
#include "/data2/rosetta/cpp/dictionary.h"

int main (int argc, char **argv)
{
  ofstream outfile("outfile1");
  ofstream outfile2("outfile2");
  ofstream bestfile("bestfile1");
  ofstream bestfile2("bestfile2");

  unsigned long int tupcutoff;
  float percutoff;
  char name[64], name2[64], goodname[64], goodname2[64], errormsg[128], seq[1<<20], seq2[1<<20];
  Dictionary *gene, *cdna, *gene2, *cdna2;

  if (argc != 7) exit (1);
  sscanf (argv[1], "%lu", &tupcutoff);
  sscanf (argv[2], "%f", &percutoff);
  gene = new Dictionary (argv[3], errormsg);
  if (errormsg[0]) exit (1);
  cdna = new Dictionary (argv[4], errormsg);
  if (errormsg[0]) { delete (gene); exit (1); }

  gene2 = new Dictionary (argv[5], errormsg);
  if (errormsg[0]) exit (1);
  cdna2 = new Dictionary (argv[6], errormsg);
  if (errormsg[0]) { delete (gene2); exit (1); }
  
  printf ("Tuple Cutoff: %lu.\n", tupcutoff);
  printf ("Percent Cutoff: %8.3f%c.\n", percutoff, '%');
  printf ("Gene A Dictionary: %s.\n", argv[3]);
  printf ("cDNA A Dictionary: %s.\n", argv[4]);

  printf ("Gene B Dictionary: %s.\n", argv[5]);
  printf ("cDNA B Dictionary: %s.\n", argv[6]);
  
  unsigned long int hitsArray[1000], hitsArray2[1000];

  for (unsigned long int i = 0; i < cdna->numSequences (); i++)
    {
      unsigned long int len, len2;
      vector<unsigned long int> segposs, segposs2, segposp, segposp2, seglen, seglen2, segacc, segacc2;

      cdna->getSequence (i, seq);
      gene->Segments (seq, len = strlen (seq), tupcutoff, segposs, segposp, seglen, segacc);
      int hitptr = 0, found = 0;
      for (int j=0; j<seglen.size(); ++j) {
	found = 0;
	for (int k=0; k<hitptr; ++k)
	  if (hitsArray[k] == segacc[j]) { found = 1; break; }
	if (!found) hitsArray[hitptr++] = segacc[j];
      }

      name[0] = '\0';
      found = 0;
      double bestHitCover = 0;

      for (unsigned long int s = 0; s < hitptr; s++)
	{
	  unsigned long int count = 0;	      
	  memset (seq, 0, len << 2);

	  for (long int j = 0; j < seglen.size (); j++)
	    if (segacc[j] == hitsArray[s])
	      for (unsigned long int k = segposs[j]; k < segposs[j] + seglen[j]; k++)
		seq[k] = 1;

	  for (unsigned long int j = 0; j < len; j++) count += seq[j];

	  const double frac = (double)(100*count) / (double)(len);

	  if (frac >= percutoff) {
	    gene->getSequenceName (hitsArray[s], name);
	    printf ("%lu  Matches gene %lu (%s): %8.5f%c.\n", (i+1), hitsArray[s], name, frac, '%');
	    outfile << i+1 << "  matches gene " << hitsArray[s] << "\t" << name << "\t" << frac << "%" << endl;
	    if (frac >= bestHitCover)
	      {
		found = 1;
		bestHitCover = frac;
		gene->getSequenceName (hitsArray[s], goodname);
	      }
	  }
	}
      if (!found) continue;
      
      cdna2->getSequence (i, seq2);
      gene2->Segments (seq2, len2 = strlen (seq2), tupcutoff, segposs2, segposp2, seglen2, segacc2);
      int hitptr2 = 0, found2 = 0;
      for (int j=0; j<seglen2.size(); ++j) {
	found2 = 0;
	for (int k=0; k<hitptr2; ++k)
	  if (hitsArray2[k] == segacc2[j]) { found2 = 1; break; }
	if (!found2) hitsArray2[hitptr2++] = segacc2[j];
      }

      bestHitCover = 0; found = 0;
      for (unsigned long int s = 0; s < hitptr2; s++)
	{
	  unsigned long int count = 0;	      
	  memset (seq2, 0, len2 << 2);
	  
	  for (long int j = 0; j < seglen2.size (); j++)
	    if (segacc2[j] == hitsArray2[s])
	      for (unsigned long int k = segposs2[j]; k < segposs2[j] + seglen2[j]; k++)
		seq2[k] = 1;

	  for (unsigned long int j = 0; j < len2; j++) count += seq2[j];

	  const double frac = (double)(100*count) / (double)(len2);

	  if (frac >= percutoff) {
	    gene2->getSequenceName (hitsArray2[s], name2);
	    printf ("%lu  Matches gene %lu (%s): %8.5f%c.\n", (i+1), hitsArray2[s], name2, frac, '%');
	    outfile2 << i+1 << "  matches gene " << hitsArray2[s] << "\t" << name2 << "\t" << frac << "%" << endl;
	    if (frac > bestHitCover) {
	      found = 1;
	      bestHitCover = frac;
	      gene2->getSequenceName (hitsArray2[s], goodname2);
	    }
	  }
	}
      
      
      if (!found) continue;
      
      
      bestfile << i+1 << ".\t" << goodname << endl;
      bestfile2 << i+1 << ".\t" << goodname2 << endl;
      
    }
  
  delete (gene);
  delete (cdna);
}



