// Modified for the new seq specs;
#include "tuples_tables.h"


// Encodes the tuple of length l that starts at position pos of seq as a
// base-4 integer, driven by the per-position codes in skip[0..l-1]:
//   1               -> the position is ignored;
//   0               -> active position: its value (must be 0..3) becomes one
//                      base-4 digit, otherwise the tuple is rejected;
//   'a','g','c','t' -> fixed position: nucl2char() of the base must equal
//                      the code, otherwise the tuple is rejected.
// Positions are visited right to left, so the rightmost active position
// is the least significant digit.
// Returns the index, or -1 when the tuple is rejected.  Any other skip code
// is reported through err("Invalid SKIP").
int tuple_to_index_fixskips(int pos, int l, Sequence &seq, int *skip) {
  assert(pos>=1 && pos+l-1 <= seq.get_length());

  int weight = 1;      // value of the next base-4 digit
  int index = 0;
  int offset = l;
  while (offset-- > 0) {
    const int code = skip[offset];

    if (code == 1)
      continue;                                  // ignored position

    if (code == 0) {
      // Active position: anything outside 0..3 invalidates the tuple.
      if (seq.get(pos+offset)>3 || seq.get(pos+offset)<0) return -1;
      index += ((int)seq.get(pos+offset))*weight;
      weight *= 4;
    }
    else if (code==(int)'a' || code==(int)'g' ||
             code==(int)'c' || code==(int)'t') {
      // Fixed position: the base must match the requested letter.
      if (code != (int)nucl2char(seq.get(pos+offset))) return -1;
    }
    else {
      err("Invalid SKIP");
    }
  }
  return index;
}


// Array flavour of tuple_to_index_fixskips: encodes array[0..l-1] as a
// base-4 integer, driven by the per-position codes in skip[0..l-1]:
//   1               -> the position is ignored;
//   0               -> active position: array[i] (must be 0..3) becomes one
//                      base-4 digit, otherwise the tuple is rejected;
//   'a','g','c','t' -> fixed position: array[i] must equal the code itself,
//                      otherwise the tuple is rejected.
// Positions are visited right to left, so the rightmost active position
// is the least significant digit.
// Returns the index, or -1 when the tuple is rejected.  Any other skip code
// is reported through err("Invalid SKIP").
//
// NOTE(review): fixed positions compare array[i] with the letter code
// directly, while the Sequence overload compares against nucl2char(base).
// Presumably callers store the letter itself at fixed positions -- confirm.
int tuple_to_index_fixskips(int l, int *array, int *skip) {

  int weight = 1;      // value of the next base-4 digit
  int index = 0;
  int offset = l;
  while (offset-- > 0) {
    const int code = skip[offset];

    if (code == 1)
      continue;                                  // ignored position

    if (code == 0) {
      // Active position: anything outside 0..3 invalidates the tuple.
      if (array[offset]>3 || array[offset]<0) return -1;
      index += array[offset]*weight;
      weight *= 4;
    }
    else if (code==(int)'a' || code==(int)'g' ||
             code==(int)'c' || code==(int)'t') {
      if (code != array[offset]) return -1;
    }
    else {
      err("Invalid SKIP");
    }
  }
  return index;
}




// Prints the options menu to cout, one line per entry, ending with a
// blank line.  Every line is terminated with endl.
void display_options() {
  static const char *const menu[] = {
    "                  OPTIONS",
    " Note: (F) means that full table is needed.",
    "-------------------------------------------",
    " 0. Options Menu.",
    " 1. View tuples of frequency above a cutoff.",
    " 2. View frequencies of particular pattern. (F)",
    " 3. Like (1), but verbose.",
    " 4. Pairs of patterns in same region. (F)",
    " 5. Number of regions exhibiting frequent patterns. (F)",
    " 6. Create table with strong/weak frame signals for frametest. (F)",
    " 7. Create table. (F)",
    " 8. Create ranks table. (F)",
    "-1. Exit.",
    "-------------------------------------------",
    ""                                   // trailing blank line
  };
  const int menu_lines = sizeof(menu)/sizeof(menu[0]);

  for (int k=0; k<menu_lines; ++k)
    cout << menu[k] << endl;
}

// Builds a two-row tuple-frequency table from the sequences in input_file,
// counting only tuples in coding exons and in introns.
//
//   l          tuple length;
//   skip       l per-position codes as understood by
//              tuple_to_index_fixskips(); every non-zero entry counts as a
//              skipped position;
//   input_file file the sequences are read from;
//   MASK_RT    when non-zero, every range reported by
//              RepIdentify::rep_find() is overwritten with BASE_UNKNOWN
//              before counting.
//
// Returns a new[]-allocated array of 2 rows, each holding
// power(4, l-skipped)+1 ints:
//   row 0: counts of tuples whose start position is marked FR0, FR1 or FR2;
//   row 1: counts of tuples whose start position is marked INTR.
// The extra last slot of each row holds the total number of valid tuples
// seen with those marks.  The caller owns the returned arrays.
int** economic_create_table(int l, int *skip, char *input_file, int MASK_RT) {
  const int NUM_MARKS = 10;    // mark values are asserted below to be < 10;
  int i=0,j=0;

  int skipped = 0;             // Number of skipped positions;
  for (i=0;i<l; ++i)
    if (skip[i]) skipped++;
  assert(l-skipped <= MAXACTIVE);      // We do not want more than 9 active positions;

  // Number of distinct tuple indices; hoisted so power() is evaluated once.
  const int table_size = power(4,l-skipped);

  int **tuples = new int*[2];
  for (i=0; i<2; ++i) {
    tuples[i] = new int[table_size+1];
    for (j=0; j<=table_size; ++j)
      tuples[i][j]=0;
  }

  // Per-mark totals.  All entries are zeroed: any mark value below
  // NUM_MARKS may be used as an index further down.
  int tuplecounts[NUM_MARKS];
  for (i=0; i<NUM_MARKS; ++i) tuplecounts[i]=0;

  ifstream fin(input_file);

  FilterSequence seq;

  int marks[MAXSEQLEN];
  for (i=0; i<MAXSEQLEN; marks[i++]=GARB);

  // NOTE(review): fin.good() is tested before the read, so a failed final
  // read may still be processed with whatever seq then holds -- confirm how
  // operator>> on FilterSequence behaves at end of file.
  while (fin.good()) {
    fin >> seq;

    if (seq.get_region_num()<=2) {
      cout << seq.get_region_num(); fflush(stdout);
      continue;
    }

    // N's are not rewritten here: not needed because of the GARBAGE_IGNORE option;

    if (MASK_RT) {
      cout << "*"; fflush(stdout);
      RepIdentify repIdObj;
      vector<int> beginVec, endVec, regionVec;
      vector<String> repeatVec;
      repIdObj.rep_find(&seq,beginVec, endVec, repeatVec, regionVec);
      for (i=0; i<(int)beginVec.size(); ++i)
        for (j=beginVec[i]; j<=endVec[i]; ++j)
          seq[j]=BASE_UNKNOWN;
    }
    else { cout << ":"; fflush(stdout); }

    if (seq.get_length()>=MAXSEQLEN) cout << "length:" << seq.get_length() << endl;
    assert(MAXSEQLEN>seq.get_length());

    mark_sequence(seq,l,marks);

    for (i=0; i<seq.get_length(); ++i)
      assert(marks[i]<NUM_MARKS);

    int index=0;
    int seqlength = seq.get_length();
    for (i=1; i<=seqlength-l+1; ++i) {
      index = tuple_to_index_fixskips(i,l,seq,skip);
      if (index==-1) continue;
      assert(index < table_size);
      switch(marks[i]) {
      case FR0:
      case FR1:
      case FR2: tuples[0][index]++; break;
      case INTR: tuples[1][index]++; break;
      default: break;
      }
      // Count the tuple under its mark.  This statement used to sit after
      // "default: break;" inside the switch, where it could never run, so
      // the totals reported below were never accumulated.
      if (marks[i]>=0 && marks[i]<NUM_MARKS)
        tuplecounts[marks[i]]++;
    }
    for (i=0; i<seq.get_length(); ++i) marks[i]=GARB;
  }

  const int exon_total = tuplecounts[FR0] + tuplecounts[FR1] + tuplecounts[FR2];
  const int intron_total = tuplecounts[INTR];

  cout << "Total number of tuples:" << endl;
  cout << "Exons:   " << exon_total << endl;
  cout << "Introns: " << intron_total << endl;
  tuples[0][table_size]=exon_total;
  tuples[1][table_size]=intron_total;
  return tuples;
}

// Builds the full tuple-frequency table from the sequences in input_file,
// with one row per mark value produced by mark_sequence().
//
//   l          tuple length;
//   skip       l per-position codes as understood by
//              tuple_to_index_fixskips(); every non-zero entry counts as a
//              skipped position;
//   input_file file the sequences are read from;
//   MASK_RT    when non-zero, every range reported by
//              RepIdentify::rep_find() is overwritten with BASE_UNKNOWN
//              before counting.
//
// Returns a new[]-allocated array of 10 rows, each holding
// power(4, l-skipped)+1 ints.  tuples[m][k] is the number of valid tuples
// with index k whose start position carries mark m; the extra last slot of
// row m holds the total number of valid tuples with mark m.  The caller
// owns the returned arrays.
int** create_table(int l, int *skip, char *input_file, int MASK_RT) {
  const int NUM_MARKS = 10;    // one table row / counter per mark value;

  // l: tuple length; skip: array of skipped positions;
  int i=0,j=0;

  int skipped = 0;             // Number of skipped positions;
  for (i=0;i<l; ++i)
    if (skip[i]) skipped++;
  assert(l-skipped <= MAXACTIVE);      // We do not want more than 9 active positions;

  // Number of distinct tuple indices; hoisted so power() is evaluated once.
  const int table_size = power(4,l-skipped);

  int **tuples = new int*[NUM_MARKS];
  for (i=0; i<NUM_MARKS; ++i) {
    tuples[i] = new int[table_size+1];
    for (j=0; j<=table_size; ++j)
      tuples[i][j]=0;
  }

  // All NUM_MARKS counters are zeroed: every one of them is copied into
  // the table at the end (previously only the first 8 were initialised).
  int tuplecounts[NUM_MARKS];
  for (i=0; i<NUM_MARKS; ++i) tuplecounts[i]=0;

  ifstream fin(input_file);

  FilterSequence seq;
  int sequence_count=0;

  int marks[MAXSEQLEN];
  for (i=0; i<MAXSEQLEN; marks[i++]=GARB);

  // NOTE(review): fin.good() is tested before the read, so a failed final
  // read may still be processed with whatever seq then holds -- confirm how
  // operator>> on FilterSequence behaves at end of file.
  while (fin.good()) {
    // Progress output: five sequence numbers per line.
    cout << "Sequence: " << ++sequence_count;
    if (sequence_count%5) cout << '\t';
    else                  cout << endl;

    fin >> seq;

    if (seq.get_region_num()<=2) {
      cout << seq.get_region_num(); fflush(stdout);
      continue;
    }

    // N's are not rewritten here: not needed because of the GARBAGE_IGNORE option;

    if (MASK_RT) {
      cout << "*"; fflush(stdout);
      RepIdentify repIdObj;
      vector<int> beginVec, endVec, regionVec;
      vector<String> repeatVec;
      repIdObj.rep_find(&seq,beginVec, endVec, repeatVec, regionVec);
      for (i=0; i<(int)beginVec.size(); ++i)
        for (j=beginVec[i]; j<=endVec[i]; ++j)
          seq[j]=BASE_UNKNOWN;
    }
    else { cout << ":"; fflush(stdout); }

    if (seq.get_length()>=MAXSEQLEN) cout << "length:" << seq.get_length() << endl;
    assert(MAXSEQLEN>seq.get_length());

    mark_sequence(seq,l,marks);

    for (i=0; i<seq.get_length(); ++i)
      assert(marks[i]<NUM_MARKS);

    int index=0;
    int seqlength = seq.get_length();
    for (i=1; i<=seqlength-l+1; ++i) {
      index = tuple_to_index_fixskips(i,l,seq,skip);
      if (index==-1) continue;
      assert(index < table_size);
      // marks[i] selects the row, so it must be a valid row number.
      assert(marks[i]>=0 && marks[i]<NUM_MARKS);
      tuples[marks[i]][index]++;
      tuplecounts[marks[i]]++;
    }
    for (i=0; i<seq.get_length(); ++i) marks[i]=GARB;
  }

  // Store each mark's total in the extra last slot of its row.
  for (i=0; i<NUM_MARKS; ++i)
    tuples[i][table_size]=tuplecounts[i];
  return tuples;
}

// Threshold test shared by all six cases of frametablevalue().
// With ref = table[lead][i], returns true when
//   ref > cutoff,
//   table[otherA][i] and table[otherB][i] are both <= fE * ref, and
//   table[INTR][i] <= 10 * fI * ref.
static bool passes_frame_thresholds(int **table, int i,
                                    int lead, int otherA, int otherB,
                                    int cutoff, double fE, double fI) {
  const int ref = table[lead][i];
  if (!(ref > cutoff)) return false;

  return table[otherA][i] <= fE * ref &&
         table[otherB][i] <= fE * ref &&
         table[INTR][i] <= 10 * fI * ref;
}

// Classifies column i of table by the first threshold test it passes.
// The tests are tried in this order:
//   strong parameters (Sc, SfE, SfI) with lead row FR0, FR1, FR2
//     -> returns 3, 4, 5 respectively;
//   weak parameters (Wc, WfE, WfI) with lead row FR0, FR1, FR2
//     -> returns 0, 1, 2 respectively.
// Returns -1 when none of the six tests passes.
int frametablevalue(int Sc,double SfE,double SfI,int Wc,double WfE,double WfI,int **table, int i) {
  // Strong parameters first.
  if (passes_frame_thresholds(table, i, FR0, FR1, FR2, Sc, SfE, SfI)) return 3;
  if (passes_frame_thresholds(table, i, FR1, FR0, FR2, Sc, SfE, SfI)) return 4;
  if (passes_frame_thresholds(table, i, FR2, FR1, FR0, Sc, SfE, SfI)) return 5;

  // Then the weak parameters.
  if (passes_frame_thresholds(table, i, FR0, FR1, FR2, Wc, WfE, WfI)) return 0;
  if (passes_frame_thresholds(table, i, FR1, FR0, FR2, Wc, WfE, WfI)) return 1;
  if (passes_frame_thresholds(table, i, FR2, FR1, FR0, Wc, WfE, WfI)) return 2;

  return -1;
}









