#ifndef __REP_IDENTIFY_H__
#define __REP_IDENTIFY_H__

#include <iostream.h>
#include <String.h>
#include <fstream.h>
#include "common.h"
#include "vector.h"
#include "fseq.h"
#include "rep_entry.h"

// Bit flags for the kind of region a repeat is associated with.  Each
// enumerator occupies its own bit so several can be combined into a
// single int mask.
enum RepeatType {
  REP_INTRON = 1 << 0,   // 1 - intron
  REP_CEXON  = 1 << 1,   // 2 - coding exon
  REP_NCEXON = 1 << 2,   // 4 - noncoding exon
  REP_ANY    = 1 << 3    // 8 - unclassified
};

class RepIdentify {

public:
  
  //arrays for information extracted from RepeatMasker list of repeats
  //locus contains all the locus names
  //repArray is an array of linked RepeatEntry pointers which contain 
  //the corresponding repeat information for each locus 
  String locus[1000];
  RepeatEntry **repArray;
  int entries;
  ifstream f1;

  RepIdentify() {
    entries = 0;
    repArray = new (RepeatEntry*) [1000];
    for (int i = 0; i < 1000; i++) {
      locus[i] = "";
      repArray[i] = NULL;
    }
  // change this line later on.
    f1.open("cl_repeats2");
  }

  ~RepIdentify() {
    if (repArray) {
      for (int i = 0; i <= entries; i++) 
	if (repArray[i])
	  delete repArray[i];
      delete repArray;
    }
  }


  void rm_read(ifstream& f) {
    //takes in an input stream opened from a file containing a RepeatMasker
    //style file and parses it into two arrays for later access

    String entry;
    String token[15];
    for (int i = 0; i < 15; i++) 
      token[i] = "";
    int index = 0;
    while (readline(f,entry)) { 
      int j = split(entry,token,15,RXwhite);
      if (j < 9) {
	if (j == 2 && token[1] == "none") {
	  if (locus[index] == "") {
	    locus[index] = token[0];
	    repArray[index] = NULL;
	  }
	  else {
	    index = index + 1;
	    locus[index] = token[0];
	    repArray[index] = NULL;
	  }
	}
	else 
	  cout << "error in RepeatMasker file format" << endl;
      }
      else { 
	String name = token[4];
	int begin = atoi(token[5]);
	int end = atoi(token[6]);
	String align = token[8];
	String repeat = token[9];
	RepeatEntry *curRepeat;
	if (locus[index] == "") {
	  locus[index] = name;
	  repArray[index] = new RepeatEntry(begin,end,align,repeat);
	  curRepeat = repArray[index];
	}
	else if (locus[index] == name) {
	  curRepeat->next = new RepeatEntry(begin,end,align,repeat);
	  curRepeat = curRepeat->next;
	}
	else {
	  index = index + 1;
	  locus[index] = name;
	  repArray[index] = new RepeatEntry(begin,end,align,repeat);
	  curRepeat = repArray[index];
	}
      }
    }
    entries = index;
  }

  int overlap_id(String repeat) {
    // removed repeats that only overlapped by one bp
    // 1 - intron, 2 - coding, 4 - noncoding, 8 - unclassified
    if (repeat == "GC_rich")
      return (REP_NCEXON + REP_INTRON + REP_ANY);
    else if (repeat == "AT_rich")
      return (REP_NCEXON + REP_ANY);
    else if (repeat == "L1PA12")
      return (REP_INTRON + REP_CEXON + REP_NCEXON);
    else if (repeat == "AluJo")
      return (REP_ANY + REP_INTRON + REP_CEXON);
    else if (repeat == "TIGGER1")
      return (REP_ANY + REP_CEXON + REP_INTRON);
    else if (repeat == "L2")
      return (REP_INTRON + REP_NCEXON);
    else if (repeat == "FLAM_C")
      return (REP_INTRON + REP_NCEXON);
    else if (repeat == "(CAT)n")
      return (REP_CEXON + REP_ANY);
    else if (repeat == "MIR")
      return (REP_INTRON + REP_NCEXON);
    else if (repeat == "(CGGG)n")
      return (REP_NCEXON + REP_CEXON);
    else if (repeat == "MER54B")
      return (REP_ANY + REP_NCEXON);
    else return 0;
  }
    

  void rep_find(FilterSequence* seq, vector<int>& begin, vector<int>& end, 
		vector<String>& repeat, vector<int>& region) {
    
    rm_read(f1);
    String name = seq->get_locus();
    int i;
    for (i = 0; i <= entries; i++) 
      if (name == locus[i]) 
	break;
    if (i <= entries) {
      RepeatEntry *rep = repArray[i];
      while (rep) {
	begin.push_back(rep->begin);
	end.push_back(rep->end);
	repeat.push_back(rep->repeat);
	region.push_back(overlap_id(rep->repeat));
	rep = rep->next;
      }
    }
  }
};

#endif
