#include "localprob.h"

// Build a (left, right) position pair.
// A negative coordinate is treated as fatal: a message is written to stdout
// and the process terminates with exit status 1.
Positions::Positions(int left, int right) {
  const bool illegal = (left < 0) || (right < 0);

  if (illegal) {
    cout << "Position error: " << "illegal position.\n";
    exit(1);
  }

  this->left = left;
  this->right = right;
}

// Strict weak ordering used when Positions serves as a map key:
// compare by left coordinate first, then by right coordinate.
bool Positions::operator<(const Positions &rhs) const {
  if (left != rhs.left) return left < rhs.left;
  return right < rhs.right;
}

// Store the sequence and the analysis parameters, normalising the window
// settings on the way in:
//   - a window longer than the sequence is clamped to the sequence length
//     (and pairsize is set to the sequence length as well);
//   - otherwise pairsize is capped at winsize;
//   - diagsize falls back to 3 unless it is exactly 3 or 5.
// NOTE(review): when the window is clamped, pairsize is raised to the sequence
// length even if a smaller pairsize was requested — confirm this is intended.
LocalProb::LocalProb(string id, string seq, int winsize, int pairsize, float cutoffb, float nu, int diagsize, double thres, int region, int verbose) {
  this->id = id;
  this->seq = seq;

  const int seqlen = static_cast<int>(seq.length());

  if (winsize > seqlen) {
    // Window does not fit: use the whole sequence for both sizes
    this->winsize = seqlen;
    this->pairsize = seqlen;
  }

  else {
    this->winsize = winsize;
    this->pairsize = (pairsize > winsize) ? winsize : pairsize;
  }

  this->cutoffb = cutoffb;
  this->nu = nu;
  this->diagsize = (diagsize == 3 || diagsize == 5) ? diagsize : 3;
  this->thres = thres;
  this->region = region;
  this->verbose = verbose;
}

void LocalProb::calcLocalProb(map<Positions, double> &prob) {
  int seqlen = static_cast<int>(seq.length());

  if (region == 1 || verbose == 1) {
    cout << "Length of the sequence (" << id << "): " << seqlen << endl;
    cout << "\n";
  }

  double betaScale = 1.;
  Vienna::model_detailsT md;
  Vienna::set_model_details(&md);
  Vienna::pf_paramT *pf_parameters = Vienna::get_boltzmann_factors(Vienna::temperature, betaScale, md, -1);

  // Calculate local base-pairing probabilities by pfl_fold
  Vienna::plist *dpp = NULL; // Dummy
  Vienna::plist *pl = Vienna::pfl_fold_par(const_cast<char*>(seq.c_str()), winsize, pairsize, cutoffb, NULL, &dpp, NULL, NULL, pf_parameters);

  int i = 0;

  // The end of the plist is marked by sequence positions i and j equal to 0
  while (pl[i].j > 0) {
    Positions pos = Positions(pl[i].i, pl[i].j);
    prob.insert(make_pair(pos, pl[i].p));
    i++;
  }

  free(pl);
  free(pf_parameters);

  /*
  #if DEBUG
  map<Positions, double>::iterator p;
  cout << "Local base-pairing probability list:\n";
  for (p=prob.begin(); p!=prob.end(); p++) {
    cout << p->first.left << " " << p->first.right << " " << p->second << endl;
  }
  cout << "\n";
  #endif
  */

  /*
  #if DEBUG
  int seqlen = seq.length();
  vector<vector<int> > bppm;
  bppm.resize(seqlen+1);
  for (int i=0; i<=seqlen; i++) {
    bppm[i].resize(seqlen+1);
  }
  ostringstream ostr;
  ostr << seqlen;
  int idlen = ostr.str().length();
  map<Positions, double>::iterator p;
  cout << "Coarse-grained local base-pairing probability matrix:\n";
  for (p=prob.begin(); p!=prob.end(); p++) {
    bppm[p->first.left][p->first.right] = 1;
  }
  for (int i=1; i<=seqlen; i++) {
    cout << setw(idlen) << i << ": ";
    for (int j=1; j<=seqlen; j++) {
      if (i > j) cout << "*";
      else cout  << bppm[i][j];
    }
    cout << "\n";
  }
  cout << "\n";
  #endif
  */
}

void LocalProb::enumCandidate(map<Positions, double> &candidate, map<Positions, double> &prob) {
  int seqlen = static_cast<int>(seq.length());

  // [k, l]: sliding window of fixed size in the genome (l = k+winsize-1)

  // Calculate left & right probabilities
  map<Positions, double> lprob; // Left probability list (hash)
  map<Positions, double> rprob; // Right probability list (hash)
  map<Positions, double>::iterator p;
  double lvalue = 0, rvalue = 0;
  int sumsubseq = seqlen / 1000000 + 1; // The number of subsequences divided by 1M

  for (int j=1; j<=sumsubseq; j++) { // Calculate lprob, rprob, alpha and beta in each subsequence independently
    int endpos = 1000000*j;
    if (j == sumsubseq) endpos = seqlen;

    for (int i=1+1000000*(j-1); i<=endpos; i++) {
      for (int k=i; k>=max(1, i-2*winsize+1); k--) {
	Positions pos1 = Positions(k+1, i);
	p = lprob.find(pos1);
	if (p != lprob.end()) lvalue = p->second;
	Positions pos2 = Positions(k, i);
	p = prob.find(pos2);
	if (p != prob.end()) lvalue = lvalue + p->second;
	if (lvalue > cutoffb) lprob.insert(make_pair(pos2, lvalue)); // lprob(k, i)
	lvalue = 0;
      } //k

      for (int l=i; l<=min(i+2*winsize-1, seqlen); l++) {
	Positions pos1 = Positions(i, l-1);
	p = rprob.find(pos1);
	if (p != rprob.end()) rvalue = p->second;
	Positions pos2 = Positions(i, l);
	p = prob.find(pos2);
	if (p != prob.end()) rvalue = rvalue + p->second;
	if (rvalue > cutoffb) rprob.insert(make_pair(pos2, rvalue)); // rprob(i, l)
	rvalue = 0;
      } // l
    } // i

    /*
    #if DEBUG
    cout << "Left probabilities:\n";
    for (p=lprob.begin(); p!=lprob.end(); p++) {
      cout << "L(" << p->first.left << ", " << p->first.right << ") = " << p->second << endl;
    }
    cout << "\n";
    cout << "Right probabilities:\n";
    for (p=rprob.begin(); p!=rprob.end(); p++) {
      cout << "R(" << p->first.left << ", " << p->first.right << ") = " << p->second << endl;
    }
    cout << "\n";
    #endif
    */

    // Calculate inside-inside & inside-outside probabilities
    map<Positions, double> alpha; // Inside-inside probability list (hash)
    map<Positions, double> beta; // Inside-outside probability list (hash)

    for (int k=1+1000000*(j-1); k<=endpos-winsize+1; k++) {
      int l = k+winsize-1;
      double suma = 0, sumb = 0;

      for (int i=k; i<=l; i++) {
	// For inside-inside probabilities
	Positions posa1 = Positions(k, i);
	p = lprob.find(posa1);
	if (p != lprob.end()) suma = suma + p->second;
	Positions posa2 = Positions(i, l);
	p = rprob.find(posa2);
	if (p != rprob.end()) suma = suma + p->second;

	// For inside-outside probabilities
	Positions posb1 = Positions(max(1, k-winsize+1), i);
	p = lprob.find(posb1);
	if (p != lprob.end()) sumb = sumb + p->second;
	Positions posb2 = Positions(i, min(l+winsize-1, seqlen));
	p = rprob.find(posb2);
	if (p != rprob.end()) sumb = sumb + p->second;
      } // i

      if (suma > cutoffb) {
	Positions posa = Positions(k, l);
	alpha.insert(make_pair(posa, suma/2)); // alpha(k, l)
      }

      if (sumb > cutoffb) {
	Positions posb = Positions(k, l);
	p = alpha.find(posb);
	if (p != alpha.end()) beta.insert(make_pair(posb, sumb-2*p->second)); // beta(k, l)
	else beta.insert(make_pair(posb, sumb)); // beta(k, l)
      }

      // Compare alpha with beta
      double a = 0, b = 0;
      Positions pos = Positions(k, l);
      p = alpha.find(pos);
      if (p != alpha.end()) a = p->second;
      p = beta.find(pos);
      if (p != beta.end()) b = p->second;
      if (a > nu*b) candidate.insert(make_pair(pos, a-nu*b));
      alpha.clear();
      beta.clear();
    } // k

    lprob.clear();
    rprob.clear();

    /*
    #if DEBUG
    cout << "Inside-inside probabilities:\n";
    for (p=alpha.begin(); p!=alpha.end(); p++) {
      cout << "alpha(" << p->first.left << ", " << p->first.right << ") = " << p->second << endl;
    }
    cout << "\n";
    cout << "Inside-outside probabilities:\n";
    for (p=beta.begin(); p!=beta.end(); p++) {
      cout << "beta(" << p->first.left << ", " << p->first.right << ") = " << p->second << endl;
    }
    cout << "\n";
    alpha.clear();
    beta.clear();
    #endif
    */

    /*
    #if DEBUG
    cout << "Candidate intervals with expected number of base pairs such that alpha > beta:\n";
    for (p=candidate.begin(); p!=candidate.end(); p++) {
      cout << "[" << p->first.left << ", " <<  p->first.right << "]: " << p->second << endl;
    }
    cout << "\n";
    #endif
    */

  } // j
}

void LocalProb::slideWindow(map<Positions, vector<int> > &interval, int step) {
  int seqlen = static_cast<int>(seq.length()); // "seqlen >= winsize" must be always realized

  if (step == 0) makeInterval(interval, 1, seqlen);

  else {
    string ns = "";

    for (int i=0; i<step; i++) {
      ns = ns + "N";
    }

    string subseq = "";

    for (int i=1; i+winsize-1<=seqlen; i=i+step) {

      // Find the first position of 'N'
      subseq = seq.substr(i-1, winsize);
      if (subseq.find(ns) == string::npos) makeInterval(interval, i, i+winsize-1);
    }
  }

  if (verbose == 1) {
    map<Positions, vector<int> >::iterator p;
    cout << "Intervals for the sequence (" << id << "):\n";
    if (interval.empty()) cout << "None.\n";
    else {
      for (p=interval.begin(); p!=interval.end(); p++) {
	cout << "[" << p->first.left << ", " <<  p->first.right << "]\n";
      }
    }
    cout << "\n";
  }
}

void LocalProb::nonOverlap(map<Positions, vector<int> > &interval, map<Positions, double> &candidate) {

  // Take the interval pointed by p with the maximum value within a set of overlapping windows
  map<Positions, double>::iterator p, q;
  map<Positions, vector<int> >::iterator r;
  int flag = 0;

  for (p = candidate.begin(); p!=candidate.end(); p++) {
    if (interval.empty() == false) {
      r = interval.end();
      r--;
      if (p->first.left < r->first.right) continue;
    }

    Positions pos = Positions(p->first.left, p->first.right);

    // q moves within a set of overlapping intervals
    for (q=candidate.find(pos); q!=candidate.end(); q++) {
      if (q == p) continue;

      if (q->first.left < p->first.right) { // Overlapping case
	if (q->second > p->second) {
	  flag = 1;
	  break;
	}

	else if (q->second < p->second) continue;
      }

      else break; // Non-overlapping case
    } // q

    if (flag == 1) {
      flag = 0;
      continue;
    }

    vector<int> v;
    interval.insert(make_pair(pos, v));
  } // p

  if (region == 1 || verbose == 1) {
    cout << "Non-overlapped intervals for the sequence (" << id << "):\n";
    if (interval.empty()) cout << "None.\n";
    else {
      for (r=interval.begin(); r!=interval.end(); r++) {
	cout << "[" << r->first.left << ", " <<  r->first.right << "]\n";
      }
    }
    cout << "\n";
  }
}

// Make a binary profile vector anti-diagonally with sliding window of size 3 or 5.
// For every interval [k, l] in 'interval', the probabilities in 'prob' are
// collected along two families of anti-diagonals (starting on (h, h) and on
// (h, h+1)), merged by integrate() using diagsize and thres, and the
// resulting vector replaces the interval's mapped value.
void LocalProb::makeVector(map<Positions, vector<int> > &interval, map<Positions, double> &prob) {
  map<Positions, vector<int> >::iterator p;

  for (p=interval.begin(); p!=interval.end(); ++p) {
    const int k = p->first.left;
    const int l = p->first.right;

    vector<vector<double> > diag1(l-k+1); // Count from 0
    vector<vector<double> > diag2(l-k); // Count from 0
    traverseDiagonally(diag1, prob, k, l, 1);
    traverseDiagonally(diag2, prob, k, l, 2);

    vector<int> profile; // Integrated binary vector
    integrate(profile, diag1, diag2, k, l, diagsize, thres);
    p->second.swap(profile);

    /*
    #if DEBUG
    map<Positions, double>::iterator q;
    int seqlen = seq.length();
    vector<vector<int> > bppm;
    bppm.resize(seqlen+1);
    for (int i=0; i<=seqlen; i++) {
      bppm[i].resize(seqlen+1);
    }
    ostringstream ostr;
    ostr << seqlen;
    int idlen = ostr.str().length();
    cout << "Coarse-grained local base-pairing probability matrix for the interval [" << k << ", " << l << "]:\n";
    for (q=prob.begin(); q!=prob.end(); q++) {
      bppm[q->first.left][q->first.right] = 1;
    }
    for (int i=k; i<=l; i++) {
      cout << setw(idlen) << i << ": ";
      for (int j=k; j<=l; j++) {
        if (i > j) cout << "*";
        else cout  << bppm[i][j];
      }
      cout << "\n";
    }
    cout << "\n";
    #endif
    */

  } // p

  /*
  #if DEBUG
  vector<int>::iterator r;
  cout << "Potential structured regions [k, l] and their profile vectors for the sequence:\n";
  if (interval.empty()) cout << "None.\n";
  else {
    int i = 1;
    for (p=interval.begin(); p!=interval.end(); p++) {
      cout << i << ": [" << p->first.left << ", " <<  p->first.right << "]\n";
      for (r=p->second.begin(); r!=p->second.end(); r++) {
        cout << *r;
      }
      ++i;
      cout << "\n";
    }
  }
  cout << "\n";
  #endif
  */
}

// Add the interval [left, right] to 'interval' with an empty vector as its value.
// An existing entry with the same key is left unchanged (map::insert semantics).
void makeInterval(map<Positions, vector<int> > &interval, int left, int right) {
  interval.insert(make_pair(Positions(left, right), vector<int>()));
}

void traverseDiagonally(vector<vector<double> > &diag, map<Positions, double> &prob, int k, int l, int did) {
  map<Positions, double>::iterator p;

  for (int h=k; h<=l; h++) {
    int i = 0, j = 0;

    if (did == 1) { // For diag1
      i = h;
      j = h;
    }

    else if (did == 2) { // For diag2
      i = h;
      j = h + 1;
    }

    while (i >=k && j <= l) {
      Positions pos = Positions(i, j);
      p = prob.find(pos);
      if (p != prob.end()) diag[h-k].push_back(p->second);
      else diag[h-k].push_back(0);
      i--;
      j++;
    }
  } // h

  /*
  #if DEBUG
  cout << "Array of diagonal " << did << endl;
  for (int i=0; i<static_cast<int>(diag.size()); i++) {
    for (int j=0; j<static_cast<int>(diag[i].size()); j++) {
      cout << diag[i][j];
    }
    cout << "\n";
  }
  cout << "\n";
  #endif
  */
}

// Turn the anti-diagonal probability vectors of a window [k, l] into one
// binary vector appended to 'ivect'.
// diag1 and diag2 are interleaved (diag1[0], diag2[0], diag1[1], ...), then a
// sliding group of 'diagsize' neighbouring vectors is moved over them. For
// each element index j within the group, the values available at j are handed
// to getBinary() together with 'thres', and its result becomes the next digit.
void integrate(vector<int> &ivect, vector<vector<double> > &diag1, vector<vector<double> > &diag2, int k, int l, int diagsize, double thres) {
  vector<vector<double> > adiag; // Alternating vectors consisting of diag1 and diag2
  const int ndiag1 = static_cast<int>(diag1.size());

  for (int d=0; d<ndiag1; ++d) { // Always #diag1 = #diag2 + 1
    adiag.push_back(diag1[d]);
    if (d != ndiag1-1) adiag.push_back(diag2[d]); // The last diag1 has no diag2 partner
  }

  /*
  #if DEBUG
  cout << "Vectors obtained by traversing the probability matrix for [" << k << ", " << l << "] anti-diagonally:\n";
  for (int i=0; i<static_cast<int>(adiag.size()); i++) {
    for (int j=0; j<static_cast<int>(adiag[i].size()); j++) {
      cout << setprecision(2) << adiag[i][j] << " ";
    }
      cout << "\n";
  }
  cout << "\n";
  #endif
  */

  const int offset = diagsize / 2; // Offset from the center of the diagonal sliding window
  const int lastcenter = (l - k) * 2 - offset;
  vector<double> cell; // Array of probabilities to calculate a binary digit

  for (int h=offset; h<=lastcenter; ++h) { // 'h' shows the center index
    const int lo = h - offset;
    const int hi = h + offset;

    // Longest vector in the group decides how many digits this group yields
    int lmax = 0;

    for (int i=lo; i<=hi; ++i) {
      const int len = static_cast<int>(adiag[i].size());
      if (len > lmax) lmax = len;
    }

    for (int j=0; j<lmax; ++j) {
      cell.clear();

      // Shorter vectors simply contribute nothing beyond their own length
      for (int i=lo; i<=hi; ++i) {
        if (j < static_cast<int>(adiag[i].size())) cell.push_back(adiag[i][j]);
      }

      ivect.push_back(getBinary(cell, thres));
    } // j
  } // h

}

// Return 1 when the sum of all values in 'cell' is strictly greater than
// 'thres', otherwise 0. An empty 'cell' sums to 0.
int getBinary(vector<double> &cell, double thres) {
  double total = 0.0;

  for (vector<double>::size_type idx=0; idx<cell.size(); ++idx) {
    total = total + cell[idx];
  }

  return (total > thres) ? 1 : 0;
}

/*
int getBinary1(vector<double> &cell) {
  int bin = 0;

  for (int i=0; i<static_cast<int>(cell.size()); i++) {
    if (cell[i] > 0) {
      bin = 1;
      break;
    }
  }

  return bin;
}
*/

/*
int getBinary3(vector<double> &cell) {
  double th1 = 0.4, th2 = 0.3;
  int bin = 0;
  double sum = 0.0;
  double max = 0.0;
  vector<double>::iterator p;

  for (p=cell.begin(); p!=cell.end(); p++) {
    sum = sum + *p;
  }

  if (sum > th1) bin = 1;

  else {
    p = max_element(cell.begin(), cell.end());
    max = *p;
    if (max > th2) bin = 1;
  }

  return bin;
}
*/
