/* plateau.c
 *
 * contains functions to identify plateau like regions in a matrix
 *
 *    Copyright (C) 2014  Nikolai Hecker
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_cdf.h>
#include <limits.h>
#include <float.h>
#include "datatypes.h"
#include "constants.h"
#include "matrix.h"
#include "utility.h"

/* ---------------------- convert2log10_mat ----------------------------
 *
 * replaces every entry of a matrix by its base-10 logarithm (in place)
 *
 * m: matrix to convert; all nrows x ncols entries are overwritten
 *
 * NOTE: entries are handed to log10() unchecked, i.e. an entry of 0
 *       becomes -inf and a negative entry becomes NaN
 */
void
convert2log10_mat(NMMatrix *m)
{
  unsigned long row, col;

  for(row = 0; row < m->nrows; row++)
    {
      for(col = 0; col < m->ncols; col++)
        m->e[row][col] = log10(m->e[row][col]);
    }
}


/* ---------------------- convert2prob_mat ----------------------------
 *
 * converts all ddG values of a matrix to probabilities (in place)
 *
 * m:    matrix to convert; every entry x is replaced by exp(x / kt)
 * temp: temperature; 273.15 is added before use, so this is presumably
 *       given in degrees Celsius (TODO confirm with callers)
 *
 * kt = (temp + 273.15) * 1.98717 / 1000; the factor looks like the gas
 * constant in kcal/(mol*K) - confirm against the energy unit of m
 *
 * NOTE(review): the exponent is NOT negated; whether that is intended
 *               depends on the sign convention of the stored ddG values
 */
void
convert2prob_mat(NMMatrix *m, const double temp)
{
  unsigned long row, col;
  const double kt = (temp + 273.15) * 1.98717/1000;

  for(row = 0; row < m->nrows; row++)
    {
      for(col = 0; col < m->ncols; col++)
        m->e[row][col] = exp( m->e[row][col] / kt);
    }
}


/* ---------------------- get_largest_id_area -------------------------
 *
 * grows a square area around element (i, j) ring by ring for as long as
 * every element of the next ring has the given id value
 *
 * m:      integer matrix to inspect
 * id:     value every ring element has to be equal to
 * i, j:   row / column of the centre element; the centre itself is NOT
 *         compared against id and always counts as one element
 * max_k:  exclusive upper bound for the ring distance, i.e. at most
 *         (max_k - 1) rings are added
 * radius: output (single value NOT array); number of accepted rings
 *
 * a ring at distance k is only accepted if it lies completely inside the
 * matrix and consists of id values only; the search stops at the first
 * ring that is not accepted
 *
 * returns size of area = number of elements (1 + 8*1 + ... + 8*radius)
 */
unsigned long
get_largest_id_area(NMINTMatrix *m, const unsigned long id, const unsigned long i, const unsigned long j, const unsigned long max_k, unsigned long *radius)
{
  unsigned long area, k, h;
  unsigned long lo_row, hi_row, lo_col, hi_col;
  int matches;

  area = 1;
  radius[0] = 0;

  for(k=1; k < max_k; k++)
    {
      /* ring has to fit into the matrix on all four sides; checking
       * this first avoids scanning sides of a ring that is rejected
       * anyway */
      if( i < k || j < k || (i+k) >= m->nrows || (j+k) >= m->ncols )
        break;

      lo_row = i - k;
      hi_row = i + k;
      lo_col = j - k;
      hi_col = j + k;
      matches = 1;

      /* -- verticals: left and right column including the corners -- */
      for(h=lo_row; h <= hi_row; h++)
        {
          if(m->e[h][lo_col] != id || m->e[h][hi_col] != id)
            {
              matches = 0;
              break;
            }
        }

      /* -- horizontals: rows i-k and i+k without the corners -- */
      for(h=lo_col+1; matches && h < hi_col; h++)
        {
          if(m->e[lo_row][h] != id || m->e[hi_row][h] != id)
            matches = 0;
        }

      if(!matches)
        break;

      /* complete ring: 2 * (2k+1) vertical + 2 * (2k-1) horizontal */
      radius[0] += 1;
      area += 8 * k;
    }

  return area;
}

/* ---------------------- get_k_distance_ring -------------------------
 * collects all elements in a ring (square) around element (i,j) which
 * are separated by k-1 elements from (i,j), i.e. which surround (i,j)
 * in a distance of k; parts of the ring outside of the matrix are
 * skipped
 *
 * m:        matrix to read from
 * k:        distance of the ring; k == 0 yields only element (i,j)
 * i, j:     row / column of the centre element
 * elements: output array which has to be allocated sufficiently
 *           (a complete ring has 8*k elements)
 *
 * elements are written in the order: left column (j-k), right column
 * (j+k), row i-k, row i+k; the columns include the corners, the rows
 * do not
 *
 * returns number of elements in k distance ring
 */
unsigned long
get_k_distance_ring(NMMatrix *m, const unsigned long k, const unsigned long i, const unsigned long j, double *elements)
{
  unsigned long n, first, last, fixed, run;

  /* distance 0: the ring is the centre element itself */
  if(k == 0)
    {
      elements[0] = m->e[i][j];
      return 1;
    }

  n = 0;

  /* -- verticals: row range clamped to the matrix -- */
  first = (k <= i) ? (i-k) : 0;
  last = ((i+k) < m->nrows) ? (i+k) : (m->nrows-1);

  /* left column - only if j-k is inside the matrix */
  if(j >= k)
    {
      fixed = j-k;
      for(run = first; run <= last; run++)
        elements[n++] = m->e[run][fixed];
    }

  /* right column - only if j+k is inside the matrix */
  fixed = j+k;
  if(fixed < m->ncols)
    {
      for(run = first; run <= last; run++)
        elements[n++] = m->e[run][fixed];
    }

  /* -- horizontals: column range clamped to the matrix; starts and
   *    ends one column inside the ring since the corners were already
   *    collected with the verticals -- */
  first = (k <= (j+1)) ? (j-k+1) : 0;
  last = ((j+k-1) < m->ncols) ? (j+k-1) : (m->ncols-1);

  /* row i-k - only if it is inside the matrix */
  if(i >= k)
    {
      fixed = i - k;
      for(run = first; run <= last; run++)
        elements[n++] = m->e[fixed][run];
    }

  /* row i+k - only if it is inside the matrix */
  fixed = i+k;
  if(fixed < m->nrows)
    {
      for(run = first; run <= last; run++)
        elements[n++] = m->e[fixed][run];
    }

  return n;
}


/* ---------------------- get_best_extension --------------------------
 * starting from (idxi, idxj) extends inner area ring by ring and shifts
 * outer ring to get optimal extension for plateau like area
 * m should contain logarithmic values
 *
 * for every distance d in [0, maxdist] the minimum over all rings 0..d
 * (inner area) is determined; for d >= mindist it is compared with the
 * p quantile of the rings d+1..d+owidth (outer ring):
 *   diff = [inner minimum] - [outer quantile]
 *
 * inside, outside: working arrays, have to be allocated (size M * N),
 *                  will be overwritten
 * p: fraction passed to gsl_stats_quantile_from_sorted_data for the
 *    outer ring
 * mindist: smallest distance d for which diff is evaluated
 * maxdist: largest distance d which is tried
 * owidth: number of rings which form the outer ring
 *
 * output will be written to bestd, bestdiff, s1 (inner minimum) and
 * s2 (outer quantile) (single values NOT arrays), but only if a diff
 * larger than the incoming *bestdiff is found; *bestdiff therefore has
 * to be initialized by the caller
 */
void
get_best_extension(NMMatrix *m, const unsigned long idxi, const unsigned long idxj, const unsigned long mindist, const unsigned long maxdist, const unsigned long owidth, const double p, unsigned long* bestd, double* bestdiff, double *s1, double *s2, double *inside, double *outside)
{
  unsigned long d, j, length, totlength;
  double q, min, nmin, diff;

  min = DBL_MAX;

  for(d=0; d <= maxdist; d++)
    {
      /* inside */
      length = get_k_distance_ring(m, d, idxi, idxj, inside);

      /* an empty ring lies completely outside of the matrix and so does
       * every larger ring (inner as well as outer), i.e. nothing can
       * change anymore; stopping here also keeps gsl_stats_min from
       * being called with n == 0, which would read stale inside[0] */
      if(length == 0)
        break;

      nmin = gsl_stats_min(inside, 1, length);
      if(min > nmin)
        min = nmin;

      if(d < mindist)
        continue;

      /* outside - get ring of width owidth */
      totlength = 0;
      for(j=1; j <= owidth; j++)
        totlength += get_k_distance_ring(m, (d+j), idxi, idxj, &outside[totlength]);

      if(totlength == 0)
        continue;

      qsort(outside, totlength, sizeof(outside[0]), compare_double);
      q = gsl_stats_quantile_from_sorted_data(outside, 1, totlength, p);

      diff = min - q;

      if(diff > *bestdiff)
        {
          *bestdiff = diff;
          *bestd = d;
          *s1 = min;
          *s2 = q;
        }
    }
}


/* ---------------------- get_best_extension_minmean --------------------------
 * starting from (idxi, idxj) extends inner area ring by ring and shifts
 * outer ring to get optimal extension for plateau like area
 * m should contain non-logarithmic values
 * compares minimum area vs. mean ring
 *
 * for every distance d in [0, maxdist] the minimum over all rings 0..d
 * (inner area) is determined; for d >= mindist it is compared with the
 * mean of the rings d+1..d+owidth (outer ring):
 *   diff = [inner minimum] / ([outer mean] + SMALLEST)
 *
 * inside, outside: working arrays, have to be allocated (size M * N),
 *                  will be overwritten
 * mindist: smallest distance d for which diff is evaluated
 * maxdist: largest distance d which is tried
 * owidth: number of rings which form the outer ring
 *
 * output will be written to bestd, bestdiff, s1 (inner minimum) and
 * s2 (outer mean) (single values NOT arrays), but only if a diff larger
 * than the incoming *bestdiff is found; *bestdiff therefore has to be
 * initialized by the caller
 */
void
get_best_extension_minmean(NMMatrix *m, const unsigned long idxi, const unsigned long idxj, const unsigned long mindist, const unsigned long maxdist, const unsigned long owidth, unsigned long* bestd, double* bestdiff, double *s1, double *s2, double *inside, double *outside)
{
  unsigned long d, j, length, totlength;
  double outmean, min, nmin, diff;

  min = DBL_MAX;

  for(d=0; d <= maxdist; d++)
    {
      /* inside */
      length = get_k_distance_ring(m, d, idxi, idxj, inside);

      /* an empty ring lies completely outside of the matrix and so does
       * every larger ring (inner as well as outer), i.e. nothing can
       * change anymore; stopping here also keeps gsl_stats_min from
       * being called with n == 0, which would read stale inside[0] */
      if(length == 0)
        break;

      nmin = gsl_stats_min(inside, 1, length);
      if(min > nmin)
        min = nmin;

      if(d < mindist)
        continue;

      /* outside - get ring of width owidth */
      totlength = 0;
      for(j=1; j <= owidth; j++)
        totlength += get_k_distance_ring(m, (d+j), idxi, idxj, &outside[totlength]);

      if(totlength == 0)
        continue;

      outmean = gsl_stats_mean(outside, 1, totlength);

      /* SMALLEST is added to the denominator; presumably to avoid a
       * division by zero (see constants.h) */
      diff = min / (outmean + SMALLEST);

      if(diff > *bestdiff)
        {
          *bestdiff = diff;
          *bestd = d;
          *s1 = min;
          *s2 = outmean;
        }
    }
}


/* ---------------------- get_best_extension_quantiles --------------------------
 * starting from (idxi, idxj) extends inner area ring by ring and shifts
 * outer ring to get optimal extension for plateau like area
 * m should contain logarithmic values
 * compares quantiles for inner and outer circle (ring)
 *
 * for every distance d in [0, maxdist] the elements of ring d are added
 * to the inner area (rings 0..d); for d >= mindist the pin quantile of
 * the inner area is compared with the pout quantile of the rings
 * d+1..d+owidth (outer ring):
 *   diff = [inner quantile] - [outer quantile]
 *
 * inside, outside: working arrays, have to be allocated (size M * N),
 *                  will be overwritten
 * pin, pout: fractions passed to gsl_stats_quantile_from_sorted_data
 *            for inner area and outer ring
 * mindist: smallest distance d for which diff is evaluated
 * maxdist: largest distance d which is tried
 * owidth: number of rings which form the outer ring
 *
 * output will be written to bestd, bestdiff, s1 (inner quantile) and
 * s2 (outer quantile) (single values NOT arrays), but only if a diff
 * larger than the incoming *bestdiff is found; *bestdiff therefore has
 * to be initialized by the caller
 */
void
get_best_extension_quantiles(NMMatrix *m, const unsigned long idxi, const unsigned long idxj, const unsigned long mindist, const unsigned long maxdist, const unsigned long owidth, const double pin, const double pout, unsigned long* bestd, double* bestdiff, double *s1, double *s2, double *inside, double *outside)
{
  unsigned long d, j, length, totlength, totlengthin;
  double qin, qout, diff;

  totlengthin = 0;

  for(d=0; d <= maxdist; d++)
    {
      /* inside - append ring d to the elements collected so far */
      length = get_k_distance_ring(m, d, idxi, idxj, &inside[totlengthin]);

      /* an empty ring lies completely outside of the matrix and so does
       * every larger ring (inner as well as outer), i.e. nothing can
       * change anymore and re-sorting the inner area would be wasted */
      if(length == 0)
        break;

      totlengthin += length;
      qsort(inside, totlengthin, sizeof(inside[0]), compare_double);

      qin = gsl_stats_quantile_from_sorted_data(inside, 1, totlengthin, pin);

      if(d < mindist)
        continue;

      /* outside - get ring of width owidth */
      totlength = 0;
      for(j=1; j <= owidth; j++)
        totlength += get_k_distance_ring(m, (d+j), idxi, idxj, &outside[totlength]);

      if(totlength == 0)
        continue;

      qsort(outside, totlength, sizeof(outside[0]), compare_double);
      qout = gsl_stats_quantile_from_sorted_data(outside, 1, totlength, pout);

      diff = qin - qout;

      if(diff > *bestdiff)
        {
          *bestdiff = diff;
          *bestd = d;
          *s1 = qin;
          *s2 = qout;
        }
    }
}

/* ---------------------- get_best_extension_mean --------------------------
 * starting from (idxi, idxj) extends inner area ring by ring and shifts
 * outer ring to get optimal extension for plateau like area
 * m should contain probabilities
 *
 * evaluates ratio of [inner mean]/[outer mean]; the inner mean covers
 * rings 0..d and is updated incrementally as a weighted mean of the
 * previous inner mean and the mean of the new ring d; the outer mean
 * covers rings d+1..d+owidth
 *
 * inside, outside: working arrays, have to be allocated (size M * N),
 *                  will be overwritten
 * mindist: smallest distance d for which the ratio is evaluated
 * maxdist: largest distance d which is tried
 * owidth: number of rings which form the outer ring
 *
 * output will be written to bestd, bestdiff, s1 (inner mean) and
 * s2 (outer mean) (single values NOT arrays), but only if a ratio
 * larger than the incoming *bestdiff is found
 */
void
get_best_extension_mean(NMMatrix *m, const unsigned long idxi, const unsigned long idxj, const unsigned long mindist, const unsigned long maxdist, const unsigned long owidth, unsigned long* bestd, double* bestdiff, double *s1, double *s2, double *inside, double *outside)
{
  unsigned long d, j, ring_len, n_out, n_in, n_in_prev;
  double inmean, ring_mean, outmean, ratio;

  n_in = 0;
  inmean = 0;

  for(d=0; d <= maxdist; d++)
    {
      /* inside - append ring d behind the elements collected so far */
      ring_len = get_k_distance_ring(m, d, idxi, idxj, &inside[n_in]);
      n_in_prev = n_in;
      n_in += ring_len;

      if(d == 0)
        {
          inmean = gsl_stats_mean(inside, 1, n_in);
        }
      else
        {
          /* weighted combination of old inner mean and new ring mean */
          ring_mean = gsl_stats_mean(&inside[n_in_prev], 1, ring_len);
          inmean = n_in_prev * inmean / n_in + ring_len * ring_mean / n_in;
        }

      if(d < mindist)
        continue;

      /* outside - get ring of width owidth */
      n_out = 0;
      for(j=1; j <= owidth; j++)
        n_out += get_k_distance_ring(m, (d+j), idxi, idxj, &outside[n_out]);

      if(n_out == 0)
        continue;

      outmean = gsl_stats_mean(outside, 1, n_out);

      ratio = inmean/ (outmean + SMALLEST);

      if(ratio > *bestdiff)
        {
          *bestdiff = ratio;
          *bestd = d;
          *s1 = inmean;
          *s2 = outmean;
        }
    }
}


/* ---------------------- select_starting_points ----------------------------
 * determines starting points to search for plateau like area
 *
 * m: matrix to select from
 * w: working array (at least nrows * ncols doubles), will be overwritten
 *    with a sorted copy of the matrix entries
 *    (assumes m->entries holds all nrows * ncols values contiguously -
 *    TODO confirm against datatypes.h)
 * p: percentage (0-1) to determine quantile value
 * ids_i, ids_j: output arrays for row / column indices of all entries
 *               which are larger or equal to the quantile value; both
 *               have to be large enough for all selected entries
 *               (nrows * ncols in the worst case)
 *
 * returns number of selected elements
 */
unsigned long
select_starting_points(NMMatrix *m, double *w, unsigned long *ids_i, unsigned long *ids_j, const double p)
{
  unsigned long row, col, total, nsel;
  double threshold;

  /* get quantile from a sorted copy of all entries */
  total = m->ncols * m->nrows;
  memcpy(w, m->entries, total * sizeof(w[0]) );
  qsort(w, total, sizeof(w[0]), compare_double);
  threshold = gsl_stats_quantile_from_sorted_data(w, 1, total, p);

  /* select ids - row by row */
  nsel = 0;
  for(row = 0; row < m->nrows; row++)
    {
      for(col = 0; col < m->ncols; col++)
        {
          if(m->e[row][col] >= threshold)
            {
              ids_i[nsel] = row;
              ids_j[nsel] = col;
              nsel++;
            }
        }
    }

  return nsel;
}
