/* findplateau.c
 *
 * This program identifies and filters for plateau-like regions in a ddG matrix
 * as supplied by parsed RNAcop output.
 * Different criteria can be used to identify them, but all of them compare a
 * statistic of an interior circle to that of an adjacent ring.
 *
 *    Copyright (C) 2014  Nikolai Hecker
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <errno.h>
#include <float.h>
#include <getopt.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_cdf.h>
#include "datatypes.h"
#include "matrix.h"
#include "file.h"
#include "plateau.h"
#include "utility.h"


/* --------------------- print_usage ---------------------------------------- */
/*
 * Writes the command line help to stdout.
 *
 * filename  program name shown in the synopsis line
 * mindist   value reported as default of 'minradius'
 * maxdist   value reported as default of 'maxradius'
 * owidth    value reported as default of 'ringwidth'
 * pin       value reported as default of 'p-area'
 * pout      value reported as default of 'p-ring'
 * pstart    value reported as default of 'p-start'
 * thr       value reported as default of 'threshold'
 */
static void
print_usage(const char *filename, const unsigned long mindist, const unsigned long maxdist, const unsigned long owidth, const double pin, const double pout, const double pstart, const double thr)
{
  /* synopsis */
  fputs("Usage:\n", stdout);
  printf("\t%s [OPTIONS]\n", filename);

  /* option that has no default */
  fputs("\t\ti|input\tFILENAME\t\tInput filename\n", stdout);

  /* geometry of plateau area and surrounding ring */
  printf("\t\tm|minradius\tINTEGER\t\tMin. radius for plateau area (default=%lu)\n"
         "\t\td|maxradius\tINTEGER\t\tMax. radius for plateau area (default=%lu)\n"
         "\t\tr|ringwidth\tINTEGER\t\tWidth of outer ring around plateau (default=%lu)\n",
         mindist, maxdist, owidth);

  /* statistic used for the comparison */
  fputs("\t\tc|compare\tEXPRESSION\tstatistic for plateau area/outer ring comparison (either: mean, quantiles, minquantile, minmean)\n", stdout);

  /* quantile probabilities and the reporting threshold */
  printf("\t\tp|p-ring\tFLOAT\t\tP to determine quantile for outer ring(default=%.3f)\n"
         "\t\ta|p-area\tFLOAT\t\tP to determine quantile for plateau area (default=%.3f)\n"
         "\t\ts|p-start\tFLOAT\t\tP to determine quantile for starting points (default=%.3f)\n"
         "\t\tt|threshold\tFLOAT\t\tMin. threshold for difference between plateau area and outer ring (default=%.3f)\n",
         pout, pin, pstart, thr);

  /* short description of every comparison method */
  fputs("\n\nIf 'mean' or 'minmean' is used ddG values are converted to proability whereas log10(probability) is used for 'quantiles' and 'minquantile'.\n"
        "\t'minquantile':\tmin. AREA is compared agains P-quantile RING\n"
        "\t'quantiles':\tP-quantile AREA is compared agains P-quantile RING\n"
        "\t'mean':\t\tmean AREA is compared agains mean RING\n"
        "\t'minmean':\tmin. AREA is compared agains mean RING\n",
        stdout);
}
 
/* ---------------------- comparison methods -------------------------------- */
/* statistics that can be selected with the 'compare' option */
enum fp_method
  {
    FP_METHOD_NONE = 0,
    FP_METHOD_MEAN,
    FP_METHOD_QUANTILES,
    FP_METHOD_MINQUANTILE,
    FP_METHOD_MINMEAN
  };

/* ---------------------- fp_parse_method ----------------------------------- */
/*
 * Maps the argument of the 'compare' option to its enum value.
 * Returns FP_METHOD_NONE if 'name' is not one of the accepted method names.
 */
static enum fp_method
fp_parse_method(const char *name)
{
  if(strcmp(name, "mean") == 0)
    return FP_METHOD_MEAN;
  if(strcmp(name, "quantiles") == 0)
    return FP_METHOD_QUANTILES;
  if(strcmp(name, "minquantile") == 0)
    return FP_METHOD_MINQUANTILE;
  if(strcmp(name, "minmean") == 0)
    return FP_METHOD_MINMEAN;

  return FP_METHOD_NONE;
}

/* ---------------------- fp_ulong_option ----------------------------------- */
/*
 * Converts the argument 'arg' of option 'name' to an unsigned long.
 * Anything that is not a complete, non-negative decimal integer within the
 * range of unsigned long is reported on stderr and terminates the program
 * with EXIT_FAILURE.
 */
static unsigned long
fp_ulong_option(const char *name, const char *arg)
{
  char *end = NULL;
  unsigned long value;

  errno = 0;
  value = strtoul(arg, &end, 10);

  /* strtoul accepts a leading '-' and wraps the value around,
     hence the explicit check for a sign */
  if(strchr(arg, '-') != NULL || end == arg || *end != '\0' || errno == ERANGE)
    {
      fprintf(stderr, "Error! Invalid value '%s' for option '%s', expected a non-negative integer.\n", arg, name);
      exit(EXIT_FAILURE);
    }

  return value;
}

/* ---------------------- fp_double_option ---------------------------------- */
/*
 * Converts the argument 'arg' of option 'name' to a double.
 * Trailing garbage, out-of-range values and NaN are reported on stderr and
 * terminate the program with EXIT_FAILURE.
 */
static double
fp_double_option(const char *name, const char *arg)
{
  char *end = NULL;
  double value;

  errno = 0;
  value = strtod(arg, &end);

  if(end == arg || *end != '\0' || errno == ERANGE || isnan(value))
    {
      fprintf(stderr, "Error! Invalid value '%s' for option '%s', expected a number.\n", arg, name);
      exit(EXIT_FAILURE);
    }

  return value;
}

/* ---------------------- main ---------------------------------------------- */
/*
 * Parses the command line, reads the ddG matrix from the input file and
 * prints one tab separated line (i, j, radius, diff, s_area, s_ring) for
 * every starting point whose best extension has a radius > 0 and a
 * difference of at least the threshold.
 *
 * Exits with EXIT_SUCCESS if the matrix was processed, with EXIT_FAILURE on
 * invalid or missing options, unreadable input or failed memory allocation.
 */
int
main(int argc, char *argv[])
{
  NMMatrix *m = NULL;
  unsigned long max_bfsize = 10000;   /* handed to read_rddG_mat */
  unsigned long mindist = 2, maxdist = 100, owidth = 1;
  unsigned long idxi, idxj, *idsi = NULL, *idsj = NULL, length_ids, i;
  double *inside = NULL, *outside = NULL;
  double pin = 0.5, pout = 0.5, pstart = 0.9, diffthr = 2;
  double t = 37.0;   /* handed to convert2prob_mat; presumably temperature in degrees Celsius - TODO confirm */
  unsigned long bestd;
  double bestdiff, s1, s2;
  size_t ncells;
  int c, status = EXIT_FAILURE;
  char *inf = NULL;
  enum fp_method method = FP_METHOD_NONE;

  /* options */
  static struct option options[] =
    {
      {"input", required_argument, 0, 'i'},
      {"minradius", required_argument, 0, 'm'},
      {"maxradius", required_argument, 0, 'd'},
      {"ringwidth", required_argument, 0, 'r'},
      {"compare", required_argument, 0, 'c'},
      {"p-ring", required_argument, 0, 'p'},
      {"p-area", required_argument, 0, 'a'},
      {"threshold", required_argument, 0, 't'},
      {"p-start", required_argument, 0, 's'},
      {0, 0, 0, 0}
    };

  /* ---- option parsing ---- */
  while((c = getopt_long(argc, argv, "i:m:d:r:c:p:a:t:s:", options, NULL)) != -1)
    {
      switch(c)
        {
        case 'i':
          /* optarg points into argv, which stays valid until the program
             ends; no copy is needed and a repeated option cannot leak */
          inf = optarg;
          break;

        case 'm':
          mindist = fp_ulong_option("minradius", optarg);
          break;

        case 'd':
          maxdist = fp_ulong_option("maxradius", optarg);
          break;

        case 'r':
          owidth = fp_ulong_option("ringwidth", optarg);
          break;

        case 'c':
          method = fp_parse_method(optarg);
          if(method == FP_METHOD_NONE)
            {
              fprintf(stderr, "Comparison method has to be either 'mean', 'quantiles', 'minquantile' or 'minmean'.\n");
              exit(EXIT_FAILURE);
            }
          break;

        case 'p':
          pout = fp_double_option("p-ring", optarg);
          break;

        case 'a':
          pin = fp_double_option("p-area", optarg);
          break;

        case 't':
          diffthr = fp_double_option("threshold", optarg);
          break;

        case 's':
          pstart = fp_double_option("p-start", optarg);
          break;

        default:
          /* unknown option or missing argument; getopt_long has already
             written a diagnostic to stderr */
          print_usage(argv[0], mindist, maxdist, owidth, pin, pout, pstart, diffthr);
          exit(EXIT_FAILURE);
        }
    }

  if(inf == NULL)
    {
      print_usage(argv[0], mindist, maxdist, owidth, pin, pout, pstart, diffthr);
      fprintf(stderr, "Missing input file name.\n");
      exit(EXIT_FAILURE);
    }

  if(method == FP_METHOD_NONE)
    {
      print_usage(argv[0], mindist, maxdist, owidth, pin, pout, pstart, diffthr);
      fprintf(stderr, "Missing comparison method, it has to be either 'mean', 'quantiles', 'minquantile' or 'minmean'.\n");
      exit(EXIT_FAILURE);
    }

  /* --- computations --- */

  /* read input */
  m = read_rddG_mat(inf, max_bfsize);

  if(m == NULL)
    {
      fprintf(stderr, "Error while reading '%s'! Check file name or format.\n", inf);
      goto cleanup;
    }

  /* work arrays with one entry per matrix cell; the size is taken from the
     element type (sizeof *ptr), not from the pointer itself */
  ncells = m->ncols * m->nrows;

  inside = malloc(ncells * sizeof *inside);
  outside = malloc(ncells * sizeof *outside);
  idsi = malloc(ncells * sizeof *idsi);
  idsj = malloc(ncells * sizeof *idsj);

  if(inside == NULL || outside == NULL || idsi == NULL || idsj == NULL)
    {
      fprintf(stderr, "Error! Mem allocation for work arrays failed.\n");
      goto cleanup;
    }

  /* convert matrix to probabilities */
  convert2prob_mat(m, t);

  printf("i\tj\tradius\tdiff\ts_area\ts_ring\n");

  /* the quantile based methods operate on log10(probability) */
  if(method == FP_METHOD_QUANTILES || method == FP_METHOD_MINQUANTILE)
    convert2log10_mat(m);

  length_ids = select_starting_points(m, inside, idsi, idsj, pstart);

  /* get best extension for every starting point */
  for(i = 0; i < length_ids; i++)
    {
      idxi = idsi[i];
      idxj = idsj[i];

      bestd = 0;
      bestdiff = 0;
      s1 = s2 = 0;

      switch(method)
        {
        case FP_METHOD_MEAN:
          get_best_extension_mean(m, idxi, idxj, mindist, maxdist, owidth, &bestd, &bestdiff, &s1, &s2, inside, outside);
          break;

        case FP_METHOD_QUANTILES:
          get_best_extension_quantiles(m, idxi, idxj, mindist, maxdist, owidth, pin, pout, &bestd, &bestdiff, &s1, &s2, inside, outside);
          break;

        case FP_METHOD_MINQUANTILE:
          get_best_extension(m, idxi, idxj, mindist, maxdist, owidth, pout, &bestd, &bestdiff, &s1, &s2, inside, outside);
          break;

        case FP_METHOD_MINMEAN:
          get_best_extension_minmean(m, idxi, idxj, mindist, maxdist, owidth, &bestd, &bestdiff, &s1, &s2, inside, outside);
          break;

        default:
          /* not reachable, method has been validated above */
          break;
        }

      if(bestd == 0 || !(bestdiff >= diffthr))
        continue;

      /* probabilities are printed in scientific notation,
         log10 values in fixed notation */
      if(method == FP_METHOD_MEAN || method == FP_METHOD_MINMEAN)
        printf("%lu\t%lu\t%lu\t%.5E\t%.5E\t%.5E\n", idxi, idxj, bestd, bestdiff, s1, s2);
      else
        printf("%lu\t%lu\t%lu\t%.5f\t%.5f\t%.5f\n", idxi, idxj, bestd, bestdiff, s1, s2);
    }

  status = EXIT_SUCCESS;

 cleanup:
  /* free(NULL) is a no-op, so partially completed setups are handled too */
  free(idsi);
  free(idsj);
  free(inside);
  free(outside);
  if(m != NULL)
    nmm_free(m);

  exit(status);
}
