/* findplateau.c
 *
 * This tool can be used to select flanking regions from processed RNAcop
 * output. It takes the output of 'rank_flanks.pl', i.e. *_rddG.tsv files,
 * as input.
 *
 *    Copyright (C) 2015  Nikolai Hecker
 *
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include <math.h>
#include <limits.h>
#include <getopt.h>
#include "datatypes.h"
#include "matrix.h"
#include "constants.h"
#include "file.h"
#include "plateau.h"
#include "utility.h"
#include "disjoint.h"


/* --------------------- print_usage ---------------------------------------- */
/*
 * Writes the command-line help text to stdout.
 *
 * filename  program name shown in the synopsis line
 * size_thr  value printed as default of 's|setcoverage'
 * thr       value printed as default of 't|threshold'
 * maxi      value printed as default of 'x|maxi'
 * maxj      value printed as default of 'y|maxj'
 * mini      value printed as default of 'l|mini'
 * minj      value printed as default of 'r|minj'
 * maxdist   value printed as default of 'd|maxradius'
 */
static void
print_usage(const char *filename, const double size_thr, const double thr, 
	    unsigned long maxi, unsigned long maxj, 
	    unsigned long mini, unsigned long minj, unsigned long maxdist)
{
  /* synopsis */
  printf("Usage:\n"
         "\t%s [OPTIONS]\n",
         filename);

  /* options without a default value */
  fputs("\t\ti|input\t\tFILENAME\tInput filename\n"
        "\t\to|output\tFILENAME\tOutput TSV filename\n",
        stdout);

  /* floating point options */
  printf("\t\ts|setcoverage\tFLOAT\t\tminimum ratio of covered elements in each set (default=%.3f)\n"
         "\t\tt|threshold\tFLOAT\t\tMax. difference to maximum of matrix (default=%.3f)\n",
         size_thr, thr);

  /* integer options */
  printf("\t\td|maxradius\tINTEGER\t\tMaximum area extension/radius (default=%lu)\n"
         "\t\tx|maxi\t\tINTEGER\t\tPrune matrix to maximum row index (default=%lu)\n"
         "\t\ty|maxj\t\tINTEGER\t\tPrune matrix to maximum column index (default=%lu)\n"
         "\t\tl|mini\t\tINTEGER\t\tMinimum row index (default=%lu)\n"
         "\t\tr|minj\t\tINTEGER\t\tMinimum column index (default=%lu)\n",
         maxdist, maxi, maxj, mini, minj);
}
 
/* ---------------------- main ---------------------------------------------- */
/*
 * Program entry point.
 *
 * Steps performed:
 *   1. Parse the command-line options (see print_usage).
 *   2. Read the input matrix with read_rddG_mat() and prune it to at most
 *      maxi+1 rows and maxj+1 columns.
 *   3. Transform the matrix with convert2prob_mat() and convert2log10_mat().
 *   4. Overwrite all cells with row < mini or column < minj by -99999.
 *   5. Sort all cells by value and process them in that order until the
 *      difference between the first sorted cell and the current cell reaches
 *      the threshold. Every processed cell becomes a disjoint set that is
 *      merged with its already processed direct neighbours (4-neighbourhood).
 *   6. For every set with more than one element, print bounding rectangle and
 *      weighted centre to stdout, then repeatedly select the centre with the
 *      largest area reported by get_largest_id_area(), write it to the output
 *      TSV and mask the selected region, until the requested fraction of the
 *      set is covered.
 *
 * Terminates via exit() with EXIT_SUCCESS, or EXIT_FAILURE on missing
 * arguments, unreadable input, allocation failure or output errors.
 */
int
main(int argc, char *argv[])
{
  FILE *fout;
  NMMatrix *m;
  NMINTMatrix *mint, *mpid;
  unsigned long max_bfsize, count, area, best_i, best_j, best_area, maxi, maxj, mini, minj, oncols, onrows, maxdist;
  unsigned long i, j, idxi, idxj, pid, nsets, sid, eid, left, right, down, up, r, best_r;
  unsigned long ncells;
  double t, diffthr, mass, x, y, w;
  int opt_idx, c, status;
  char *inf, *outf;
  double csize, size_thr;

  unsigned long *parent, *rank, *size;
  TWOIDDbl *svals;
  
  /* options */
  static struct option options[] =
    {
      {"input", required_argument, 0, 'i'},
      {"output", required_argument, 0, 'o'},
      {"maxradius", required_argument, 0, 'd'},
      {"threshold", required_argument, 0, 't'},
      {"setcoverage", required_argument, 0, 's'},
      {"maxi", required_argument, 0, 'x'},
      {"maxj", required_argument, 0, 'y'},
      {"mini", required_argument, 0, 'l'},
      {"minj", required_argument, 0, 'r'},
      {0, 0, 0, 0}
    };

  /* parameters/default values */
  t = 37.0;   /* passed to convert2prob_mat; presumably a temperature - TODO confirm */
  m = NULL;
  mint = NULL;
  mpid = NULL;
  inf = NULL;
  outf = NULL;
  max_bfsize = 10000;
  mass = 0;
  size_thr = 0.6;
  status = EXIT_SUCCESS;
  opt_idx = 0;

  maxi = maxj = maxdist = 500;
  mini = minj = 0;

  diffthr = 1;
  r = best_r = 0;

  j = i = idxi = idxj = pid = sid = eid = 0;
  count = 0;
  nsets = 0;
  x = y = w = 0;
  area = best_i = best_j = best_area = 0;
  csize = 0;

  /* ---- option parsing ---- */
  while(1)
    {
      c = getopt_long(argc, argv, "i:o:d:t:s:x:y:l:r:", options, &opt_idx);

      /* no option left */
      if( c == -1)
        break;

      /* process options */
      switch(c)
        {
        case 0:
          if (options[opt_idx].flag != 0)
            break;
          printf ("option %s", options[opt_idx].name);
          if (optarg)
            printf (" with arg %s", optarg);
          printf ("\n");
          break;

        case 'i':
          /* release a previous value if the option is given more than once */
          free(inf);
          if( (inf = malloc((strlen(optarg) + 1) * sizeof(inf[0]))) == NULL)
            {
              fprintf(stderr, "Error! Mem allocation for filename failed.\n");
              exit(EXIT_FAILURE);
            }
          strcpy(inf, optarg);
          break;

        case 'o':
          /* release a previous value if the option is given more than once */
          free(outf);
          if( (outf = malloc((strlen(optarg) + 1) * sizeof(outf[0]))) == NULL)
            {
              fprintf(stderr, "Error! Mem allocation for filename failed.\n");
              exit(EXIT_FAILURE);
            }
          strcpy(outf, optarg);
          break;

        case 'd':
          maxdist = atol(optarg);
          break;

        case 'x':
          maxi = atol(optarg);
          break;

        case 'y':
          maxj = atol(optarg);
          break;

        case 'l':
          mini = atol(optarg);
          break;

        case 'r':
          minj = atol(optarg);
          break;

        case 't':
          diffthr = atof(optarg);
          break;

        case 's':
          size_thr = atof(optarg);
          break;

        default:
          /* unknown options are ignored here, as before */
          break;
        }
    }

  if(inf == NULL)
    {
      print_usage(argv[0], size_thr, diffthr, maxi, maxj, mini, minj, maxdist);
      fprintf(stderr, "Missing input file name.\n");

      exit(EXIT_FAILURE);
    }

  if(outf == NULL)
    {
      print_usage(argv[0], size_thr, diffthr, maxi, maxj, mini, minj, maxdist);
      fprintf(stderr, "Missing output file name.\n");

      exit(EXIT_FAILURE);
    }

  /* check input */
  if(size_thr > 1)
    {
      fprintf(stderr, "Warning! Reset 'setcoverage' to 1.\n");
      size_thr = 1;
    }

  /* --- computations --- */

  /* read input; must be checked before the matrix is touched */
  m = read_rddG_mat(inf, max_bfsize);
  if(m == NULL)
    {
      fprintf(stderr, "Error while reading '%s'! Check file name or format.\n", inf);

      free(inf);
      free(outf);
      exit(EXIT_FAILURE);
    }

  /* remember the original dimensions, they are restored before nmm_free() */
  oncols = m->ncols;
  onrows = m->nrows;

  /* prune to at most maxi+1 rows / maxj+1 columns; written so that
     maxi/maxj == ULONG_MAX cannot wrap around to 0 */
  if(m->nrows > maxi)
    m->nrows = maxi + 1;
  if(m->ncols > maxj)
    m->ncols = maxj + 1;

  ncells = (unsigned long)m->nrows * (unsigned long)m->ncols;
  if(ncells == 0)
    {
      fprintf(stderr, "Error! Matrix read from '%s' is empty.\n", inf);

      free(inf);
      free(outf);
      exit(EXIT_FAILURE);
    }

  mint = nmintm_init(m->nrows, m->ncols);
  mpid = nmintm_init(m->nrows, m->ncols);

  if(mint == NULL || mpid == NULL)
    {
      fprintf(stderr, "Failed to allocate index matrix.\n");

      free(inf);
      free(outf);
      exit(EXIT_FAILURE);
    }

  /* one entry per matrix cell; calloc also guards the size multiplication */
  if((svals = calloc(ncells, sizeof(svals[0]))) == NULL)
    {
      fprintf(stderr, "Error! Mem allocation for array failed.\n");
      exit(EXIT_FAILURE);
    }

  if((parent = calloc(ncells, sizeof(parent[0]))) == NULL)
    {
      fprintf(stderr, "Error! Mem allocation for array failed.\n");
      exit(EXIT_FAILURE);
    }

  if((rank = calloc(ncells, sizeof(rank[0]))) == NULL)
    {
      fprintf(stderr, "Error! Mem allocation for array failed.\n");
      exit(EXIT_FAILURE);
    }

  if((size = calloc(ncells, sizeof(size[0]))) == NULL)
    {
      fprintf(stderr, "Error! Mem allocation for array failed.\n");
      exit(EXIT_FAILURE);
    }

  /* convert matrix to probabilities */
  convert2prob_mat(m, t);
  convert2log10_mat(m);

  /* init data structures*/
  count = 0;
  for(i=0; i < m->nrows; i++)
    for(j=0; j < m->ncols; j++)
      {
        svals[count].id = count;
        svals[count].i = i;
        svals[count].j = j;

        /* cells below the minimum indices get a very low value */
        if((i < mini) || (j < minj))
          {
            m->e[i][j] = -99999;
          }
        /* values are negated for sorting and restored afterwards;
           presumably so that the largest value ends up first - TODO confirm
           against compare_twoiddbl */
        svals[count++].v = -m->e[i][j];
      }

  qsort(svals, ncells, sizeof(svals[0]), compare_twoiddbl);

  /* ids become the position in sorted order; mint maps a cell to its id */
  for(i=0; i < ncells; i++)
    {
      svals[i].id = i;
      svals[i].v = -svals[i].v;

      mint->e[svals[i].i][svals[i].j] = svals[i].id;
    }

  /* process energy landscape */
  /* 'count' is the number of processed cells, i.e. svals[0..count-1] */
  count = 0;
  for(i=0; i < ncells; i++)
    {

      idxi = svals[i].i;
      idxj = svals[i].j;

      /* check abort criteria */
      if( (m->e[svals[0].i][svals[0].j] - m->e[idxi][idxj]) >= diffthr)
        break;

      count = i + 1;

      /* create new set */
      make_set(svals[i].id, parent, rank, size);
      nsets += 1;

      /* test element up, down, left, right; a neighbour with a smaller id
         has been processed in an earlier iteration */
      /* down */
      if( idxi > 0 )
        {
          if(mint->e[idxi-1][idxj] < svals[i].id)
            {
              if(union_sets(mint->e[idxi-1][idxj], svals[i].id, parent, rank, size))
                {
                  nsets -= 1;
                }
            }
        }
      /* up */
      if( idxi < m->nrows-1 )
        {
          if(mint->e[idxi+1][idxj] < svals[i].id)
            {
              if(union_sets(mint->e[idxi+1][idxj], svals[i].id, parent, rank, size))
                {
                  nsets -= 1;
                }
            }
        }
      /* left */
      if( idxj > 0 )
        {
          if(mint->e[idxi][idxj-1] < svals[i].id)
            {
              if(union_sets(mint->e[idxi][idxj-1], svals[i].id, parent, rank, size))
                {
                  nsets -= 1;
                }
            }
        }
      /* right */
      if( idxj < m->ncols-1 )
        {
          if(mint->e[idxi][idxj+1] < svals[i].id)
            {
              if(union_sets(mint->e[idxi][idxj+1], svals[i].id, parent, rank, size))
                {
                  nsets -= 1;
                }
            }
        }

      /* result only needed for debugging output; the call is kept because
         find_set may modify 'parent' - TODO confirm */
      pid = find_set(svals[i].id, parent);
    }

  /*open output*/
  if( (fout = fopen(outf, "w")) == NULL)
    {
      fprintf(stderr, "Error! Can't open '%s' for write access.\n", outf);
      exit(EXIT_FAILURE);
    }
  fprintf(fout, "l5prime\tl3prime\tarea\tradius\tlogp\n");

  /* process sets */
  /* UINT_MAX marks cells that are undefined, i.e. in no set (any more) */
  nmintm_fill(mpid, UINT_MAX);

  /* compress paths for all, get roots , fill PID matrix*/
  for(i=0; i < count; i++)
    {
      svals[i].pid = find_set(svals[i].id, parent);
      mpid->e[svals[i].i][svals[i].j] = svals[i].pid;
    }

  /* sort roots, so that members of the same set are adjacent */
  qsort(svals, count, sizeof(svals[0]), compare_pid_twoiddbl);

  for(i=0; i < count; i++)
    {
      pid = svals[i].pid;
      sid = i;
      eid = i;

      /* find the last element with the same root; the bound is tested
         first so that svals is never read beyond the processed cells */
      while(i < count && pid == svals[i].pid)
        {
          eid=i;
          i++;
        }
      i=eid;

      /* process set if more than one element present */
      if( eid > sid)
        {
          pid = svals[sid].pid;
          printf("** Root %lu: [%lu] %lu..%lu ", pid, size[pid], sid, eid );

          /* get rectangular boundaries, center of mass */
          down = ULONG_MAX;
          left = ULONG_MAX;
          right = 0;
          up = 0;

          mass = 0;
          x = 0;
          y = 0;
          for(j = sid; j <= eid; j++)
            {
              /* weight of the cell: 10 to the power of its matrix value */
              w = pow(10, m->e[svals[j].i][svals[j].j]);
              mass += w;
              x +=  w * (double)(svals[j].i);
              y +=  w * (double)(svals[j].j);

              if(svals[j].i < down)
                down = svals[j].i;

              if(svals[j].j < left)
                left = svals[j].j;

              if(svals[j].i > up)
                up = svals[j].i;

              if(svals[j].j > right)
                right = svals[j].j;

            }
          x /= mass;
          y /= mass;
          printf(" => CoM (%.3f, %.3f) RECT [%lu, %lu, %lu, %lu]\n", x, y, down, left, up, right);

          /* get largest square inside disjoint set */
          /* csize is the number of set elements not yet covered */
          csize = size[pid];
          while( ((double)size[pid] - (double)csize)/(double)size[pid] < size_thr)
            {
              best_i = 0;
              best_j = 0;
              best_area = 0;
              best_r = 0;

              for(j = sid; j <= eid; j++)
                {
                  r = 0;
                  area = 0;
                  /* skip cells that were masked by an earlier selection */
                  if(mpid->e[svals[j].i][svals[j].j] != UINT_MAX)
                    area = get_largest_id_area(mpid, svals[j].pid, svals[j].i, svals[j].j, maxdist, &r);
                  if(area > best_area)
                    {
                      best_i = svals[j].i;
                      best_j = svals[j].j;
                      best_area = area;
                      best_r = r;
                    }
                }

              /* no remaining cell yields a positive area: csize could not
                 shrink any further and the loop would never terminate */
              if(best_area == 0)
                break;

              printf("BEST SQUARE CENTER (%lu, %lu) [%lu]: radius %lu\n", best_i, best_j, best_area, best_r);
              fprintf(fout, "%lu\t%lu\t%lu\t%lu\t%.4f\n", best_i, best_j, best_area, best_r, m->e[best_i][best_j]);
              /* set matrix elements to undefined */
              nmintm_fill_radius(mpid, best_i, best_j, best_r, UINT_MAX);
              csize -= best_area;
            }
        }
    }

  /* fclose flushes buffered output, so a failure here means lost data */
  if(fclose(fout) != 0)
    {
      fprintf(stderr, "Error! Failed to write '%s'.\n", outf);
      status = EXIT_FAILURE;
    }

  /* set matrix to original size */
  m->nrows = onrows;
  m->ncols = oncols;

  free(parent);
  free(rank);
  free(size);
  free(svals);
  free(inf);
  free(outf);
  nmm_free(m);
  nmintm_free(mint);
  nmintm_free(mpid);

  exit(status);
}
