/*********************************************************************

  support.c								    

  usage: support
              [-s | --support] [-l<limit> | --limit=<limit>] [<file>]

  See man page for more info

  000630 Bjarne Knudsen (bk@daimi.au.dk)

  Copyright (C) 2000 Bjarne Knudsen

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

*********************************************************************/

#include <stdlib.h>

#include "clib/col.h"
#include "clib/rna.h"
#include "clib/file.h"

/* Column file read from stdin by main().  When the input is stdin,
   pairnum(), support() and output() each walk colfile->entry (stopping
   at the first NULL entry) instead of reading from the FILE pointer;
   output() also takes the header from colfile->header.  It is left
   unset when a file name is given on the command line. */
Colfile *colfile;

/* Column numbers looked up in an entry with ReadColno()/EnsureCol().
   A value of 0 is treated by the callers as "column not present". */
typedef struct tagColno {
  int align_bp;   /* "align_bp" column */
  int alignpos;   /* "alignpos" column */
  int seq_bp;     /* "seq_bp" column */
  int seqpos;     /* "seqpos" column */
  int nuc;        /* "residue" column, or "nucleotide" as fallback */
  int supp;       /* "support" column written by output() */
} Colno;

/* Print the command line synopsis on stderr. */
void usage(void);

/* First pass over the input.  Returns an array with one int per
   position: 0 when no pair was seen, -1 when sequences disagree about
   the partner, otherwise the partner reported by FindPair().  Returns
   NULL on error. */
int *pairnum(FILE *fp);

/* Second pass.  Given the array from pairnum() and the support limit,
   returns one char per position: 'S' (supported), '.' (unpaired or
   below the limit) or 'N' (inconsistent pair).  Returns NULL on
   error. */
char *support(FILE *fp, int *pair, double limit);

/* Third pass.  Prints the header and all entries on stdout, filling a
   "support" column from supp in the RNA entries that have the needed
   columns; with option_s == 1 an extra "support" entry is appended.
   Returns 1 on error and 0 otherwise. */
int output(FILE *fp, char *supp, int option_s, int argc, char **argv);

/*
 * Parse the command line, then make three passes over the input:
 * pairnum() to collect pair numbers, support() to classify each
 * position, and output() to print the annotated entries.
 *
 * With a file name the file is closed and reopened between passes;
 * without one the input is read once from stdin into the global
 * colfile, which the passes then walk in memory.
 *
 * Returns 0 on success and 1 on any usage, I/O or processing error.
 */
int main(int argc, char **argv)
{
  FILE *fp;         /* For input */
  int len = 0;      /* Characters consumed by sscanf, stored via %n */
  CmdArg *cmdarg;   /* Command line arguments */
  char *s;          /* String for arguments, later the file name */
  double limit;     /* The limit for support */
  int *pair;        /* For pair numbers */
  char *supp;       /* Support classification per position */
  int option_s;     /* =1 on -s option */

  /* default options */
  limit = 2./3;
  option_s = 0;

  cmdarg = InitArgument(argc, argv);

  /* GetArgument() appears to hand back each option with one leading
     '-' removed (the "-limit=" test below relies on that), so the long
     form "--support" arrives here as "-support". */
  while ((s = GetArgument(cmdarg)) != NULL) {
    if (strncmp(s, "l", 1) == 0) {
      /* %n requires an int *; the whole remainder must be the number */
      if (sscanf(&s[1], "%lf%n", &limit, &len) != 1 ||
          (size_t)len + 1 != strlen(s)) {
        usage();
        return 1;
      }
    }
    else if (strncmp(s, "-limit=", 7) == 0) {
      if (sscanf(&s[7], "%lf%n", &limit, &len) != 1 ||
          (size_t)len + 7 != strlen(s)) {
        usage();
        return 1;
      }
    }
    else if (strncmp(s, "s", 1) == 0 || strcmp(s, "-support") == 0) {
      option_s = 1;
    }
    else {
      usage();
      return 1;
    }
  }

  if ((s = GetFilename(cmdarg)) == NULL) {
    fp = stdin;
    if ((colfile = ReadColfile(fp)) == NULL)
      return 1;
  }
  else if (GetFilename(cmdarg) != NULL) {
    /* More than one file name given */
    usage();
    return 1;
  }
  else if ((fp = fopen(s, "r")) == NULL) {
    fprintf(stderr, "support: Error in opening file '%s'\n", s);
    return 1;
  }

  /* Find pairs */

  if ((pair = pairnum(fp)) == NULL)
    return 1;

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "support: Error in closing file\n");
    return 1;
  }

  if (fp != stdin && (fp = fopen(s, "r")) == NULL) {
    fprintf(stderr, "support: Error in opening file '%s'\n", s);
    return 1;
  }

  /* Find supported pairs */

  if ((supp = support(fp, pair, limit)) == NULL)
    return 1;

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "support: Error in closing file\n");
    return 1;
  }

  if (fp != stdin && (fp = fopen(s, "r")) == NULL) {
    fprintf(stderr, "support: Error in opening file '%s'\n", s);
    return 1;
  }

  /* Output entries; output() receives argc/argv for the header info */

  if (output(fp, supp, option_s, argc, argv) == 1)
    return 1;

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "support: Error in closing file\n");
    return 1;
  }

  return 0;
}

/* Write the command line synopsis to stderr. */
void usage(void)
{
  static const char synopsis[] =
    "usage: support\n"
    "            [-s | --support] [-l<limit> | --limit=<limit>] [<file>]\n";

  fputs(synopsis, stderr);
}

/*
 * First pass: collect the pairing partner of every position.
 *
 * Entries are read from fp, or taken from the global colfile when fp
 * is stdin.  Only entries of type "RNA" that have a residue (or
 * nucleotide) column and either the align_bp/alignpos or the
 * seq_bp/seqpos column pair are used; all of them must have the same
 * length.
 *
 * Returns a newly allocated array with one int per position (owned by
 * the caller):
 *    0   no sequence reported a pair at this position
 *   -1   sequences reported different partners (inconsistency)
 *   >0   the partner reported by FindPair()
 * Returns NULL on read error, on sequences of unequal length, on
 * allocation failure, or when no usable sequence was found.
 */
int *pairnum(FILE *fp)
{
  int read_error = 0;  /* For keeping track of errors in reading entries */
  int *pair = NULL;    /* Pair numbers; NULL until the first usable entry */
  Colno colno;         /* Column numbers */
  int len = -1;        /* Sequence length, -1 until known */
  int i;
  int pairno;
  Entry *entry = NULL;
  int no = 0;          /* Index into colfile->entry when reading stdin */

  if (fp != stdin) {
    Header *header = MakeHeader();

    entry = MakeEntry();
    if (ReadHeader(fp, header) != 0)
      return NULL;
  }

  while (fp == stdin || (read_error = ReadEntry(fp, entry)) == 0) {
    if (fp == stdin && (entry = colfile->entry[no++]) == NULL)
      break;

    if (!ReadType(entry, "RNA"))
      continue;

    if ((colno.nuc = ReadColno(entry, "residue")) == 0)
      colno.nuc = ReadColno(entry, "nucleotide");
    colno.align_bp = ReadColno(entry, "align_bp");
    colno.alignpos = ReadColno(entry, "alignpos");
    colno.seq_bp = ReadColno(entry, "seq_bp");
    colno.seqpos = ReadColno(entry, "seqpos");
    if (colno.nuc == 0 ||
        ((colno.align_bp == 0 || colno.alignpos == 0) &&
         (colno.seq_bp == 0 || colno.seqpos == 0))) {
      fprintf(stderr,
              "support: Warning: Ignoring sequence, no column info\n");
      continue;
    }

    if (len == -1) {
      /* First usable entry fixes the length; calloc zeroes the array,
         and at least one element is requested so that an empty entry
         does not make the result look like an error. */
      len = EntryLength(entry);
      pair = calloc(len > 0 ? (size_t)len : 1, sizeof *pair);
      if (pair == NULL) {
        fprintf(stderr, "support: out of memory\n");
        return NULL;
      }
    }
    else if (len != EntryLength(entry)) {
      fprintf(stderr, "support: sequences of unequal length\n");
      free(pair);
      return NULL;
    }

    for (i = 1; i <= len; i++) {
      pairno = FindPair(entry, i, colno.align_bp, colno.alignpos,
                        colno.seq_bp, colno.seqpos);
      if (pairno == 0)          /* No pair in this sequence */
        continue;
      if (pair[i-1] == 0)
        pair[i-1] = pairno;     /* First pair seen at this position */
      else if (pair[i-1] != -1 && pair[i-1] != pairno)
        pair[i-1] = -1;         /* Inconsistency */
    }
  }

  if (read_error == 1) {
    free(pair);
    return NULL;
  }

  if (pair == NULL)
    fprintf(stderr, "support: no RNA sequences with column info found\n");

  return pair;
}

/*
 * Second pass: classify every position as supported or not.
 *
 * fp     input, or stdin to walk the global colfile
 * pair   array from pairnum(): 0 = unpaired, -1 = inconsistent,
 *        >0 = partner position
 * limit  minimum fraction of positive observations for 'S'
 *
 * For each position with a consistent pair, every usable sequence
 * counts as positive support when its base pair field is not "." and
 * StdPair6() accepts the two residues; it counts as negative support
 * unless both residues are '-'.
 *
 * Returns a newly allocated array of one char per position (not
 * NUL-terminated, owned by the caller): 'N' for an inconsistent pair,
 * 'S' when positive/(positive+negative) >= limit, '.' otherwise.
 * Returns NULL on read error, on sequences of unequal length, on
 * allocation failure, or when no usable sequence was found.
 */
char *support(FILE *fp, int *pair, double limit)
{
  int read_error = 0;      /* For keeping track of errors in reading entries */
  int *neg_supp = NULL;    /* Counting negative support */
  int *pos_supp = NULL;    /* Counting positive support */
  char *supp = NULL;       /* Support ('S', '.', 'N') */
  Colno colno;             /* Column numbers */
  int len = -1;            /* Sequence length, -1 until known */
  int i;
  int total;
  char field[MAXCOLW];
  char nuc1, nuc2;
  Entry *entry = NULL;
  int no = 0;              /* Index into colfile->entry when reading stdin */

  if (fp != stdin) {
    Header *header = MakeHeader();

    entry = MakeEntry();
    if (ReadHeader(fp, header) != 0)
      return NULL;
  }

  while (fp == stdin || (read_error = ReadEntry(fp, entry)) == 0) {
    if (fp == stdin && (entry = colfile->entry[no++]) == NULL)
      break;

    if (!ReadType(entry, "RNA"))
      continue;

    if ((colno.nuc = ReadColno(entry, "residue")) == 0)
      colno.nuc = ReadColno(entry, "nucleotide");
    colno.align_bp = ReadColno(entry, "align_bp");
    colno.alignpos = ReadColno(entry, "alignpos");
    colno.seq_bp = ReadColno(entry, "seq_bp");
    colno.seqpos = ReadColno(entry, "seqpos");
    if (colno.nuc == 0 ||
        ((colno.align_bp == 0 || colno.alignpos == 0) &&
         (colno.seq_bp == 0 || colno.seqpos == 0)))
      continue;

    if (len == -1) {
      /* First usable entry fixes the length; calloc zeroes the counters */
      len = EntryLength(entry);
      neg_supp = calloc(len > 0 ? (size_t)len : 1, sizeof *neg_supp);
      pos_supp = calloc(len > 0 ? (size_t)len : 1, sizeof *pos_supp);
      if (neg_supp == NULL || pos_supp == NULL) {
        fprintf(stderr, "support: out of memory\n");
        goto fail;
      }
    }
    else if (len != EntryLength(entry)) {
      fprintf(stderr, "support: sequences of unequal length\n");
      goto fail;
    }

    for (i = 1; i <= len; i++) {
      if (pair[i-1] <= 0)      /* Unpaired or inconsistent */
        continue;

      GetField(field, entry, i, colno.nuc);
      nuc1 = field[0];
      GetField(field, entry, pair[i-1], colno.nuc);
      nuc2 = field[0];
      if (colno.align_bp != 0)
        GetField(field, entry, i, colno.align_bp);
      else
        GetField(field, entry, i, colno.seq_bp);

      if (strcmp(field, ".") != 0 &&
          StdPair6(nuc1, nuc2))    /* Position pair */
        pos_supp[i-1]++;
      else if (nuc1 != '-' || nuc2 != '-')
        neg_supp[i-1]++;
    }
  }

  if (read_error == 1)
    goto fail;

  if (len == -1) {
    /* Nothing was counted, so there is no length to size the result by */
    fprintf(stderr, "support: no RNA sequences with column info found\n");
    goto fail;
  }

  supp = malloc(len > 0 ? (size_t)len : 1);
  if (supp == NULL) {
    fprintf(stderr, "support: out of memory\n");
    goto fail;
  }

  for (i = 1; i <= len; i++) {
    if (pair[i-1] == -1)
      supp[i-1] = 'N';   /* inconsistency */
    else if (pair[i-1] == 0)
      supp[i-1] = '.';   /* no pairs */
    else {
      /* total is 0 when every sequence had gaps at both positions;
         that is reported as unsupported rather than dividing by zero */
      total = neg_supp[i-1] + pos_supp[i-1];
      if (total > 0 && (double) pos_supp[i-1] / total >= limit)
        supp[i-1] = 'S';   /* support >= limit */
      else
        supp[i-1] = '.';   /* support < limit */
    }
  }

  free(neg_supp);
  free(pos_supp);
  return supp;

 fail:
  free(neg_supp);
  free(pos_supp);
  return NULL;
}

/*
 * Third pass: print the annotated file on stdout.
 *
 * fp          input, or stdin to walk the global colfile
 * supp        one char per position, as returned by support()
 * option_s    when 1, append the support string as a separate entry
 * argc, argv  command line, passed on to AddHeaderInfo()
 *
 * The header is printed after AddHeaderInfo() has been applied to it.
 * Entries that are not of type "RNA", or that lack the columns the
 * earlier passes need, are printed unchanged.  All other entries get a
 * "support" column (created with EnsureCol() if needed) holding
 * supp[i-1] for every position i.
 *
 * Returns 1 on a header or entry read error, 0 otherwise.
 */
int output(FILE *fp, char *supp, int option_s, int argc, char **argv)
{
  int read_error = 0;   /* For keeping track of errors in reading entries */
  int i;
  Colno colno;          /* Column numbers */
  int len = -1;         /* Length of the last annotated entry */
  Header *header;
  Entry *entry = NULL;
  int no = 0;           /* Index into colfile->entry when reading stdin */

  if (fp != stdin) {
    header = MakeHeader();
    entry = MakeEntry();
    if (ReadHeader(fp, header) != 0)
      return 1;
  }
  else
    header = colfile->header;

  AddHeaderInfo(header, argc, argv);

  PrintHeader(stdout, header);

  while (fp == stdin || (read_error = ReadEntry(fp, entry)) == 0) {
    if (fp == stdin && (entry = colfile->entry[no++]) == NULL)
      break;

    if (!ReadType(entry, "RNA")) {
      PrintEntry(stdout, entry);
      continue;
    }

    if ((colno.nuc = ReadColno(entry, "residue")) == 0)
      colno.nuc = ReadColno(entry, "nucleotide");
    colno.align_bp = ReadColno(entry, "align_bp");
    colno.alignpos = ReadColno(entry, "alignpos");
    colno.seq_bp = ReadColno(entry, "seq_bp");
    colno.seqpos = ReadColno(entry, "seqpos");
    if (colno.nuc == 0 ||
        ((colno.align_bp == 0 || colno.alignpos == 0) &&
         (colno.seq_bp == 0 || colno.seqpos == 0))) {
      PrintEntry(stdout, entry);
      continue;
    }

    colno.supp = EnsureCol(entry, "support", "   .");

    len = EntryLength(entry);

    for (i = 1; i <= len; i++) {
      ChgField(entry, i, colno.supp, "   %c", supp[i-1]);
    }
    PrintEntry(stdout, entry);
  }

  if (read_error == 1)
    return 1;

  if (option_s == 1) {
    /* Output support as an entry; nothing is listed when no entry was
       annotated, because len is still -1 */

    printf("; TYPE              support\n");
    printf("; COL 1             label\n");
    printf("; COL 2             residue\n");
    printf("; COL 3             seqpos\n");
    printf("; ENTRY             support\n");
    printf("; ----------\n");

    for (i = 0; i < len; i++)
      printf("S     %c %5d\n", supp[i], i+1);

    printf("; **********\n");
  }

  return 0;
}
