/*********************************************************************

  txt2col.c

  usage: txt2col [-m] [FILE]

  This program takes input from FILE, or from stdin if no file is
  given. The first line is assumed to be a pairing mask. Each
  consecutive line is assumed to be a sequence. Such a line consists
  of two words, the first is a name and the second is the nucleotide
  sequence. If -m is used, the pairing mask is output in col format
  as well.

  Output is the col format, through stdout.

  00209 Bjarne Knudsen (bk@daimi.au.dk)

  Copyright (C) 2000 Bjarne Knudsen

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

*********************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "clib/file.h"
#include "clib/col.h"

/*
 * Converts a plain-text alignment to col format on stdout.
 *
 * Options, as delivered by GetArgument(): "m" additionally outputs the
 * pairing mask, "p" sets the type to "protein", "d" sets it to "DNA"
 * and "-type=X" sets it to X.  The default type is "RNA".  At most one
 * file name is accepted; without one the input is read from stdin.
 *
 * For type RNA the input is scanned for a line starting with
 * "pairingmask " or "pairing_mask "; the second word of that line is
 * the mask and fixes the alignment length.  For other types the length
 * is taken from the first sequence.  Every following non-empty line
 * must consist of a name and a sequence.  Sequences of the wrong
 * length are skipped with a warning.
 *
 * Lines returned by GetLine() are released with free(), as was already
 * done for accepted sequence lines.
 *
 * Returns 0 on success and 1 on usage, input, memory or file errors.
 */
int main(int argc, char **argv)
{
  FILE *fp;
  char *s;              /* For read lines */
  char *mask;           /* The overall base-pairing mask */
  char *struc;          /* A specific base-pairing mask */
  char *seq;            /* The sequence */
  char *name;           /* The sequence name */
  size_t bufsize;       /* Allocated size of name and seq */
  int len;              /* Alignment length, -1 until known */
  int seqnum;           /* The sequence number */
  int i, j;             /* Iterators */
  int maskno;           /* For identifying base-pairs */
  int nucpos;           /* For keeping track of the nucleotide
                           position */
  CmdArg *cmdarg;       /* Command line arguments */
  int option_m;
  char *t;
  int pairing;
  char *type;
  int first;

  cmdarg = InitArgument(argc, argv);

  type = "RNA";
  option_m = 0;

  while ((t = GetArgument(cmdarg)) != NULL) {
    if (strcmp(t, "m") == 0)
      option_m = 1;
    else if (strcmp(t, "p") == 0)
      type = "protein";
    else if (strcmp(t, "d") == 0)
      type = "DNA";
    else if (strncmp(t, "-type=", 6) == 0)
      type = &t[6];
    else {
      fprintf(stderr, "Usage: txt2col [-m] [FILE]\n");
      return 1; }
  }

  if ((t = GetFilename(cmdarg)) == NULL)
    fp = stdin;
  else if (GetFilename(cmdarg) != NULL) {
    fprintf(stderr, "Usage: txt2col [-m] [FILE]\n");
    return 1; }
  else if ((fp = fopen(t, "r")) == NULL) {
    fprintf(stderr, "txt2col: Error in opening file '%s'\n", t);
    return 1; }

  pairing = 0;

  if (StrCmp(type, "RNA") == 0) {
    pairing = 1;
    /* Skip (and release) everything before the pairing mask line */
    while ((t = GetLine(fp)) != NULL) {
      if (StrnCmp(t, "pairingmask ", 12) == 0 ||
          StrnCmp(t, "pairing_mask ", 13) == 0)
        break;
      free(t);
    }
  }

  if ((s = GetLine(fp)) == NULL) {
    fprintf(stderr, "No sequences read\n");
    if (StrCmp(type, "RNA") == 0)
      fprintf(stderr, "Did you remember 'pairingmask'?\n");
    return 1;
  }

  printf("; Generated by txt2col\n");
  printf("; ========================================================================\n");

  /* Never hand a NULL mask line to sscanf */
  if (pairing == 1 && t == NULL) {
    fprintf(stderr, "Structure line incorrect\n");
    return 1; }

  /* The buffers must hold any word of the first sequence line and,
     when pairing, any word of the mask line.  Longer lines met later
     make name and seq grow inside the sequence loop. */
  bufsize = strlen(s);
  if (pairing == 1 && strlen(t) > bufsize)
    bufsize = strlen(t);
  bufsize += MAXCOLW + 1;

  mask = malloc(bufsize);
  seq = malloc(bufsize);
  struc = malloc(bufsize);
  name = malloc(bufsize);
  if (mask == NULL || seq == NULL || struc == NULL || name == NULL) {
    fprintf(stderr, "txt2col: Out of memory\n");
    return 1; }

  if (pairing == 1) {
    if (sscanf(t, "%s %s", name, mask) != 2) {
      fprintf(stderr, "Structure line incorrect\n");
      return 1; }
    free(t);              /* The mask line came from GetLine */
    len = (int)strlen(mask);   /* len is the actual sequence length */
  }
  else
    len = -1;

  if (option_m == 1 && pairing == 1) {
    /* Output mask in col format */
    printf("; TYPE              pairingmask\n");
    printf("; COL 1             label\n");
    printf("; COL 2             residue\n");
    printf("; COL 3             alignpos\n");
    printf("; ENTRY             pairingmask\n");
    printf("; ----------\n");

    for (i = 0; i < len; i++)
      printf("M     %c %5d\n", mask[i], i+1);

    printf("; **********\n");
  }

  /* Output sequences in col format */

  /* The first sequence line has already been read into s, so the
     loop condition only fetches a new line from the second pass on. */
  first = 1;

  for (seqnum = 1; first == 1 || (s = GetLine(fp)) != NULL; seqnum++) {
    first = 0;

    /* A word can never be longer than its line, so strlen(s)+1 bytes
       are always enough for both name and seq. */
    if (strlen(s) >= bufsize) {
      bufsize = strlen(s) + 1;
      free(name);
      free(seq);
      name = malloc(bufsize);
      seq = malloc(bufsize);
      if (name == NULL || seq == NULL) {
        fprintf(stderr, "txt2col: Out of memory\n");
        return 1; }
    }

    if (sscanf(s, "%s %s", name, seq) != 2) {
      if (sscanf(s, "%s", name) == EOF) { /* empty line */
        free(s);
        continue; }
      fprintf(stderr, "Sequence line %d incorrect\n", seqnum);
      return 1; }

    free(s);              /* The line is fully parsed into name/seq */

    if (len == -1)
      len = (int)strlen(seq);
    else if (strlen(seq) != (size_t)len) {
      fprintf(stderr, "txt2col: warning: sequence has incorrect length\n");
      continue; }

    printf("; TYPE              %s\n", type);
    printf("; COL 1             label\n");
    printf("; COL 2             residue\n");
    printf("; COL 3             seqpos\n");
    printf("; COL 4             alignpos\n");
    if (pairing == 1)
      printf("; COL 5             align_bp\n");
    printf("; ENTRY             %s\n", name);
    printf("; ----------\n");

    /* Make pairing mask for this sequence: only upper case residues
       keep their mask symbol.  Without a mask there is nothing to
       build, and struc is not consulted below. */
    if (pairing == 1)
      for (i = 0; i < len; i++)
        if (isupper((unsigned char)seq[i]))
          struc[i] = mask[i];
        else
          struc[i] = '-';

    nucpos = 1;
    for (i = 0; i < len; i++) {
      if (seq[i] == ' ')
        continue;

      j = i;
      if (pairing == 1 && struc[i] != '-') {
        /* Position i is the maskno'th occurrence of its symbol from
           the left; its partner j is the maskno'th occurrence of the
           same symbol from the right. */
        maskno = 0;
        for (j = 0; j <= i; j++)
          if (struc[i] == struc[j])
            maskno++;
        for (j = len-1;; j--)
          if (struc[i] == struc[j]) {
            maskno--;
            if (maskno == 0)
              break;
          }
      }

      /* Write position */
      if (pairing == 1) {
        if (!isalnum((unsigned char)seq[i]))
          printf("G     %c     . %5d     .\n", seq[i], i+1);
        else
          if (islower((unsigned char)seq[i]))
            printf("N     %c %5d %5d     .\n", seq[i], nucpos++, i+1);
          else
            printf("N     %c %5d %5d %5d\n", seq[i], nucpos++, i+1, j+1);
      }
      else {
        if (!isalnum((unsigned char)seq[i]))
          printf("G     %c     . %5d\n", seq[i], i+1);
        else
          printf("N     %c %5d %5d\n", seq[i], nucpos++, i+1);
      }
    }
    printf("; **********\n");
  }

  free(mask);
  free(seq);
  free(struc);
  free(name);

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "txt2col: Error in closing file\n");
    return 1; }

  return 0;
}
