/*********************************************************************

  Converts genbank files to col format

  See man page for more info

  001104 Bjarne Knudsen (bk@daimi.au.dk)

  Copyright (C) 2000 Bjarne Knudsen

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

*********************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "../clib/file.h"
#include "../clib/llist.h"
#include "../clib/dna.h"
#include "../clib/protein.h"

/* Buffer sizes.  MAXLEN is the size of the line buffer handed to fgets()
   in main(); MAXLOC and MAXACC bound the collected location text and
   accession string. */
#define MAXLEN 100  /* Maximum line length */
#define MAXLOC 10000 /* Maximum length of location description */
#define MAXACC 10000 /* Maximum length of accession */
/* Sentinels stored in Range.start / Range.end: START is used for a
   location written "<N" (open start) and END for one written ">N" (open
   end).  The annotate functions map START to position 1 and END to the
   sequence length. */
#define START -1
#define END -2

/* A parsed feature location.  Both members are lists of Range elements
   filled in by rec_RL(). */
typedef struct Loc {
  LList *range;   /* ranges that are certain */
  LList *uncert;  /* ranges that are uncertain, e.g. from "(a.b)" bounds */
} Loc;

/* One contiguous span of a location, using 1-based sequence positions. */
typedef struct Range {
  int start;       /* first position, or the START sentinel */
  int end;         /* last position (inclusive), or the END sentinel */
  int complement;  /* 1 if the span was parsed inside complement(), else 0 */
} Range;

/* Prints the command synopsis on stderr. */
void usage(void);
/* Location handling: debug dump, parsing of a location string, release. */
void PrintLoc(FILE *fp, Loc *loc);
Loc *ReadLoc(char *text);
void FreeLoc(Loc *loc);

/* Location queries: return 1/-1 for a hit in a certain range (plus/minus
   strand), 2/-2 for a hit in an uncertain range, 0 for no hit. */
int InLoc(Loc *loc, int pos);
int InLocRange(Loc *loc, int info_start, int info_end);
/* Per-position annotation writers; anno[i-1] corresponds to sequence
   position i and len is the sequence length. */
void annotate(Loc *loc, char *anno, int len, char sym);
void strand_annotate(Loc *loc, char *anno, int len);
void plus_annotate(Loc *loc, char *anno, int len, char sym, int ext);
void minus_annotate(Loc *loc, char *anno, int len, char sym, int ext);
void codon_annotate(Loc *loc, int *anno, int len, int codon);

/*
 * Reads one line into buf like fgets(), but clears the whole buffer
 * first.  The parser in main() tests fixed columns (t[5], t[21], ...);
 * clearing guarantees that such a test on a short line sees '\0' rather
 * than bytes left over from an earlier, longer line.
 * Returns NULL at end of input or on a read error.
 */
static char *gb_read_line(char *buf, int size, FILE *fp)
{
  memset(buf, 0, (size_t)size);
  return fgets(buf, size, fp);
}

/*
 * Converts GenBank flat-file entries to col format.
 *
 * Arguments are taken from the clib command-line helper.  The option
 * strings compared below are "g" / "-gene", "-bothstrand", "-posname",
 * "i<N>" / "-info=<N>", "-longinfo=<A>-<B>", "-extend=<N>",
 * "-mutation=<c>" and "-annotate=<feature>[,<feature>...]"; at most one
 * file name is accepted, otherwise stdin is read.
 *
 * For every entry (LOCUS ... //) the FEATURES table is parsed into
 * per-position annotation arrays and the ORIGIN block into the sequence.
 * At the terminating "//" line the entry is printed in col format
 * (default), or a summary of the features covering one position is
 * printed (info mode).  In longinfo mode the feature lines overlapping
 * the requested interval are echoed as they are read.
 *
 * Returns 0 on success and 1 on a usage, file or format error.
 */
int main(int argc, char **argv)
{
  FILE *fp;
  char *s;
  char t[MAXLEN];
  char t1[MAXLEN], t2[MAXLEN], junk[MAXLEN];
  char type[MAXLEN];
  int len = 0;                  /* sequence length from the LOCUS line */
  int arglen;                   /* characters consumed while parsing an option value */
  char *anno_intex = NULL;
  char *anno_strand = NULL;
  char *anno_outplus = NULL;    /* 'O' if position is going to be output
                                   in the annotate option */
  char *anno_outminus = NULL;   /* 'O' if position is going to be output
                                   in the annotate option */
  char *seq = NULL;
  int *anno_codon = NULL;
  char feature_type[MAXLEN];    /* The present feature type */
  char location_text[MAXLOC];   /* The location of a feature */
  char location_longtext[MAXLOC]; /* The raw lines of the present feature */
  int feature_start = 0;
  int features = 0;             /* =1 in features */
  int in_location = 0;          /* =1 in location */
  int in_sequence = 0;          /* =1 in sequence */
  int in_accession = 0;         /* =1 in accession */
  char accession[MAXACC];
  int accptr = 0;
  int newlocus;
  int found_locus;
  int nloci = 0;                /* number of LOCUS lines seen so far */
  int codon_start = 1;
  int new_codon_start;
  int i, pos = 1, ptr;
  int option_info, option_longinfo, option_out;
  int option_g;
  int option_bothstrand;
  int option_anno;
  int option_posname;
  int option_ext;
  int info = 0;
  int info_start = 0, info_end = 0;
  int cds = 0;
  int codon_at_info;
  int hit;
  int newstart;
  char mutation;
  char nuc[5];                  /* The area around info nucleotide */
  char oldnuc;
  int found;
  size_t cells;
  Loc *loc = NULL;
  void *elm;
  LList *feature_list;
  LListCounter *lcount;
  CmdArg *cmdarg;               /* Command line arguments */

  feature_type[0] = '\0';
  location_text[0] = '\0';
  location_longtext[0] = '\0';
  accession[0] = '\0';
  memset(nuc, '-', sizeof nuc);

  feature_list = MakeLList();

  cmdarg = InitArgument(argc, argv);
  option_anno = 0;
  option_g = 0;
  option_bothstrand = 0;
  option_out = 1;
  option_info = 0;
  option_longinfo = 0;
  option_ext = 0;
  option_posname = 0;
  mutation = '\0';

  while ((s = GetArgument(cmdarg)) != NULL)
    if (strcmp(s, "g") == 0)
      option_g = 1;
    else if (strcmp(s, "-gene") == 0)
      option_g = 1;
    else if (strcmp(s, "-bothstrand") == 0)
      option_bothstrand = 1;
    else if (strcmp(s, "-posname") == 0)
      option_posname = 1;
    else if (strncmp(s, "i", 1) == 0) {
      if (sscanf(&s[1], "%d%n", &info, &arglen) != 1 ||
          (size_t)arglen + 1 != strlen(s)) {
        usage();
        return 1; }
      option_info = 1;
      option_out = 0;
    }
    else if (strncmp(s, "-info=", 6) == 0) {
      if (sscanf(&s[6], "%d%n", &info, &arglen) != 1 ||
          (size_t)arglen + 6 != strlen(s)) {
        usage();
        return 1; }
      option_info = 1;
      option_out = 0;
    }
    else if (strncmp(s, "-longinfo=", 10) == 0) {
      if (sscanf(&s[10], "%d-%d%n", &info_start, &info_end, &arglen) != 2 ||
          (size_t)arglen + 10 != strlen(s)) {
        usage();
        return 1; }
      option_longinfo = 1;
      option_out = 0;
    }
    else if (strncmp(s, "-extend=", 8) == 0) {
      if (sscanf(&s[8], "%d%n", &option_ext, &arglen) != 1 ||
          (size_t)arglen + 8 != strlen(s)) {
        usage();
        return 1; }
    }
    else if (strncmp(s, "-mutation=", 10) == 0 && strlen(s) == 11) {
      mutation = s[10];
    }
    else if (strncmp(s, "-annotate=", 10) == 0) {
      /* Split the comma separated feature names in place and remember
         each of them. */
      option_anno = 1;
      ptr = 10;
      for (i = 10; s[i] != '\0'; i++)
        if (s[i] == ',') {
          s[i] = '\0';
          Push(feature_list, &s[ptr]);
          ptr = i+1;
        }
      Push(feature_list, &s[ptr]);  /* And the last one */
    }
    else {
      usage();
      return 1; }

  if ((s = GetFilename(cmdarg)) == NULL)
    fp = stdin;
  else if (GetFilename(cmdarg) != NULL) {
    usage();
    return 1; }
  else if ((fp = fopen(s, "r")) == NULL) {
    fprintf(stderr, "gb2col: Error in opening file '%s'\n", s);
    return 1; }

  if (option_out == 1) {
    printf("; File generated by gb2col\n");
    printf("; ========================================================================\n");
  }

  newlocus = 1;

  while (gb_read_line(t, MAXLEN, fp) != NULL) {
    if (newlocus == 1) { /* new locus */
      found_locus = 0;
      do {
        /* Remove junk between sequences */
        if (strncmp(t, "LOCUS ", 6) == 0) {
          found_locus = 1;
          break;
        }
      } while (gb_read_line(t, MAXLEN, fp) != NULL);
      if (found_locus == 0) {
        /* End of input.  Junk after the last entry is not an error, but
           an input without any LOCUS line still is. */
        if (nloci == 0) {
          fprintf(stderr, "Error in format\n");
          return 1; }
        break;
      }
      nloci++;
      newlocus = 0;
      if (sscanf(t, "LOCUS %s %s %s %s", t1, t2, junk, type) != 4) {
        fprintf(stderr, "Error in format\n");
        return 1; }
      len = atoi(t2);
      if (len < 0) {
        fprintf(stderr, "Error in format\n");
        return 1; }

      /* Allocate at least one cell so that a zero length entry does not
         look like an allocation failure. */
      cells = (len > 0) ? (size_t)len : 1;
      anno_intex = malloc(cells);
      anno_strand = malloc(cells);
      anno_outplus = malloc(cells);
      anno_outminus = malloc(cells);
      anno_codon = calloc(cells, sizeof *anno_codon);
      seq = malloc(cells);
      if (anno_intex == NULL || anno_strand == NULL ||
          anno_outplus == NULL || anno_outminus == NULL ||
          anno_codon == NULL || seq == NULL) {
        fprintf(stderr, "gb2col: Out of memory\n");
        return 1; }
      memset(anno_intex, '.', cells);
      memset(anno_strand, '.', cells);
      memset(anno_outplus, '.', cells);
      memset(anno_outminus, '.', cells);
      memset(seq, '-', cells);

      /* Reset all per-entry state so nothing carries over from the
         previous entry. */
      FreeLoc(loc);
      loc = NULL;
      feature_type[0] = '\0';
      location_text[0] = '\0';
      location_longtext[0] = '\0';
      accession[0] = '\0';
      accptr = 0;
      in_accession = 0;
      memset(nuc, '-', sizeof nuc);
      features = 0;
      in_location = 0;
      in_sequence = 0;
      codon_start = 1;
      cds = 0;
      pos = 1;
    }
    if (strcmp(t, "//\n") == 0) {
      /* End of entry: report and release the per-entry arrays. */
      if (option_info == 1) {
        printf("seq %c%c%c%c%c", nuc[0], nuc[1], nuc[2], nuc[3], nuc[4]);
        if (cds > 0) {
          printf(" (%c", Nuc2Amino(nuc[3-cds], nuc[4-cds], nuc[5-cds]));
          if (mutation != '\0') {
            oldnuc = nuc[2];
            nuc[2] = mutation;
            printf(" -> %c) (%c -> %c)", Nuc2Amino(nuc[3-cds], nuc[4-cds], nuc[5-cds]), oldnuc, nuc[2]);
          }
          else
            printf(")");
        }
        else if (cds < 0) {
          printf(" (%c", Nuc2Amino(DNAcomplement(nuc[-1-cds]), DNAcomplement(nuc[0-cds]), DNAcomplement(nuc[1-cds])));
          if (mutation != '\0') {
            oldnuc = nuc[2];
            nuc[2] = DNAcomplement(mutation);
            printf(" -> %c) (%c -> %c)", Nuc2Amino(DNAcomplement(nuc[-1-cds]), DNAcomplement(nuc[0-cds]), DNAcomplement(nuc[1-cds])), oldnuc, nuc[2]);
          }
          else
            printf(")");
        }
        printf("\n");
      }
      if (option_out == 1) {
        /* Plus strand */
        newstart = 1;
        for (i = 0; i < len; i++) {
          if (option_anno == 1)
            if (anno_outplus[i] != 'O') {
              newstart = 1;
              continue;
            }
          if (newstart == 1) {
            printf("; TYPE              DNA\n");
            printf("; COL 1             label\n");
            printf("; COL 2             residue\n");
            printf("; COL 3             seqpos\n");
            if (option_g == 1) {
              printf("; COL 4             intron/exon\n");
              printf("; COL 5             codonpos\n");
              printf("; COL 6             strand\n");
            }
            if (option_posname == 1 && option_anno == 1)
              printf("; ENTRY             %s_%d_P\n", t1, i+1);
            else
              printf("; ENTRY             %s\n", t1);
            printf("; ACCESSION         %s\n", accession);
            printf("; LENGTH            %d\n", len);
            printf("; ----------\n");
            newstart = 0;
          }
          printf("N     %c %5d", seq[i], i+1);
          if (option_g == 1) {
            printf("     %c", anno_intex[i]);
            if (anno_codon[i] == 0)
              printf("     .");
            else
              printf(" %5d", anno_codon[i]);
            printf("     %c", anno_strand[i]);
          }
          printf("\n");
          /* i+1 == len is tested first, so anno_outplus[i+1] is only
             read while it is inside the array. */
          if (i+1 == len ||
              (option_anno == 1 &&
               anno_outplus[i] == 'O' &&
               anno_outplus[i+1] != 'O'))
            printf("; **********\n");
        }
        /* Minus strand */
        if (option_anno == 1 || option_bothstrand == 1) {
          newstart = 1;
          for (i = len-1; i >= 0; i--) {
            if (option_anno == 1)
              if (anno_outminus[i] != 'O') {
                newstart = 1;
                continue;
              }
            if (newstart == 1) {
              printf("; TYPE              DNA\n");
              printf("; COL 1             label\n");
              printf("; COL 2             residue\n");
              printf("; COL 3             seqpos\n");
              if (option_g == 1) {
                printf("; COL 4             intron/exon\n");
                printf("; COL 5             codonpos\n");
                printf("; COL 6             strand\n");
              }
              if (option_posname == 1 && option_anno == 1)
                printf("; ENTRY             %s_%d_M\n", t1, i+1);
              else
                printf("; ENTRY             %s\n", t1);
              printf("; ACCESSION         %s\n", accession);
              printf("; LENGTH            %d\n", len);
              printf("; STRAND            Minus\n");
              printf("; ----------\n");
              newstart = 0;
            }
            printf("N     %c %5d", DNAcomplement(seq[i]), i+1);
            if (option_g == 1) {
              printf("     %c", anno_intex[i]);
              if (anno_codon[i] == 0)
                printf("     .");
              else
                printf(" %5d", anno_codon[i]);
              printf("     %c", anno_strand[i]);
            }
            printf("\n");
            if (i == 0 ||
                (option_anno == 1 &&
                 anno_outminus[i] == 'O' &&
                 anno_outminus[i-1] != 'O'))
              printf("; **********\n");
          }
        }
      }
      /* Release the arrays in every mode, not only when the entry was
         printed, so the info modes do not leak one set per entry. */
      free(anno_intex);
      free(anno_strand);
      free(anno_outplus);
      free(anno_outminus);
      free(anno_codon);
      free(seq);
      anno_intex = anno_strand = anno_outplus = anno_outminus = seq = NULL;
      anno_codon = NULL;
      newlocus = 1;
      continue;
    }
    if (strncmp(t, "ACCESSION ", 10) == 0) {
      in_accession = 1;
      accptr = 0;
    }
    if (in_accession == 1) {
      if (strncmp(t, "ACCESSION ", 10) != 0 && t[0] != ' ') {
        in_accession = 0;
        accession[accptr] = '\0';
      }
      else if (accptr < MAXACC-100) {
        /* Continuation lines are joined with '_', and blanks inside the
           accession text are replaced by '_' as well. */
        if (t[0] == ' ')
          accession[accptr++] = '_';
        for (i = 12; t[i] != '\n' && t[i] != '\0'; i++)
          if (t[i] == ' ')
            accession[accptr++] = '_';
          else
            accession[accptr++] = t[i];
        accession[accptr] = '\0';  /* keep the string terminated at all times */
      }
    }
    if (in_location == 1) {
      /* A line of at most MAXLEN-1 characters is appended, so leave room
         for a full line in both buffers. */
      if (t[21] != '/' && t[0] == ' ' && t[5] == ' ' &&
          strlen(location_text) < MAXLOC-MAXLEN &&
          strlen(location_longtext) < MAXLOC-MAXLEN) {
        sscanf(t, " %s", location_text+strlen(location_text));
        strcpy(location_longtext+strlen(location_longtext), t);
      }
      else {
        /* The location is complete.  A location that cannot be parsed
           leaves loc == NULL, which every user of loc treats as "no
           location". */
        FreeLoc(loc);
        loc = ReadLoc(location_text);
        in_location = 0;
      }
    }
    if (features == 1 && in_location == 0) {
      if ((t[0] != ' ' || t[5] != ' ') && feature_type[0] != '\0') {
        /* A feature ends here, time to annotate */
        if (option_anno == 1) {
          /* are we interested in this feature? */
          found = 0;
          lcount = MakeCounter(feature_list, FIRST);
          while ((elm = Next(lcount)) != NULL)
            if (strcmp((char *)elm, feature_type) == 0) {
              found = 1;
              break;}
          if (found == 1) {  /* yes */
            plus_annotate(loc, anno_outplus, len, 'O', option_ext);
            minus_annotate(loc, anno_outminus, len, 'O', option_ext);
          }
        }
        if (strcmp(feature_type, "exon") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, 'E');
        }
        else if (strcmp(feature_type, "intron") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, 'I');
        }
        else if (strcmp(feature_type, "gene") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, 'G');
        }
        else if (strcmp(feature_type, "mRNA") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, 'M');
        }
        else if (strcmp(feature_type, "3'UTR") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, '3');
        }
        else if (strcmp(feature_type, "5'UTR") == 0) {
          strand_annotate(loc, anno_strand, len);
          annotate(loc, anno_intex, len, '5');
        }
        else if (strcmp(feature_type, "CDS") == 0) {
          strand_annotate(loc, anno_strand, len);
          codon_annotate(loc, anno_codon, len, codon_start);
        }
        if (option_info == 1) {
          hit = InLoc(loc, info);
          if ((hit == 1 || hit == -1) &&
              strcmp(feature_type, "source") != 0) {
            if (strcmp(feature_type, "CDS") == 0) {
              /* info may lie outside the sequence (open-ended location) */
              codon_at_info = (info >= 1 && info <= len) ? anno_codon[info-1] : 0;
              printf("CDS %d", codon_at_info);
              cds = codon_at_info;
              if (hit == -1)
                cds *= -1;
            }
            else
              printf("%s", feature_type);
            if (hit == 1)
              printf(" (+), ");
            else
              printf(" (-), ");
          }
        }
        FreeLoc(loc);
        loc = NULL;  /* never leave a dangling pointer for later queries */
      }
      else if (option_longinfo == 1 &&
               strcmp(feature_type, "source") != 0) {
        hit = InLocRange(loc, info_start, info_end);
        if (hit == 1 || hit == -1) {
          if (feature_start == 1) {
            printf("%s", location_longtext);
            feature_start = 0;
          }
          printf("%s", t);
        }
      }
      if (t[0] != ' ') { /* end of features */
        features = 0;
      }
      else if (t[5] != ' ') { /* new feature */
        location_text[0] = '\0';  /* in case the line carries no location */
        sscanf(t, " %s %s", feature_type, location_text);
        strcpy(location_longtext, t);
        in_location = 1;
        feature_start = 1;
        codon_start = 1;  /* the default until /codon_start is seen */
      }
      else if (strcmp(feature_type, "CDS") == 0) {
        /* Only 1..3 are meaningful; anything else would later index
           outside nuc[]. */
        if (sscanf(t, " /codon_start=%d", &new_codon_start) == 1 &&
            new_codon_start >= 1 && new_codon_start <= 3)
          codon_start = new_codon_start;
      }
    }
    else if (in_sequence == 1) {
      /* The first 10 columns of a sequence line hold the position
         counter; the rest is blank separated residues. */
      for (i = 10; t[i] != '\n' && t[i] != '\0'; i++) {
        if (isspace((unsigned char)t[i]))
          continue;
        if (option_info == 1)
          if (pos >= info-2 && pos <= info+2)
            nuc[pos-info+2] = t[i];
        /* Ignore residues beyond the length announced on the LOCUS line
           instead of writing past the end of seq. */
        if (pos >= 1 && pos <= len)
          seq[pos-1] = t[i];
        pos++;
      }
    }
    if (strncmp(t, "FEATURES ", 9) == 0) { /* start of features */
      features = 1;
    }
    if (strncmp(t, "ORIGIN", 6) == 0) { /* start of sequence */
      pos = 1;
      in_sequence = 1;
    }
  }

  /* Release whatever an entry without a terminating "//" left behind. */
  free(anno_intex);
  free(anno_strand);
  free(anno_outplus);
  free(anno_outminus);
  free(anno_codon);
  free(seq);
  FreeLoc(loc);

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "gb2col: Error in closing file\n");
    return 1; }

  return 0;
}

/* Writes the one-line command synopsis to stderr. */
void usage(void)
{
  fputs("Usage: gb2col [-g | --gene] [FILE]\n", stderr);
}

/*
 * Allocates a location with two freshly created, empty range lists.
 * Returns NULL if the Loc itself cannot be allocated.  Release the
 * result with FreeLoc().
 */
Loc *MakeLoc(void)
{
  Loc *loc;

  loc = malloc(sizeof *loc);
  if (loc == NULL)
    return NULL;

  loc->range = MakeLList();
  loc->uncert = MakeLList();

  return loc;
}

/*
 * Debug dump of a location on fp: first the certain ranges under the
 * heading "Certain:", then the uncertain ones under "Uncertain:".  Each
 * range is printed as "start-end", with the START/END sentinels spelled
 * out and complement ranges prefixed by " Comp:".  Does nothing when loc
 * is NULL.
 */
void PrintLoc(FILE *fp, Loc *loc)
{
  Range *r;
  LListCounter *it;
  int pass;

  if (loc == NULL)
    return;

  /* pass 0 walks the certain list, pass 1 the uncertain list */
  for (pass = 0; pass < 2; pass++) {
    if (pass == 0) {
      fputs("Certain:\n", fp);
      it = MakeCounter(loc->range, FIRST);
    }
    else {
      fputs("Uncertain:\n", fp);
      it = MakeCounter(loc->uncert, FIRST);
    }

    for (r = Next(it); r != NULL; r = Next(it)) {
      if (r->complement == 1)
        fputs(" Comp:", fp);

      if (r->start == START)
        fputs(" START-", fp);
      else
        fprintf(fp, " %d-", r->start);

      if (r->end == END)
        fputs("END\n", fp);
      else
        fprintf(fp, "%d\n", r->end);
    }
  }
}

/*
 * Destroys both range lists of loc and then loc itself.  A NULL loc is
 * accepted and ignored.
 */
void FreeLoc(Loc *loc)
{
  if (loc == NULL)
    return;

  DestroyLList(loc->range);
  DestroyLList(loc->uncert);
  free(loc);
}

/*
 * Tests whether sequence position pos lies inside loc.
 *
 * The certain ranges are searched first, then the uncertain ones; the
 * first range containing pos decides the result:
 *    1 / -1   certain range on the plus / complement strand
 *    2 / -2   uncertain range on the plus / complement strand
 *    0        no range contains pos, or loc is NULL
 * A START bound counts as position 1 and an END bound never excludes pos.
 */
int InLoc(Loc *loc, int pos)
{
  LListCounter *it;
  Range *r;
  int pass;
  int lo, hi;

  if (loc == NULL)
    return 0;

  /* pass 0 = certain list (result +-1), pass 1 = uncertain list (+-2) */
  for (pass = 0; pass < 2; pass++) {
    it = MakeCounter(pass == 0 ? loc->range : loc->uncert, FIRST);
    for (r = Next(it); r != NULL; r = Next(it)) {
      lo = (r->start == START) ? 1 : r->start;
      hi = (r->end == END) ? pos : r->end;
      if (lo <= pos && pos <= hi)
        return (r->complement == 0) ? (pass + 1) : -(pass + 1);
    }
  }

  return 0;
}

/*
 * Tests whether the interval info_start..info_end overlaps loc.
 *
 * The certain ranges are searched first, then the uncertain ones; the
 * first overlapping range decides the result:
 *    1 / -1   certain range on the plus / complement strand
 *    2 / -2   uncertain range on the plus / complement strand
 *    0        nothing overlaps, or loc is NULL
 * A START bound counts as position 1 and an END bound is taken to reach
 * info_end.
 */
int InLocRange(Loc *loc, int info_start, int info_end)
{
  LListCounter *it;
  Range *r;
  int pass;
  int first, last;

  if (loc == NULL)
    return 0;

  /* pass 0 = certain list (result +-1), pass 1 = uncertain list (+-2) */
  for (pass = 0; pass < 2; pass++) {
    it = MakeCounter(pass == 0 ? loc->range : loc->uncert, FIRST);
    for (r = Next(it); r != NULL; r = Next(it)) {
      first = (r->start == START) ? 1 : r->start;
      last = (r->end == END) ? info_end : r->end;
      if (first <= info_end && info_start <= last)
        return (r->complement == 0) ? (pass + 1) : -(pass + 1);
    }
  }

  return 0;
}

/* Allocates a Range with start a, end b and complement flag comp
   (defined further down). */
Range *InitRange(int a, int b, int comp);

/* Recursive location parser used by ReadLoc(); parses text from *ptr
   on, advances *ptr, and returns 1 on a syntax error (defined further
   down). */
int rec_RL(Loc *loc, char *text, int *ptr, int comp);

/*
 * Parses a feature location string such as "join(12..45,complement(<60..78))"
 * into a newly allocated Loc.
 *
 * Returns NULL when text is NULL, when the location cannot be allocated,
 * when the parser reports a syntax error, or when characters remain
 * after the parsed location.  On every failure path the partially built
 * location is released with FreeLoc(), so nothing is leaked.  The caller
 * owns a non-NULL result and releases it with FreeLoc().
 */
Loc *ReadLoc(char *text)
{
  Loc *loc;
  int ptr;

  if (text == NULL)
    return NULL;

  loc = MakeLoc();
  if (loc == NULL)
    return NULL;

  ptr = 0;
  if (rec_RL(loc, text, &ptr, 0) != 0 || text[ptr] != '\0') {
    FreeLoc(loc);
    return NULL;
  }

  return loc;
}

/*
 * Recursive descent parser for one location expression.
 *
 * loc    receives the parsed ranges (certain ones in loc->range,
 *        uncertain ones in loc->uncert)
 * text   the complete location string
 * ptr    in/out index into text; on success it is advanced past the
 *        expression that was parsed
 * comp   1 while inside an odd number of complement() operators
 *
 * Recognised forms are complement(X), join(X,...), order(X,...) and the
 * leaves N, <N, >N, (A.B), and ranges L..R where L is N, <N or (A.B) and
 * R is N, >N or (C.D).  A leaf starting with an upper case letter is
 * taken to refer to another entry and is skipped.
 *
 * Returns 0 on success and 1 on a syntax error.  Errors from nested
 * expressions are passed on to the caller.
 */
int rec_RL(Loc *loc, char *text, int *ptr, int comp)
{
  int a, b, c, d;
  int n;
  int i, j;
  int len;
  int level;

  if (strncmp(text + *ptr, "complement(", 11) == 0) {
    *ptr += 11;
    if (rec_RL(loc, text, ptr, (comp+1)%2) != 0)
      return 1;
    if (text[(*ptr)++] != ')')
      return 1;
  }
  else if (strncmp(text + *ptr, "join(", 5) == 0 ||
           strncmp(text + *ptr, "order(", 6) == 0) {
    /* join and order are parsed identically; only the keyword length
       differs */
    *ptr += (text[*ptr] == 'j') ? 5 : 6;
    if (comp == 0) {
      if (rec_RL(loc, text, ptr, comp) != 0)
        return 1;
      while (text[*ptr] == ',') {
        (*ptr)++;
        if (rec_RL(loc, text, ptr, comp) != 0)
          return 1;
      }
      if (text[(*ptr)++] != ')')
        return 1;
    }
    else {
      /* On the complement strand the elements are read from the last
         one to the first, so that the ranges are stored in the order
         in which they occur on that strand. */
      for (i = *ptr; text[i] != ')' && text[i] != '\0'; i++)
        ;
      if (text[i] != ')')
        return 1;
      j = i;
      while (text[j] == ',' || text[j] == ')') {
        /* step back to the delimiter in front of the element ... */
        for (j = j-1; text[j] != '(' && text[j] != ','; j--)
          ;
        j++;
        /* ... parse the element (this moves j to its end) ... */
        if (rec_RL(loc, text, &j, comp) != 0)
          return 1;
        /* ... and return to the delimiter in front of it */
        for (j = j-1; text[j] != '(' && text[j] != ','; j--)
          ;
      }
      *ptr = i+1;
    }
  }
  else {
    /* A leaf: it extends to the next ',' or to the ')' closing an
       enclosing operator, parentheses inside the leaf being balanced. */
    level = 0;
    for (i = 0;; i++) {
      if (text[*ptr+i] == '(')
        level++;
      else if (text[*ptr+i] == ')') {
        level--;
        if (level < 0)
          break;
      }
      else if (text[*ptr+i] == '\0' || text[*ptr+i] == ',')
        break;
    }
    len = i;
    if (isupper((unsigned char)text[*ptr])) /* refers to another entry */
      ;
    else {
      if (len == 0)  /* an empty leaf matches none of the forms below */
        return 1;
      for (i = 0; i < len-1; i++)
        if (text[*ptr+i] == '.' && text[*ptr+i+1] == '.')
          break;
      /* "(%d.%d)%n" can convert both numbers and still stop before %n
         when the ')' is missing; n = -1 makes the length check fail in
         that case instead of reading an unset n. */
      n = -1;
      if (i == len-1) { /* Not a range */
        if (sscanf(text + *ptr, "(%d.%d)%n", &a, &b, &n) == 2) {
          if (n != len)
            return 1;
          else {
            Enqueue(loc->uncert, InitRange(a, a, comp));
            Enqueue(loc->uncert, InitRange(b, b, comp));
          }
        }
        else if (sscanf(text + *ptr, "%d%n", &b, &n) == 1) {
          if (n != len)
            return 1;
          else {
            Enqueue(loc->range, InitRange(b, b, comp));
          }
        }
        else if (sscanf(text + *ptr, "<%d%n", &b, &n) == 1) {
          if (n != len)
            return 1;
          else {
            Enqueue(loc->range, InitRange(START, b, comp));
          }
        }
        else if (sscanf(text + *ptr, ">%d%n", &b, &n) == 1) {
          if (n != len)
            return 1;
          else {
            Enqueue(loc->range, InitRange(b, END, comp));
          }
        }
        else
          return 1;
      }
      else { /* a range; i is the offset of the ".." */
        /* left bound: leaves the start of the certain part in b */
        if (sscanf(text + *ptr, "(%d.%d)%n", &a, &b, &n) == 2) {
          if (n != i) {
            return 1;
          }
          else {
            Enqueue(loc->uncert, InitRange(a, b-1, comp));
          }
        }
        else if (sscanf(text + *ptr, "%d%n", &b, &n) == 1) {
          if (n != i) {
            return 1;
          }
        }
        else if (sscanf(text + *ptr, "<%d%n", &b, &n) == 1) {
          if (n != i) {
            return 1;
          }
          else {
            Enqueue(loc->uncert, InitRange(START, b-1, comp));
          }
        }
        else {
          return 1;
        }

        /* right bound: c is the end of the certain part */
        n = -1;
        if (sscanf(text + *ptr + i+2, "(%d.%d)%n", &c, &d, &n) == 2) {
          if (n != len-i-2) {
            return 1;
          }
          else {
            Enqueue(loc->range, InitRange(b, c, comp));
            Enqueue(loc->uncert, InitRange(c+1, d, comp));
          }
        }
        else if (sscanf(text + *ptr + i+2, "%d%n", &c, &n) == 1) {
          if (n != len-i-2) {
            return 1;
          }
          else {
            Enqueue(loc->range, InitRange(b, c, comp));
          }
        }
        else if (sscanf(text + *ptr + i+2, ">%d%n", &c, &n) == 1) {
          if (n != len-i-2) {
            return 1;
          }
          else {
            Enqueue(loc->range, InitRange(b, c, comp));
            Enqueue(loc->uncert, InitRange(c+1, END, comp));
          }
        }
        else {
          return 1;
        }
      }
    }

    *ptr += len;
  }

  return 0;
}

/*
 * Allocates a Range covering positions a..b (either may be the START or
 * END sentinel) with the given complement flag.
 * Returns NULL if the allocation fails.
 */
Range *InitRange(int a, int b, int comp)
{
  Range *range;

  range = malloc(sizeof *range);
  if (range == NULL)
    return NULL;

  range->start = a;
  range->end = b;
  range->complement = comp;

  return range;
}

/*
 * Annotate things that are certain.
 *
 * Writes sym into anno for every position covered by a certain range of
 * loc; anno[i-1] belongs to sequence position i and len is the sequence
 * length.  Ranges are clipped to 1..len, so a malformed location that
 * starts before position 1 cannot write in front of the array.
 *
 * Symbols have a priority: '3' and '5' always overwrite; 'E' and 'I'
 * overwrite anything but '3'/'5'; 'M' additionally leaves 'E'/'I' alone;
 * 'G' additionally leaves 'M' alone; any other symbol only fills
 * positions that still hold '.'.
 *
 * Does nothing when loc is NULL.
 */
void annotate(Loc *loc, char *anno, int len, char sym)
{
  int i;
  Range *range;
  LListCounter *lcount;
  int startpos, endpos;

  if (loc == NULL)
    return;

  lcount = MakeCounter(loc->range, FIRST);
  while ((range = Next(lcount)) != NULL) {
    if (range->start == START || range->start < 1)
      startpos = 1;
    else
      startpos = range->start;
    if (range->end == END || range->end > len)
      endpos = len;
    else
      endpos = range->end;
    for (i = startpos; i <= endpos; i++) {
      if (sym == '3' || sym == '5') {
        anno[i-1] = sym;
      }
      else if (sym == 'E' || sym == 'I') {
        if (anno[i-1] != '3' && anno[i-1] != '5')
          anno[i-1] = sym;
      }
      else if (sym == 'M') {
        if (anno[i-1] != '3' && anno[i-1] != '5' &&
            anno[i-1] != 'E' && anno[i-1] != 'I')
          anno[i-1] = sym;
      }
      else if (sym == 'G') {
        if (anno[i-1] != '3' && anno[i-1] != '5' &&
            anno[i-1] != 'E' && anno[i-1] != 'I' &&
            anno[i-1] != 'M')
          anno[i-1] = sym;
      }
      else {
        if (anno[i-1] == '.')
          anno[i-1] = sym;
      }
    }
  }
}

/*
 * Annotate strand info that is certain.
 *
 * For every certain range of loc, sets anno[i-1] to '+' (plus strand) or
 * '-' (complement) for each covered position i.  len is the sequence
 * length; ranges are clipped to 1..len so a malformed location that
 * starts before position 1 cannot write in front of the array.
 * Does nothing when loc is NULL.
 */
void strand_annotate(Loc *loc, char *anno, int len)
{
  int i;
  Range *range;
  LListCounter *lcount;
  int startpos, endpos;
  char mark;

  if (loc == NULL)
    return;

  lcount = MakeCounter(loc->range, FIRST);
  while ((range = Next(lcount)) != NULL) {
    if (range->start == START || range->start < 1)
      startpos = 1;
    else
      startpos = range->start;
    if (range->end == END || range->end > len)
      endpos = len;
    else
      endpos = range->end;
    mark = (range->complement == 0) ? '+' : '-';
    for (i = startpos; i <= endpos; i++)
      anno[i-1] = mark;
  }
}

/*
 * Annotate things that are certain on the plus strand.
 *
 * Like annotate(), but complement ranges are skipped and every range is
 * widened by ext positions on both sides before it is clipped to 1..len.
 * anno[i-1] belongs to sequence position i.  The symbol priority is:
 * '3'/'5' always overwrite; 'E'/'I' spare '3'/'5'; 'M' also spares
 * 'E'/'I'; 'G' also spares 'M'; any other symbol only replaces '.'.
 */
void plus_annotate(Loc *loc, char *anno, int len, char sym, int ext)
{
  LListCounter *it;
  Range *r;
  int first, last;
  int p;
  int write;
  char cur;

  if (loc == NULL)
    return;

  it = MakeCounter(loc->range, FIRST);
  for (r = Next(it); r != NULL; r = Next(it)) {
    if (r->complement == 1)
      continue;  /* minus strand, not ours */

    first = (r->start == START || r->start - ext < 1) ? 1 : r->start - ext;
    last = (r->end == END || r->end + ext > len) ? len : r->end + ext;

    /* p is the zero-based array index of position p+1 */
    for (p = first - 1; p < last; p++) {
      cur = anno[p];
      switch (sym) {
      case '3':
      case '5':
        write = 1;
        break;
      case 'E':
      case 'I':
        write = !(cur == '3' || cur == '5');
        break;
      case 'M':
        write = !(cur == '3' || cur == '5' || cur == 'E' || cur == 'I');
        break;
      case 'G':
        write = !(cur == '3' || cur == '5' || cur == 'E' || cur == 'I' ||
                  cur == 'M');
        break;
      default:
        write = (cur == '.');
        break;
      }
      if (write)
        anno[p] = sym;
    }
  }
}

/*
 * Annotate things that are certain on the minus strand.
 *
 * Only complement ranges of loc are used.  Each is widened by ext
 * positions on both sides, clipped to 1..len, and sym is then written
 * into anno (anno[i-1] belongs to sequence position i) subject to the
 * symbol priority: '3'/'5' always overwrite; 'E'/'I' spare '3'/'5'; 'M'
 * also spares 'E'/'I'; 'G' also spares 'M'; any other symbol only
 * replaces '.'.
 */
void minus_annotate(Loc *loc, char *anno, int len, char sym, int ext)
{
  LListCounter *walker;
  Range *span;
  int from, to;
  int idx;
  char *cell;
  int keep;  /* non-zero when the present symbol outranks sym */

  if (loc == NULL)
    return;

  walker = MakeCounter(loc->range, FIRST);
  while ((span = Next(walker)) != NULL) {
    if (span->complement == 0)
      continue;  /* plus strand, not ours */

    if (span->start == START || span->start - ext < 1)
      from = 1;
    else
      from = span->start - ext;

    if (span->end == END || span->end + ext > len)
      to = len;
    else
      to = span->end + ext;

    for (idx = from; idx <= to; idx++) {
      cell = &anno[idx - 1];

      if (sym == '3' || sym == '5')
        keep = 0;
      else if (sym == 'E' || sym == 'I')
        keep = (*cell == '3' || *cell == '5');
      else if (sym == 'M')
        keep = (*cell == '3' || *cell == '5' ||
                *cell == 'E' || *cell == 'I');
      else if (sym == 'G')
        keep = (*cell == '3' || *cell == '5' ||
                *cell == 'E' || *cell == 'I' || *cell == 'M');
      else
        keep = (*cell != '.');

      if (!keep)
        *cell = sym;
    }
  }
}

/*
 * Annotate codon positions.
 *
 * Writes the codon position (cycling 1, 2, 3, 1, ...) into anno for
 * every position covered by the certain ranges of loc, starting with the
 * value codon.  anno[i-1] belongs to sequence position i and len is the
 * sequence length; ranges are clipped to 1..len.
 *
 * The strand is taken from the first range and assumed to hold for the
 * whole location.  On the plus strand each range is walked upwards, on
 * the complement strand downwards; the ranges themselves are visited in
 * list order.
 *
 * Does nothing when loc is NULL or has no certain range.
 */
void codon_annotate(Loc *loc, int *anno, int len, int codon)
{
  int comp;
  int i;
  Range *range;
  LListCounter *lcount;
  int startpos, endpos;

  if (loc == NULL)
    return;

  /* assume that the whole location is on the same strand */
  lcount = MakeCounter(loc->range, FIRST);
  if ((range = Next(lcount)) == NULL)
    return;  /* nothing to annotate, and no strand to read */
  comp = range->complement;

  lcount = MakeCounter(loc->range, FIRST);
  while ((range = Next(lcount)) != NULL) {
    if (range->start == START || range->start < 1)
      startpos = 1;
    else
      startpos = range->start;
    if (range->end == END || range->end > len)
      endpos = len;
    else
      endpos = range->end;

    if (comp == 0) {
      for (i = startpos; i <= endpos; i++) {
        anno[i-1] = codon;
        codon = (codon%3)+1;
      }
    }
    else if (comp == 1) {
      for (i = endpos; i >= startpos; i--) {
        anno[i-1] = codon;
        codon = (codon%3)+1;
      }
    }
  }
}
