/*********************************************************************

  greppos.c
  
  usage: greppos [-s -rRANGE -f] [RANGEFILE] [FILE]

  This program picks out specific positions in col format sequences.
  The header is preserved. RANGE example: '1-3,7,5'. This option would
  print positions 1, 2, 3, 5 and 7 in that order. Input is through
  stdin or FILE, while output is through stdout. With the -f option a
  rangefile can be specified. This file could be:

  1-3,7
  5

  Which would give the same as above. The -s option assumes that the
  range is ordered, e.g. 2, 5-8, 10. This speeds things up.

  000308 Bjarne Knudsen (bk@daimi.au.dk)

  Copyright (C) 2000 Bjarne Knudsen

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.

*********************************************************************/

#include "clib/col.h"

/* Prints the command line synopsis to stderr. */
void usage(void);
/* Returns 1 if num lies within any item of range, 0 otherwise. */
int inrange(int num, char *range);
/* Like inrange(), but starts scanning at offset *ptr and moves *ptr
   forward past items whose upper bound is below num; intended for
   ascending ranges queried with ascending values of num. */
int inorderrange(int num, char *range, int *ptr);

/*
 * Program entry point.
 *
 * Parses the options, obtains the range either from -r<range> /
 * --range=<range> or from a range file (-f / --file), then reads the
 * input (a named file or stdin) and writes it to stdout, printing only
 * those entry lines whose position is accepted by inrange() or, with
 * -s / --sorted, by inorderrange().
 *
 * Returns 0 on success and 1 on a usage error, a file error or a read
 * error.
 */
int main(int argc, char **argv)
{
  FILE *fp;
  Header *header;
  Entry *entry;
  int num_col;      /* column holding the position, 0 if none was found */
  int read_error;   /* for keeping track of errors in reading entries */
  char *range;
  int i;
  CmdArg *cmdarg;   /* Command line arguments */
  char *s;          /* String for arguments */
  int option_f, option_s;
  int order_ptr;    /* scan offset into range, used by inorderrange() */
  char field[MAXCOLW];
  int pos;

  /* default option */
  option_f = 0;
  option_s = 0;
  range = NULL;     /* stays NULL until a range has actually been given */

  cmdarg = InitArgument(argc, argv);

  while ((s = GetArgument(cmdarg)) != NULL) {
    if (strncmp(s, "r", 1) == 0 && option_f == 0)
      range = &s[1];   /* the range follows the option letter directly */
    else if (strncmp(s, "-range=", 7) == 0)
      range = &s[7];
    else if (strcmp(s, "s") == 0 && option_f == 0)
      option_s = 1;
    else if (strcmp(s, "-sorted") == 0 && option_f == 0)
      option_s = 1;
    else if (strcmp(s, "f") == 0)
      option_f = 1;
    else if (strcmp(s, "-file") == 0)
      option_f = 1;
    else {
      usage();
      return 1;
    }
  }

  if (option_f == 0) {
    /* Without -f the range must have come from the command line.
       Using it unset would mean reading through an indeterminate
       pointer in inrange()/inorderrange(). */
    if (range == NULL) {
      usage();
      return 1;
    }

    /* At most one filename is allowed: the input file. */
    if ((s = GetFilename(cmdarg)) == NULL)
      fp = stdin;
    else if (GetFilename(cmdarg) != NULL) {
      usage();
      return 1;
    }
    else if ((fp = fopen(s, "r")) == NULL) {
      fprintf(stderr, "greppos: Error in opening file '%s'\n", s);
      return 1;
    }
  }
  else {
    /* The first filename is the range file (required), an optional
       second one is the input file. */
    if ((s = GetFilename(cmdarg)) == NULL) {
      usage();
      return 1;
    }
    else if ((fp = fopen(s, "r")) == NULL) {
      fprintf(stderr, "greppos: Error in opening file '%s'\n", s);
      return 1;
    }
    /* NOTE(review): fp is reused below without an fclose() of the range
       file; confirm whether GetFile() closes the stream itself. */
    else if ((range = GetFile(fp)) == NULL) {
      fprintf(stderr, "greppos: Error in reading range file '%s'\n", s);
      return 1;
    }
    else if ((s = GetFilename(cmdarg)) == NULL)
      fp = stdin;
    else if (GetFilename(cmdarg) != NULL) {
      usage();
      return 1;
    }
    else if ((fp = fopen(s, "r")) == NULL) {
      fprintf(stderr, "greppos: Error in opening file '%s'\n", s);
      return 1;
    }
  }

  header = MakeHeader();
  entry = MakeEntry();

  if (ReadHeader(fp, header) != 0)
    return 1;

  AddHeaderInfo(header, argc, argv);

  PrintHeader(stdout, header);

  while ((read_error = ReadEntryText(fp, entry)) == 0) {
    /* Prefer an "alignpos" column, fall back to "seqpos". A result of 0
       is treated below as "no position column". */
    num_col = ReadColno(entry, "alignpos");
    if (num_col == 0)
      num_col = ReadColno(entry, "seqpos");

    order_ptr = 0;   /* every entry scans the range from its beginning */
    PrintEntryText(stdout, entry);
    for (i = 1; (read_error = ReadEntryLines(fp, entry, 1)) == 0; i++) {
      if (num_col == 0)
        pos = i;     /* no position column: use the running line number */
      else {
        GetField(field, entry, 1, num_col);
        pos = atoi(field);
      }

      if ((option_s == 0 && inrange(pos, range)) ||
          (option_s == 1 && inorderrange(pos, range, &order_ptr)))
        PrintEntryLines(stdout, entry);
    }

    if (read_error == 1)
      break;  /* An error has occurred */

    PrintEntryEnd(stdout, entry);
  }

  if (fp != stdin && fclose(fp) != 0) {
    fprintf(stderr, "greppos: Error in closing file\n");
    return 1;
  }

  if (read_error == 1)
    return 1;

  return 0;
}

/*
 * Prints the command line synopsis to stderr.
 *
 * The long range option is shown as "--range=" because main() compares
 * the argument against "-range=" in the same way it compares against
 * "-sorted" and "-file", which are documented here as --sorted and
 * --file.
 */
void usage(void)
{
  fprintf(stderr,
          "usage: greppos\n"
          "            [-s | --sorted]  [-r<range> | --range=<range> | -f |\n"
          "            --file] [<rangefile>] [<file>]\n");
}

/*
 * Returns 1 if num lies within any item of range, otherwise 0.
 *
 * range is a list of items separated by ',' or '\n'. An item is either
 * a single number ("7") or an inclusive interval ("1-3"). Items that
 * cannot be parsed are skipped. A NULL range matches nothing.
 */
int inrange(int num, char *range)
{
  int ptr, ptr2;
  int low, high;
  int parsed;

  if (range == NULL)
    return 0;

  ptr = 0;

  while (range[ptr] != '\0') {
    /* Locate the end of the current item. */
    for (ptr2 = ptr; range[ptr2] != '\0' && range[ptr2] != ',' &&
                     range[ptr2] != '\n'; ptr2++)
      ;

    /* Try "low - high" first, then a single number. */
    if (sscanf(&range[ptr], " %d - %d", &low, &high) == 2)
      parsed = 1;
    else if (sscanf(&range[ptr], " %d", &low) == 1) {
      high = low;
      parsed = 1;
    }
    else
      parsed = 0;

    if (parsed && low <= num && num <= high)
      return 1;

    /* Step past the item and its separator even when it could not be
       parsed; otherwise a malformed or empty item would be examined
       forever. */
    ptr = ptr2;
    if (range[ptr] == ',' || range[ptr] == '\n')
      ptr++;
  }

  return 0;
}

/*
 * Returns 1 if num lies within an item of the ordered range, otherwise 0.
 *
 * range has the same format as for inrange(): items separated by ','
 * or '\n', each a single number or an inclusive "low-high" interval.
 * *ptr is the offset in range at which scanning starts. It is moved
 * forward past every item whose upper bound is below num, so that a
 * later call with a larger num does not look at those items again.
 * Scanning stops with 0 as soon as an item with num <= high is found
 * that does not contain num. Items that cannot be parsed are skipped.
 * A NULL range matches nothing.
 */
int inorderrange(int num, char *range, int *ptr)
{
  int ptr2;
  int low, high;
  int parsed;

  if (range == NULL)
    return 0;

  while (range[*ptr] != '\0') {
    /* Locate the end of the current item. */
    for (ptr2 = *ptr; range[ptr2] != '\0' && range[ptr2] != ',' &&
                      range[ptr2] != '\n'; ptr2++)
      ;

    /* Try "low - high" first, then a single number. */
    if (sscanf(&range[*ptr], " %d - %d", &low, &high) == 2)
      parsed = 1;
    else if (sscanf(&range[*ptr], " %d", &low) == 1) {
      high = low;
      parsed = 1;
    }
    else
      parsed = 0;

    if (parsed) {
      if (low <= num && num <= high)
        return 1;
      else if (num <= high)
        return 0;   /* num is below this item; keep *ptr for later calls */
    }

    /* Step past the item and its separator even when it could not be
       parsed; otherwise a malformed or empty item would be examined
       forever. */
    *ptr = ptr2;
    if (range[*ptr] == ',' || range[*ptr] == '\n')
      (*ptr)++;
  }

  return 0;
}
