/* Last changed Time-stamp: <Nov 14 15:21:38 sabari> */

/*                
    Efficient detection of local RNA secondary structure changes induced
    by SNPs 
*/

#include <ctype.h>
#include <dirent.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <unistd.h>
#include "utils.h"
#include "alifold.h"
#include "aln_util.h"
#include "fold.h"
#include "fold_vars.h"
#include "pair_mat.h"
#include "params.h"
#include "PS_dot.h"
#include "read_epars.h"
#include "RNAsnp_cmdl.h"
#include "snpr.h"

/* Declared here; no definition or call is visible in this file. */
void readSNP(char *snpFileName);
/* Reads an optional FASTA header and one sequence through get_input_line();
   defined further down in this file. */
void readSequence( char** header, char** sequence);
/* Loads the sequence stored in a FASTA file into a newly allocated string;
   defined further down in this file. */
char *read_fasta(char *seqFileName);
/* Result code of the most recent get_input_line() call made by readSequence(). */
unsigned int  input_type;
/* Line buffer filled by get_input_line() on behalf of readSequence(). */
char *input_string;

/* Starts a measurement: returns the processor time reported by clock()
   at the moment of the call. */
clock_t BeginTimer()
{
  return clock();
}

/* Stops a measurement: returns the processor time reported by clock() at
   the moment of the call. The `begin` argument is accepted but not used;
   the caller is expected to compute the difference itself. */
clock_t EndTimer(clock_t begin)
{
  (void)begin;   /* intentionally ignored, as in the historical behaviour */
  return clock();
}


//--------------------end include timer
/* Revision-control "$Id$" keyword string; it is not referenced anywhere in
   the visible part of this file. */
static char rcsid[] = "$Id: rnasnp.c,v 1.1 2012/08/03 11:28:48 htafer Exp $";

/*--------------------------------------------------------------------------*/

/*
 * Writes one line per possible substitution for every G/C/A/U position of
 * `sequence` to the file `path`, in the form <ref><1-based position><alt>
 * (e.g. "G12A"). Characters other than G, C, A and U produce no lines.
 * Returns 0 on success and -1 if the file cannot be created or written.
 */
static int write_all_snps(const char *path, const char *sequence)
{
  FILE *out = fopen(path, "w");
  int pos, k;
  int ok = 1;

  if(out == NULL) return -1;

  for(pos = 0; sequence[pos] != '\0'; pos++){
    const char *alt;
    /* the order of the alternatives matches the historical output order */
    switch(sequence[pos]){
      case 'G': alt = "ACU"; break;
      case 'C': alt = "AGU"; break;
      case 'A': alt = "GCU"; break;
      case 'U': alt = "GCA"; break;
      default:  alt = "";    break;
    }
    for(k = 0; alt[k] != '\0'; k++){
      if(fprintf(out, "%c%d%c\n", sequence[pos], pos+1, alt[k]) < 0) ok = 0;
    }
  }
  /* fclose flushes buffered data, so its result matters for a written file */
  if(fclose(out) != 0) ok = 0;

  return ok ? 0 : -1;
}

/*
 * Prints the tab-separated column header of the result table for the
 * given mode (1, 2 or 3), followed by the optional "edist" column.
 */
static void print_result_header(int mode, int edist)
{
  printf("SNP\tw\tSlen\tGC");
  if(mode==2){printf("\tmax_k\td_max\tp-value\tinterval\td\tp-value");}
  if(mode==1){printf("\tinterval\td_max\tp-value\tinterval\tr_min\tp-value");}
  if(mode==3){printf("\tinterval\td\tpvalue1\tewin\tinterval\td_max\tpvalue2");}
  if(edist==1){printf("\tedist");}
  printf("\n");
}

/*
 * Program entry point.
 *
 * Parses the command line, validates the window parameters, reads the
 * sequence from the FASTA file and hands everything to processSNP().
 * When no SNP file is supplied, mode 3 is forced and a temporary SNP list
 * covering every substitution at every position is generated, processed
 * and removed again.
 *
 * Exit status: the pre-existing validation failures keep their historical
 * status 0 (callers may rely on it); a command-line parser failure exits
 * with 1; failures that previously crashed the program (allocation or
 * temporary-file errors, over-long parameter path) exit with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
  struct RNAsnp_args_info args_info;
  char *snpFileName = NULL;   /* SNP list file (given or generated) */
  char *seqFileName = NULL;   /* FASTA file holding the sequence */
  char *header = NULL;        /* stays NULL: only readSequence() would set it */
  char *sequence = NULL;
  char *rnasnppath;
  char parFile[1024];
  int  parLen;
  int  seqLength;

  /* defaults, overridden below by the command line */
  int pairdist=120;
  int winsize=200;
  int regionX=20;
  int regionY=120;
  int winsizeFold=200;
  int winsizeExt=200;
  int minLen=50;
  float cutoff=0.01;
  float alpha=1.0;
  int mode=1;
  int edist=0;
  int snpflag=0;              /* 1 when the SNP list was generated here */
  float pvalue1=0.4;
  float pvalue2=0.1;

  /*
  #############################################
  # check the command line parameters
  #############################################
  */
  if(RNAsnp_cmdline_parser (argc,argv,&args_info)!=0) exit(1);

  /* sequence file */
  if(args_info.seq_given) seqFileName = strdup(args_info.seq_arg);
  /* SNP file */
  if(args_info.snp_given) snpFileName = strdup(args_info.snp_arg);
  /* select the folding method */
  if(args_info.mode_given)   mode = args_info.mode_arg;
  /* option for ensemble euclidean distance */
  if(args_info.edist_given)  edist = args_info.edist_arg;
  /* set the maximum base pair span */
  if(args_info.span_given)   pairdist = args_info.span_arg;
  /* set the averaging window */
  if(args_info.winsize_given) winsize = args_info.winsize_arg;
  /* set the locality of the region to scan */
  if(args_info.regionX_given) regionX = args_info.regionX_arg;
  /* set the locality of the structure to scan */
  if(args_info.regionY_given) regionY = args_info.regionY_arg;
  /* set the folding window */
  if(args_info.winsizeFold_given) winsizeFold = args_info.winsizeFold_arg;
  /* set the minimum length of the substructure */
  if(args_info.minLen_given) minLen = args_info.minLen_arg;
  /* set the cutoff for the base pairs */
  if(args_info.cutoff_given) cutoff = args_info.cutoff_arg;
  if(args_info.pvalue1_given) pvalue1 = args_info.pvalue1_arg;
  if(args_info.pvalue2_given) pvalue2 = args_info.pvalue2_arg;
  /* set the window extension */
  if(args_info.winsizeExt_given) winsizeExt = args_info.winsizeExt_arg;
  /* NOTE(review): args_info.boltzmannPreFactor_arg used to be copied into a
     local that nothing read; the value is not forwarded to processSNP(). */

  /** Check the input parameters **/

  /* the sequence file is mandatory */
  if(seqFileName==NULL) {printf("Please try RNAsnp -h for more details about the execution\n");exit(0);}

  /* the RNASNPPATH environment variable is mandatory */
  rnasnppath=getenv("RNASNPPATH");
  if(rnasnppath==NULL){printf("\n\tPlease set the environment variable RNASNPPATH to the path where RNAsnp-1.1 directory is present and TRY AGAIN!!\n\t(e.g export RNASNPPATH=/software/RNASNP-1.1)\n\n");return 0;}

  /* without a SNP file every possible SNP is screened, which implies mode 3 */
  if(snpFileName==NULL){mode=3;}

  /* window lengths must lie in 100/200..800 and be divisible by 50 */
  if( (mode==3 || mode==2) && (winsizeFold < 200 || winsizeFold > 800 || winsizeFold%50!=0) )
    {printf("Error: window length(w) must be in the range from 200 to 800 that is divisible by 50\n");return 0;}
  if( mode==1 && (winsizeFold < 100 || winsizeFold > 800 || winsizeFold%50!=0) )
    {printf("Error: window length(w) must be in the range from 100 to 800 that is divisible by 50\n");return 0;}

  if( (mode==3) && (winsizeExt < 100 || winsizeExt > 800 || winsizeExt%50!=0) )
    {printf("Error: window length(e) must be in the range from 100 to 800 that is divisible by 50\n");return 0;}

  /* warn if the default values of the additional parameters were changed */
  /* added on Nov 29, 2015 */
  if( mode==1 && (minLen != 50 || cutoff != 0.01f) )
    {printf("\nWarnings: The default value has been changed for one of the additional parameters. Thus, the reporting p-value is not accurate. Please refer to the README file for more details. \n\n");}
  if( mode==2 && (winsize != 200 || pairdist  != 120 || regionX != 20 || regionY !=120 || cutoff != 0.01f) )
    {printf("\nWarnings: The default value has been changed for one of the additional parameters. Thus, the reporting p-value is not accurate. Please refer to the README file for more details. \n\n");}

  /* check that RNASNPPATH leads to the parameter file for this window size;
     the path is built with a bounded write because RNASNPPATH is arbitrary
     user-controlled text */
  parLen = snprintf(parFile, sizeof parFile, "%s/lib/distParam/W%dparameters.bin", rnasnppath, winsizeFold);
  if(parLen < 0 || (size_t)parLen >= sizeof parFile)
    {printf("Error: The parameter file path derived from RNASNPPATH is too long\n");return EXIT_FAILURE;}
  if(access(parFile, F_OK) != 0){printf("Error: The parameter file couldn't be accessible at the path %s\n",parFile);return 0;}

  /* read the input sequence */
  sequence=read_fasta(seqFileName);
  seqLength=strlen(sequence);

  /* warn if the sequence is shorter than the folding window */
  /* added on Nov 29, 2015 */
  if( seqLength < winsizeFold )
    {printf("\nWarnings: The input sequence length %d is less than twice the size of chosen flanking size %d. Thus, the reporting p-value is not accurate. Please refer to the README file for more details. \n\n",seqLength, winsizeFold);}

  /* parameters larger than the sequence are clamped to the sequence length */
  minLen   = (minLen  >seqLength ? seqLength : minLen  );
  winsize  = (winsize >seqLength ? seqLength : winsize );
  regionX  = (regionX >seqLength ? seqLength : regionX );
  regionY  = (regionY >seqLength ? seqLength : regionY );
  pairdist = (pairdist>seqLength ? seqLength : pairdist);

  /*############################
  *** Go to the processing
  *############################*/
  if(snpFileName==NULL)
  {
    /* No SNP file given: generate a list with every substitution at every
       position in a per-process temporary file.
       NOTE(review): the name is predictable (/tmp/snp<pid>.txt); consider
       mkstemp() if processSNP() does not depend on the ".txt" suffix. */
    int pid = getpid();
    int nameLen = snprintf(NULL, 0, "/tmp/snp%d.txt", pid);

    if(nameLen < 0 || (snpFileName = malloc((size_t)nameLen + 1)) == NULL){
      fprintf(stderr, "Error: out of memory while creating the temporary SNP file name\n");
      return EXIT_FAILURE;
    }
    snprintf(snpFileName, (size_t)nameLen + 1, "/tmp/snp%d.txt", pid);

    if(write_all_snps(snpFileName, sequence) != 0){
      perror(snpFileName);
      remove(snpFileName);   /* do not leave a partial file behind */
      return EXIT_FAILURE;
    }
    snpflag=1;
  }

  /* mode is 3 whenever the SNP list was generated above */
  print_result_header(mode, edist);
  processSNP(snpFileName, header, sequence, winsize, pairdist, regionX, regionY,winsizeFold,minLen,cutoff,alpha,mode,edist,snpflag,winsizeExt,rnasnppath,pvalue1,pvalue2);

  /* delete the generated SNP list directly instead of spawning "rm" */
  if(snpflag && remove(snpFileName) != 0) perror(snpFileName);

  free(header);
  free(sequence);
  /* free allocated memory of command line data structure */
  RNAsnp_cmdline_parser_free (&args_info);
  free(snpFileName);
  free(seqFileName);
  return 0;
}

/*
 * Reads an optional FASTA header and one sequence through get_input_line().
 *
 *   header    receives a newly allocated copy of the first whitespace-
 *             delimited token of the last header line seen; left untouched
 *             when no header line is read.
 *   sequence  receives a newly allocated, DNA->RNA converted, upper-cased
 *             copy of the sequence line; left untouched when input ends
 *             (quit/error) before any sequence line is read, and set to
 *             NULL if the copy cannot be allocated.
 *
 * Reading repeats while the sequence obtained is empty. The caller owns
 * and must free() both strings. Buffers allocated by this call and then
 * superseded (repeated headers, empty sequences) are released here, and
 * the global input_string is reset to NULL after it has been freed.
 */
void readSequence(char** header, char** sequence){
  char *own_header = NULL;   /* header buffer allocated during this call */
  char *own_seq    = NULL;   /* sequence buffer allocated during this call */

  /*
    ########################################################
    # handle user input from 'stdin'
    ########################################################
  */
  do{
    /* extract filename from fasta header if available */
    while((input_type = get_input_line(&input_string, 0)) == VRNA_INPUT_FASTA_HEADER){
      free(own_header);      /* drop a header superseded by a later one */
      own_header = (char*) space(strlen(input_string)+1);
      (void) sscanf(input_string,"%s",own_header);
      (*header) = own_header;
      free(input_string);
      input_string=NULL;
    }
    if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}

    /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
    free(own_seq);           /* drop the empty sequence of the previous round */
    own_seq = strdup(input_string);
    *(sequence) = own_seq;
    free(input_string);
    input_string=NULL;       /* do not leave the global dangling */
    if(own_seq == NULL){ break;}   /* allocation failed: nothing to convert */

    /* convert DNA alphabet to RNA and normalise to upper case */
    str_DNA2RNA(*sequence);
    str_uppercase(*sequence);
  }
  while(!strlen(*sequence));
}

/*
 * Reads the sequence stored in the FASTA file `seqFileName`.
 *
 * Everything from a '>' up to the end of that line is treated as a header
 * and skipped. From the remaining text only alphabetic characters are
 * kept; they are upper-cased and 'T' is replaced by 'U'. Sequence lines of
 * all records in the file are concatenated.
 *
 * Returns a newly allocated NUL-terminated string (empty if the file holds
 * no sequence characters) that the caller must free(). If the file cannot
 * be opened an error is printed and the program exits with status 0 (the
 * historical behaviour); if memory runs out it exits with EXIT_FAILURE.
 */
char *read_fasta(char *seqFileName)
{
  FILE *fin;
  char *seq;
  size_t len = 0;       /* characters stored so far */
  size_t cap = 256;     /* allocated size of seq */
  int c;                /* int, so EOF stays distinct from every byte value */
  int in_header = 0;

  if(!(fin=fopen(seqFileName,"r"))){
    printf("Error: Can't read/find file %s\n", seqFileName);
    exit(0);
  }

  seq = malloc(cap);
  if(seq == NULL){
    fclose(fin);
    fprintf(stderr, "Error: out of memory while reading %s\n", seqFileName);
    exit(EXIT_FAILURE);
  }

  while((c = fgetc(fin)) != EOF)
  {
    if(c == '>') in_header = 1;
    if(in_header){
      /* the header ends with its line */
      if(c == '\n') in_header = 0;
      continue;
    }
    /* fgetc yields unsigned char values, which isalpha/toupper require */
    if(!isalpha(c)) continue;

    /* keep room for this character plus the terminating NUL */
    if(len + 2 > cap){
      size_t new_cap = cap * 2;
      char *grown = (new_cap > cap) ? realloc(seq, new_cap) : NULL;
      if(grown == NULL){
        free(seq);
        fclose(fin);
        fprintf(stderr, "Error: out of memory while reading %s\n", seqFileName);
        exit(EXIT_FAILURE);
      }
      seq = grown;
      cap = new_cap;
    }

    c = toupper(c);
    if(c == 'T') c = 'U';
    seq[len++] = (char)c;
  }
  seq[len] = '\0';
  fclose(fin);
  return(seq);
}

