/* Last changed Time-stamp: <2012-08-03 3:05 sabari>*/
/*
    Efficient detection of local RNA secondary structure change
    induced by SNPs.
*/ 
    
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include "LPfold.h"
#include "alifold.h"
#include "energy_const.h"
#include "read_epars.h"
#include "utils.h"
#include "fold_vars.h"
#include "fold.h"
#include "part_func.h"
#include "PS_dot.h"
#include "snpr.h"
#include "ipmpar.c"
#include "dcdflib.c"

/* ---- forward declarations of the file-local helpers ---- */
PRIVATE int      compare(hits *elem1, hits *elem2);
PRIVATE float**  fmatrix(int rl,int rh,int cl,int ch,int val);
PRIVATE float**  compute_basepair(char *seq, int seqlen);
PRIVATE float**  compute_basepairRNAplfold(char *seq, int seqlen,int winsize, int pairdist, int regionX, int regionY);
PRIVATE float    free_array(float **arr,int len);
PRIVATE void     checkSNPALL(char *snpFileName, char *sequence,int winsizeFold, int winsize,int pairdist, int regionX, int regionY,int min,int Prog,int edist,int snpflag,int winsizeExt,float pvalue1,float pvalue2);

PRIVATE float**    compute_accBasepair(float **arr, int seqlen); 
/* NOTE(review): compute_pccRNAplfold is declared here but no definition is
   visible in this part of the file - confirm it exists or drop the prototype. */
PRIVATE distRes1*  compute_pccRNAplfold(float **wild,float **mut,int offset, int posMin, int posMax,int pairdist,int regionX, int regionY); 

PRIVATE float Pvalue(float d,float gc,int pos,int seqlen,int window,int opt);
PRIVATE FLT_OR_DBL* computeEnsembleEuclideanDistance(char *wtSeq, char *mutSeq);
/* Distance result allocated by compute_pcc(); the caller reads it and frees it. */
PRIVATE distRes *Dresults;
		 
PRIVATE float**  dpmatrix(int rl, int rh, int cl, int ch);
/* NOTE(review): output() is declared but not defined in the visible source. */
PRIVATE void     output(int* count);
/* Wild-type RNAplfold pair-probability matrix, kept across SNP lines in checkSNPALL(). */
PRIVATE float** wild_plfoldBp;
/* Not referenced at file scope in the visible code (checkSNPALL declares locals of the same name). */
PRIVATE float** wild_foldBp;
/* 0-based bounds of the subsequence folded for the current SNP line (set in checkSNPALL()). */
PRIVATE int posSubSeqMin, posSubSeqMax;
		 
PRIVATE snpRres* compute_bpd(float **wild, float **mut, int startCompute, int endCompute, int posSubSeqMin, int posSubSeqMax, int regionX, int regionY);
PRIVATE pccRes*  compute_pcc(float **wild,float **mut,int offset, int posMin, int posMax,int pairdist);

/* Probability cutoff (from processSNP's `cutoff`): pair probabilities not above it stay 0 in the matrices. */
PRIVATE float bp;
/* Factor (from processSNP's `iop`) applied to the outside-interval sums in compute_pcc(). */
PRIVATE float alpha;
/* Minimum interval length examined by compute_pcc() (from processSNP's `min`). */
PRIVATE int minLen;
/* Only mentioned in a commented-out dot-plot call in the visible code. */
PRIVATE int dot=0;
/* Counts SNP lines since the wild-type RNAplfold matrix was last computed; wraps at 3. */
PRIVATE int SNPflag=0;
/* Directory prefix under which Pvalue() looks for lib/distParam/W<window>parameters.bin. */
PRIVATE char *PATH;
/* Mode-3 state: pflag gates the second stage and the output line; rstart/rend are the first-stage interval. */
PRIVATE int pflag=1,rstart,rend;
/* Mode-3 state: first-stage distance and its p-value, printed by the second stage. */
PRIVATE float rdist,rpvalue;
/* Mode-3 state: extra window extension (grown in steps of 50) on top of winsizeExt. */
PRIVATE int temp;


/*
 * Public entry point: publish the run-wide settings in the file-level
 * variables read by the helpers (minimum interval length, probability
 * cutoff, inside/outside factor, installation path) and then process every
 * line of the SNP file against `sequence`.
 * `header` is accepted but not used here.
 */
PUBLIC void processSNP(char *snpFileName, char *header, char *sequence,int winsize, int pairdist, int regionX, int regionY,int winsizeFold,int min,float cutoff,float iop,int Prog,int edist,int snpflag,int winsizeExt,char *path,float pvalue1,float pvalue2){
  PATH   = path;
  alpha  = iop;
  bp     = cutoff;
  minLen = min;

  checkSNPALL(snpFileName,
              sequence,
              winsizeFold,
              winsize,
              pairdist,
              regionX,
              regionY,
              min,
              Prog,
              edist,
              snpflag,
              winsizeExt,
              pvalue1,
              pvalue2);
}


void checkSNPALL(char *snpFileName, char* sequence, int winsizeFold, int winsize, int pairdist, int regionX, int regionY,int minLen,int Prog,int edist,int snpflag,int winsizeExt,float pvalue1,float pvalue2){
  int seqLength;
  seqLength=strlen(sequence);
  //File Related stuff
  FILE *snpFile;
  snpFile=NULL;
  int sizeOfSNPLine=16;
  /*##############################
   *Check if we have a snpFileName
   *##############################*/
  if(!(snpFile=fopen(snpFileName,"r"))){
    printf("Cant read/find file %s\n", snpFileName); 
    exit(0);
  }
  while(!feof(snpFile)) {
    char *mutantSequence;
    mutantSequence=strdup(sequence);
    char *snpLine,*tmpsnpLine;
    snpLine=NULL;
    snpLine = get_line(snpFile);
    
    if(snpLine==NULL){continue;}
    if(snpLine[0]=='#'){
      free(snpLine);
      continue;
    }
     tmpsnpLine=strdup(snpLine);
    /*###############################################
     * Start parsing the SNP. We need three arrays to
     * store the SNP information, and a variable to check 
     * if we have more SNP in a sequence than allowed
     *###############################################*/

    int sizeOfSNPLine=16;
    int numberOfSNPs =0;
    /*#######################################
     * Array containing the SNP informations
     * fromSNP, toSNP, posSNP
     *#######################################*/
    char *fromSNP;
    char *toSNP;
    int  *posSNP;
    fromSNP=(char*)space (sizeof(char) * sizeOfSNPLine);
    toSNP  =(char*)space (sizeof(char) * sizeOfSNPLine);
    posSNP =(int*) space (sizeof(int) * sizeOfSNPLine); 
    const char delimiter [] = "-";
    char *token;
    int count;
    count=0;
    /*#############################################
     *  Parse the entries in the SNP files with token
     *#############################################*/ 
    token=strtok(snpLine,"-");
    do{
      sscanf(token,"%c%d%c",(fromSNP+count),(posSNP + count),(toSNP + count));
      if(fromSNP[count]=='T')
         fromSNP[count]='U';
      if(toSNP[count]=='T')
         toSNP[count]='U';

      if((posSNP[count]-1)>=seqLength){
	printf("The position of the snp %s is farther away than the length of the sequence...skipping\n", token);
	token=strtok(NULL,"-");
      }
      else if(!(sequence[posSNP[count]-1]==fromSNP[count])){
	printf("The nucleotide to be changed %c is not the same as in the reference sequence %c->%c<-%c...skipping %s\n", 
	       fromSNP[count], sequence[posSNP[count]-2],sequence[posSNP[count]-1],sequence[posSNP[count]],token);
	token=strtok(NULL,"-");
      }
      else{
	mutantSequence[posSNP[count]-1]=toSNP[count];
	count++; 
	//resize the 3 arrays if necessary;
	if((sizeOfSNPLine -  count) < 1){
	  sizeOfSNPLine*=2;
	  fromSNP = (char*) realloc(fromSNP, sizeOfSNPLine * sizeof(char));
	  toSNP =   (char*) realloc(toSNP,   sizeOfSNPLine * sizeof(char));
	  posSNP =  (int* ) realloc(posSNP,  sizeOfSNPLine * sizeof(int));
	}
      }
      token=strtok(NULL,"-");
    }while(token);
    /*###############################################
     * Ok so now we parsed all SNP informations.
     * We need to fold and compare stuff
     * We first look at the very specific subsequence
     *################################################*/
     
    /*###############################################*
     * 
     *                        subsequence to fold 
     *            >-------------------|----------------------<
     *
     *  |-----------------------------*-------------------------------|
     *                               posSNP
     *
     *  Subsequence should be started at posSNP - winsizeFold and end at posSNP + winsizeFold
     *  The computation should start exactly at posSNP - winsizeFold
     *  So we need to define a variable for the 
     *##############################################*/
    
    /*#############################
     *position for the start and end 
     *of the subsequences
     #############################*/
    if(count<1){
      free(snpLine);
      free(fromSNP);free(toSNP);free(posSNP);
      continue;
    }
    count--;
    posSubSeqMin = (posSNP[0]-winsizeFold < 1?1:posSNP[0]-winsizeFold);
    posSubSeqMax = (posSNP[count]+winsizeFold > seqLength ? seqLength : posSNP[count]+winsizeFold);
    posSubSeqMin--, posSubSeqMax--;
    int subSeqLength=posSubSeqMax - posSubSeqMin+1;
 
 
    /*#############################
     * extract the subsequence
     * for the wild-type
     #############################*/
    
    char *wtSeq;
    wtSeq = (char*) space((posSubSeqMax -posSubSeqMin+2)*sizeof(char));
    strncpy(wtSeq, sequence + posSubSeqMin,  (posSubSeqMax - posSubSeqMin+1));
    strcat(wtSeq,"\0");
    
    /*#############################
     * extract the subsequence
     * for the mutant
     #############################*/
    char *mutSeq;
    mutSeq  = (char*) space((posSubSeqMax - posSubSeqMin+2) * sizeof(char));
    strncpy(mutSeq, mutantSequence + posSubSeqMin,  (posSubSeqMax- posSubSeqMin+1));
    strcat(mutSeq,"\0");
    
    int startCompute, endCompute;
    startCompute = (posSNP[0]-winsize < 1?1:posSNP[0]-winsize);
    endCompute   = (posSNP[count]+winsize > seqLength ? seqLength : posSNP[count]+winsize);
    
    /* Compute GC% of subsequence */
    float GC=0.0;
    int posSubSeq=0;
    while(wtSeq[posSubSeq]!='\0'){
      if(wtSeq[posSubSeq]=='G' || wtSeq[posSubSeq]=='C'){
	GC++;
      }
       posSubSeq++;
    }
    GC/=strlen(wtSeq);


   /*#########################################
      Compute RNAplfold stuff for mode 2 and 3
      ########################################*/
    int posSubSeqMax1,posSubSeqMin1;
    posSubSeqMin1 = posSubSeqMin; posSubSeqMax1 = posSubSeqMax;

    if(Prog==2 || Prog==3)
    {

	/* Compute the base pair probability matrix for wild-type,
           do it once for mode 3*/
	if(SNPflag==0)
        	wild_plfoldBp = compute_basepairRNAplfold(wtSeq, posSubSeqMax-posSubSeqMin+1,winsize,pairdist,regionX,regionY);
        
        /* Compute the base pair probability matrix for mutant */
	float **mutant_plfoldBp;
	mutant_plfoldBp = compute_basepairRNAplfold(mutSeq, (posSubSeqMax- posSubSeqMin+1),winsize,pairdist,regionX,regionY);

	/* Compute d_max/RNAplfold */
	snpRres *resSNPr;
	resSNPr = compute_bpd(wild_plfoldBp,mutant_plfoldBp, startCompute - posSubSeqMin, endCompute-posSubSeqMin, 0, posSubSeqMax - posSubSeqMin, regionX,regionY);

        /* Compute p-values for d_max and d#*/
        resSNPr->pvalue1=Pvalue(resSNPr->maxLeft,GC,posSNP[0],seqLength,winsizeFold,1);
        resSNPr->pvalue2=Pvalue(resSNPr->maxpos,GC,posSNP[0],seqLength,winsizeFold,2);

	/* print RNAplfold d_max and d# results */
        if(Prog==2)
        {
        printf("%s\t%d\t%d\t%.4f",tmpsnpLine,winsizeFold,seqLength,GC);
	printf("\t%d\t%4.4f\t%4.4f\t%d-%d\t%4.4f\t%4.4f",resSNPr->left+posSubSeqMin,resSNPr->maxLeft,resSNPr->pvalue1,resSNPr->left+posSubSeqMin,resSNPr->end+posSubSeqMin,resSNPr->maxpos,resSNPr->pvalue2);
        }

	if(Prog==3)
        {
	            /* define new subsequence positions from the local region identified using dmax
                       i.e local region + some flanking regions (defined using -e option)
		       This new subsequence is passed to RNAfold to find the exact local region */
                rstart=resSNPr->left+posSubSeqMin;rend=resSNPr->end+posSubSeqMin;
                temp=0;
                do{
                posSubSeqMin1 = (posSNP[0]-(winsizeExt+temp) < 1?1:posSNP[0]-(winsizeExt+temp));
                posSubSeqMax1 = (posSNP[count]+(winsizeExt+temp) > seqLength ? seqLength : posSNP[count]+(winsizeExt+temp));
                temp+=50;
                }while(rstart<posSubSeqMin1 || rend >posSubSeqMax1);
                temp-=50;
                posSubSeqMin1--, posSubSeqMax1--;
                pflag=0;
                
                if(resSNPr->pvalue2 < pvalue1)
	        {
                  pflag=1;
                  rstart=resSNPr->left+posSubSeqMin;rend=resSNPr->end+posSubSeqMin;rdist=resSNPr->maxpos;rpvalue=resSNPr->pvalue2;
                }
        }

	free(resSNPr);
	free_array(mutant_plfoldBp,(posSubSeqMax- posSubSeqMin+1));
    }

   /*########################
      Compute RNAfold stuff
      #######################*/

    if(Prog==1)
    {
        /*********************************************************************/
        // Run RNAfold with window similar to RNAplfold
        posSubSeqMax1=posSubSeqMax;posSubSeqMin1=posSubSeqMin;        
         
        /* Extract the subsequence for folding */
        char *wtSeq1,*mutSeq1;
        wtSeq1 = (char*) space((posSubSeqMax1 -posSubSeqMin1+2)*sizeof(char));
        strncpy(wtSeq1, sequence + posSubSeqMin1,  (posSubSeqMax1 - posSubSeqMin1+1));
        strcat(wtSeq1,"\0");
        mutSeq1  = (char*) space((posSubSeqMax1 - posSubSeqMin1+2) * sizeof(char));
        strncpy(mutSeq1, mutantSequence + posSubSeqMin1, (posSubSeqMax1- posSubSeqMin1+1));
        strcat(mutSeq1,"\0");

        /* Compute base pair probability matrix*/
        float **wild_foldBp;
        wild_foldBp = compute_basepair(wtSeq1, posSubSeqMax1-posSubSeqMin1+1);
        float **mutant_foldBp;
        mutant_foldBp = compute_basepair(mutSeq1, (posSubSeqMax1- posSubSeqMin1+1));

        int startCompute1=posSubSeqMin1+1;
        int endCompute1=posSubSeqMax1+1;

        /* Compute d_max and r_min together */
        pccRes *resPCCfold;
        resPCCfold = compute_pcc(wild_foldBp,mutant_foldBp,posSubSeqMin1, startCompute1 - posSubSeqMin1,endCompute1 - posSubSeqMin1,pairdist);

        /* Compute pvalues */
        Dresults->pvalue1=Pvalue(Dresults->maxDAll,GC,posSNP[0],seqLength,winsizeFold,3);
        resPCCfold->pvalue1=Pvalue(resPCCfold->minAll,GC,posSNP[0],seqLength,winsizeFold,4); 
        
        /* print the d_max and r_min values */
        printf("%s\t%d\t%d\t%.4f",tmpsnpLine,winsizeFold,seqLength,GC);
        printf("\t%d-%d\t%.4f\t%4.4f",Dresults->startAll,Dresults->endAll,Dresults->maxDAll,Dresults->pvalue1);
        printf("\t%d-%d\t%.4f\t%.4f",resPCCfold->startAll,resPCCfold->endAll, resPCCfold->minAll,resPCCfold->pvalue1);

        free(resPCCfold);free(Dresults);
        free(wtSeq1);free(mutSeq1);
        free_array(wild_foldBp,(posSubSeqMax1 - posSubSeqMin1+1));
        free_array(mutant_foldBp,(posSubSeqMax1- posSubSeqMin1+1));
    }
    if(Prog==3 && pflag==1)
    {

        /* extract the new subsequence with position returned by Prog 1 */
        char *wtSeq1,*mutSeq1;
        wtSeq1 = (char*) space((posSubSeqMax1 -posSubSeqMin1+2)*sizeof(char));
        strncpy(wtSeq1, sequence + posSubSeqMin1,  (posSubSeqMax1 - posSubSeqMin1+1));
        strcat(wtSeq1,"\0");
        mutSeq1  = (char*) space((posSubSeqMax1 - posSubSeqMin1+2) * sizeof(char));
        strncpy(mutSeq1, mutantSequence + posSubSeqMin1, (posSubSeqMax1- posSubSeqMin1+1));
        strcat(mutSeq1,"\0");

        /* Compute GC% of subsequence */
        float GC1=0.0;
        int posSubSeq=0;
        while(wtSeq1[posSubSeq]!='\0'){
        if(wtSeq1[posSubSeq]=='G' || wtSeq1[posSubSeq]=='C'){
	 GC1++;
         }
          posSubSeq++;
         }
         GC1/=strlen(wtSeq1);

        /* Compute base pair probability matrix*/
        float **wild_foldBp;
        wild_foldBp = compute_basepair(wtSeq1, posSubSeqMax1-posSubSeqMin1+1);
        float **mutant_foldBp;
        mutant_foldBp = compute_basepair(mutSeq1, (posSubSeqMax1- posSubSeqMin1+1));

        int startCompute1=posSubSeqMin1+1;
        int endCompute1=posSubSeqMax1+1;

        /* Compute d_max and r_min together */
        pccRes *resPCCfold;
        resPCCfold = compute_pcc(wild_foldBp,mutant_foldBp,posSubSeqMin1, startCompute1 - posSubSeqMin1,endCompute1 - posSubSeqMin1,pairdist);


        /* Compute p-values for the new SNP position and window length */
        int newWinsizeFold,newposSNP;
        newWinsizeFold=(winsizeExt+temp);
        newposSNP=posSNP[0];
        Dresults->pvalue1=Pvalue(Dresults->maxDAll,GC1,newposSNP,seqLength,newWinsizeFold,3);
        
        /* print the d_max and r_min results */
        pflag=0;
        if(Dresults->pvalue1 < pvalue2)
        {
        printf("%s\t%d\t%d\t%.4f",tmpsnpLine,winsizeFold,seqLength,GC);
        printf("\t%d-%d\t%4.4f\t%4.4f",rstart,rend,rdist,rpvalue);
        printf("\t%d",newWinsizeFold);
        printf("\t%d-%d\t%.4f\t%4.4f",Dresults->startAll,Dresults->endAll,Dresults->maxDAll,Dresults->pvalue1);
        pflag=1;
        }

        free(resPCCfold);free(Dresults);
        free(wtSeq1);free(mutSeq1);
        free_array(wild_foldBp,(posSubSeqMax1 - posSubSeqMin1+1));
        free_array(mutant_foldBp,(posSubSeqMax1- posSubSeqMin1+1));
    }

    /*###########################
      Compute euclidean distance
      between two ensembles
      *#########################*/
    if(edist==1 && pflag==1)
    {
	FLT_OR_DBL *resultsSTUDLA; resultsSTUDLA=computeEnsembleEuclideanDistance(wtSeq,mutSeq);
	//printf("\t%g\t%g\n",2*resultsSTUDLA[2]/(resultsSTUDLA[0]+resultsSTUDLA[1]),resultsSTUDLA[0]+resultsSTUDLA[1]-2*resultsSTUDLA[2]);
	printf("\t%.4f",2*resultsSTUDLA[2]/(resultsSTUDLA[0]+resultsSTUDLA[1]));
	free(resultsSTUDLA);
    }

    if(Prog!=3) printf("\n"); 
    if(Prog==3 && pflag==1) printf("\n"); 
    if(snpflag==1) 
	SNPflag++;
    if(SNPflag==3)
    {
       SNPflag=0;
       if(Prog==1||Prog==3)
	  free_array(wild_plfoldBp,(posSubSeqMax- posSubSeqMin+1));
    }

  free(fromSNP);free(toSNP);free(posSNP);free(snpLine);free(tmpsnpLine);
  free(wtSeq);free(mutSeq);
  free(mutantSequence);
  mutantSequence=NULL;
  }
    fclose(snpFile); 
    if((SNPflag>0 && (Prog==2||Prog==3)) || (snpflag==0 && (Prog==2||Prog==3)))
      free_array(wild_plfoldBp,(posSubSeqMax- posSubSeqMin+1));
}


PRIVATE float **compute_basepair(char *seq, int seqlen)
{

   int i, j, n;
   char *structure=NULL;
   float pf,mfe,kT,sfact=1.07;
   float** arr;
   /*##################################################
    *Declare the arr array to collect probability values
    *it has length seqlen in both dimensions
    ##################################################*/
   arr = dpmatrix(0,seqlen,0,seqlen);
   n = seqlen;
   structure = (char*) space(sizeof(unsigned)*(n+1));
   dangles = 2;
   boltzmannPreFactor=1.0;
   noLonelyPairs = 0;
   mfe = fold(seq, structure);
   do_backtrack = 1;
   kT = (temperature+273.15)*1.98717/1000.; /* in Kcal */
   pf_scale = exp(-(sfact*mfe)/kT/n);
   pf = pf_fold(seq, structure);
   //printf("-------------\n");
   for(i=1;i<n;i++){
     for(j=i+3+1;j<=n;j++)
       if(pr[iindx[i]-j] > bp)
	{ 
	  arr[i-1][j-1]=pr[iindx[i]-j];
	  arr[j-1][i-1]=pr[iindx[i]-j];
	}
   }
   //if(dot)PS_dot_plot(seq, ffname);
   free(structure);
   free_pf_arrays();
   free_arrays(); 
   return arr;
 }

 /*
  * Accumulate every row of the seqlen x seqlen matrix `arr` in place, away
  * from the diagonal: to the right of the diagonal each entry becomes the
  * running sum from the diagonal up to that column, to the left each entry
  * becomes the running sum from the diagonal down to that column.
  * Returns `arr` itself.
  */
 PRIVATE float** compute_accBasepair(float **arr, int seqlen)
 {
   int row;

   for(row = 0; row < seqlen; row++){
     float *line = arr[row];
     int    col;

     /* rightwards from the diagonal */
     col = row + 1;
     while(col < seqlen){
       line[col] += line[col-1];
       col++;
     }

     /* leftwards from the diagonal */
     col = row - 1;
     while(col >= 0){
       line[col] += line[col+1];
       col--;
     }
   }
   return arr;
 }

/*
 * Fold `seq` locally with pfl_fold() and return a symmetric
 * (seqlen+1) x (seqlen+1) matrix, indexed with the positions reported by
 * pfl_fold(), holding every pair probability above the file-level cutoff
 * `bp`.  The window size and the maximal pair span are reduced to `seqlen`
 * when the sequence is shorter.  regionX / regionY are not used here.
 * The caller owns the returned matrix (release with free_array(m, seqlen)).
 *
 * NOTE(review): &dpp is handed to pfl_fold() and never looked at afterwards;
 * confirm against the library whether it can come back allocated.
 */
PRIVATE float **compute_basepairRNAplfold(char *seq, int seqlen,int winsize, int pairdist, int regionX, int regionY)
{
  int     window = winsize;
  int     span   = pairdist;
  plist  *pairs  = NULL;
  plist  *entry;
  plist  *dpp    = NULL;
  float **dpMatrix;

  /* neither the window nor the span may exceed the sequence */
  if(seqlen < span){
    window = seqlen;
    span   = seqlen;
  }
  else if(seqlen < window){
    window = seqlen;
  }

  dpMatrix = dpmatrix(0,seqlen,0,seqlen);
  pairs    = pfl_fold(seq, window, span, 0, NULL, &dpp, NULL, NULL);

  /* the list ends at the first entry whose j is not positive */
  for(entry = pairs; entry->j > 0; entry++){
    if(entry->p > bp){
      dpMatrix[entry->i][entry->j] = entry->p;
      dpMatrix[entry->j][entry->i] = entry->p;
    }
  }

  free(pairs);
  return dpMatrix;
}



/*
 * Compare the wild-type and mutant pair-probability matrices and locate the
 * region of maximal squared difference.
 *
 *   wild, mut        matrices to compare
 *   startCompute,
 *   endCompute       first / last matrix index that is taken into account
 *   posSubSeqMin     offset subtracted from the indices into `mut` during the
 *                    scan (the visible caller passes 0)
 *   posSubSeqMax     not used
 *   regionX, regionY extent of the scanned window in rows / in columns
 *
 * Returns a freshly allocated snpRres (caller frees) with
 *   left     start index of the window with the largest sum
 *   maxLeft  that largest sum
 *   end      end index of the interval starting at `left` whose normalised
 *            sum is largest
 *   maxpos   that largest normalised sum
 *   pvalue1, pvalue2  set to 0
 *
 * Fix: `end` used to be read from an uninitialised variable whenever no
 * candidate interval had a sum above zero (or no candidate existed); it now
 * falls back to `left` in that case.
 *
 * NOTE(review): the update step of the scan adds row pos+regionX+1 and
 * removes row pos, which does not look like a plain sliding window.  It is
 * deliberately left untouched because it changes the numbers the p-value
 * parameters are looked up with - confirm against how those were fitted.
 */
PRIVATE snpRres*  compute_bpd(float **wild, float **mut, int startCompute, int endCompute,
			      int posSubSeqMin, int posSubSeqMax, int regionX, int regionY)
{
  int pos, posI,posJ;
  int localityX, localityY;
  localityX = regionX; localityY=regionY;
  float sum;float max;int maxpos;
  snpRres *results; results=(snpRres*) space(sizeof(snpRres));
  max=0;
  sum=0;
  //Sum of the first window, the starting point of the scan
  pos=startCompute;
  for(posI=pos; posI < pos+localityX && posI <= endCompute; posI++){
    for(posJ=posI; posJ < posI + localityY && posJ <= endCompute; posJ++){
      float diff;
      diff=(wild[posI][posJ]-mut[posI-posSubSeqMin][posJ-posSubSeqMin]);
      sum+=diff*diff;
    }
  }
  //##############################
  //Scan: update the running sum for every later start position
  //##############################
  maxpos=startCompute;
  max=sum;
  for(pos=startCompute+1; pos<= endCompute; pos++){
    //row entering the sum (nothing is read once posI is beyond endCompute)
    posI = pos + localityX+1;
    for(posJ=posI; posJ < posI + localityY && posJ <= endCompute; posJ++){
      float diff;
      diff=(wild[posI][posJ]-mut[posI-posSubSeqMin][posJ-posSubSeqMin]);
      sum+=diff*diff;
    }
    //row leaving the sum
    posI = pos;
    for(posJ=posI; posJ < posI + localityY && posJ <= endCompute; posJ++){
      float diff;
      diff=(wild[posI][posJ]-mut[posI-posSubSeqMin][posJ-posSubSeqMin]);
      sum-=diff*diff;
    }
    if(max<sum){
      max=sum;
      maxpos=pos;
    }
  }

  results->left = maxpos;
  results->maxLeft = max;

  //#################################
  // Find the exact interval of max difference: try every end position in
  // [maxpos+localityX, maxpos+localityX+localityY) and keep the one with the
  // largest sum per unit length
  //##################################

  int maxend=maxpos;   // defined result even if no candidate beats 0
  max=0;
  for(posJ=maxpos+localityX;posJ<maxpos+localityX+localityY && posJ<=endCompute;posJ++){
    sum=0;

    //all pairs (posI,pos) with maxpos <= posI < pos <= posJ
    for(posI=maxpos;posI<posJ;posI++){
      for(pos=(posI+1);pos<=posJ;pos++)
      {
        float diff;
        diff=(wild[posI][pos]-mut[posI][pos]);
        sum+=diff*diff;
      }
    }
    sum/=(posJ-maxpos);
    if(max<sum){
      max=sum;
      maxend=posJ;
    }
  }
  results->end=maxend;
  results->maxpos=max;
  results->pvalue1=0;
  results->pvalue2=0;
  return results;
}

/*
 * Scan all intervals of the folded window and report
 *   - the interval with the smallest correlation coefficient r between the
 *     accumulated wild-type and mutant pair probabilities (returned pccRes),
 *   - the interval with the largest mean squared difference (stored in the
 *     file-level Dresults, which is allocated here).
 *
 *   wild, mut   pair-probability matrices; both are overwritten in place by
 *               compute_accBasepair()
 *   offset      added (together with 1) to the 0-based interval bounds
 *               before they are stored in the results
 *   posMin,
 *   posMax      1-based first / last position of the scanned range
 *   pairdist    not used
 *
 * Reads the file-level minLen (shortest interval examined) and alpha
 * (factor applied to the "outside" sums).
 * The caller frees both the returned pccRes and Dresults.
 */
PRIVATE pccRes* compute_pcc(float **wild,float **mut, int offset, int posMin, int posMax,int pairdist){
  // Function to compute the PCC and distance using RNAfold matrix
  float b,wout,mout,wins,mins;
  float emn,em,en,em2,en2,test,bx,by,r,top;
  float m,n;
  int x,y,z,cnt,N,flag=0,count=0;  // flag and count are never used
  float min;
  int len;
  len=(posMax-posMin+1);

  // compute distance along side
  // distanceAll[x][y] (y > x) is the running sum over the columns x+1..y of
  // the squared differences in row x; it is taken from the matrices before
  // they are accumulated below.
  // presumably relies on dpmatrix() handing back zeroed memory (the entry
  // on the diagonal is read but never written) - verify against space()
  float **distanceAll;
  distanceAll=dpmatrix(0,len,0,len);
  for(x=0;x<len;x++)
  {
   for(y=(x+1);y<len;y++){
      distanceAll[x][y]=(wild[x][y]-mut[x][y])*(wild[x][y]-mut[x][y]);
      distanceAll[x][y]+=distanceAll[x][y-1];
      }
  }
  float sum,max;
  max=0; 

  Dresults=(distRes*) space(sizeof(distRes));
  pccRes *pccResult; 
  pccResult=(pccRes*) space(sizeof(pccRes));

  // starting values: any r below 1 / any mean distance above 0 is recorded
  min=1;
  r=1; 

  // from here on wild and mut hold row-wise accumulated probabilities
  wild=compute_accBasepair(wild,len);
  mut=compute_accBasepair(mut,len);

  // every interval [x,y] (0-based) spanning at least minLen positions
  for(x=posMin-1;x<posMax;x++){
   for(y=posMax-1;y>=(x+minLen-1);y--){
     emn=em=en=em2=en2=wins=mins=wout=mout=0.0;
     sum=0; //new
     cnt=0;
     for(z=x;z<=y;z++){
       // modified posMax to posMax-1 and posMin to posMin-1 #Sabari
       // accumulated probability of row z beyond the interval, on both sides
       wout += (wild[z][posMax-1]-wild[z][y])+(wild[z][posMin-1]-wild[z][x]);
       mout += (mut[z][posMax-1]-mut[z][y]) + (mut[z][posMin-1]-mut[z][x]);
       
       // accumulated probability of row z at the two interval ends
       wins += wild[z][y] + wild[z][x];
       mins += mut[z][y] + mut[z][x];

       //all
       // running sums for the correlation coefficient
       m = wild[z][y] + wild[z][x];
       n = mut[z][y] + mut[z][x];  
       emn+=(m*n);
       em+=m;en+=n;
       em2+=(m*m);en2+=(n*n);

       //distance
       sum+=distanceAll[z][y]+distanceAll[z][x];
       
       cnt++;
     }

     wins *= 0.5; mins *= 0.5;
     wout *= alpha; mout *= alpha;
     // only intervals whose inside sum exceeds the scaled outside sum, in
     // the wild type or in the mutant, are scored
     if((wins>wout) || (mins>mout)) {
       N=cnt;
 
       test=(em*en)/N;
       top=emn-test;
       bx=em2-((em*em)/N);
       by=en2-((en*en)/N);
       b=sqrt(bx*by);
       
       // NOTE(review): when top and b are both 0, r keeps the value of the
       // previously scored interval - confirm this is intended
       if(top!=0 || b!=0) r=top/b;
       
       //PCC results
       if(min>r){pccResult->startAll=x+1+offset;pccResult->endAll=y+1+offset;pccResult->minAll=r;min=r;}
      
       //Distance results
       sum/=cnt;
       if(max<sum){Dresults->startAll=x+1+offset;Dresults->endAll=y+1+offset;Dresults->maxDAll=sum;max=sum;}
     }
   }
  } 
  free_array(distanceAll,len);
  Dresults->pvalue1=0;
  return pccResult;
}

/*
 * Three-way comparison of two hits by their `val` member:
 * -1 if elem1 is smaller, 1 if it is larger, 0 otherwise.
 */
PRIVATE int compare(struct hits *elem1, struct hits *elem2)
{
  const int larger  = (elem1->val > elem2->val);
  const int smaller = (elem1->val < elem2->val);

  return larger - smaller;
}

/*
 * Release a matrix obtained from dpmatrix(0, len, ...): the rows 0..len
 * (inclusive, i.e. len+1 rows) and then the row table itself.
 * A NULL matrix is ignored.
 *
 * The function is declared to return float; it used to fall off the end
 * without returning anything.  It now always returns 0.
 */
PRIVATE float free_array(float **arr,int len)
{
  int i;

  if(arr == NULL)
    return 0.0f;

  for(i=0;i<=len;i++)
  {
    free(arr[i]);
  }
  free(arr);
  return 0.0f;
}

/*
 * Allocate a float vector that is indexed from l to h (both inclusive) and
 * set every element to `val`.  Terminates the program if the allocation
 * fails.
 *
 * The returned pointer is shifted by -l, so it has to be released with
 * free(v + l).
 *
 * Fix: the fill loop stopped one element early and left v[h] uninitialised
 * although it is part of the allocation.
 */
PRIVATE float  *fvector( int l, int h, int val )
{
  int     i;
  float   *v;
  v = ( float * ) malloc(( unsigned ) ( h - l + 1 ) * sizeof(float));
  if ( !v ) {
    printf( "Cannot allocate fvector %i %i\n", l, h );
    exit( 1 );
  }
  v -= l;
  /* initialize the whole vector, including the last element v[h] */
  for ( i=l; i<=h; i++ )
    v[i] = val;
  return( v );
}


/*
 * Allocate a float matrix whose rows are indexed rl..rh and whose columns
 * are indexed cl..ch (all inclusive), every element set to `val`.
 * Terminates the program if an allocation fails.
 *
 * The row table is shifted by -rl and every row (see fvector) by -cl.
 *
 * Fix: the row loop stopped one row early and left m[rh] as an
 * uninitialised pointer although the table has room for it.
 */
PRIVATE float  **fmatrix( int rl, int rh, int cl, int ch, int val )
{
  int     i;
  float   **m;
  m = (float **) malloc((unsigned) ( rh - rl + 1 ) * sizeof(float * ));
  if ( ! m ) {
    printf( "Cannot allocate fmatrix level 1 %i %i\n", rl, rh );
    exit( 1 );
  }
  m -= rl;
  /* one vector per row, including the last row m[rh] */
  for ( i=rl; i<=rh; i++ )
    m[i] = fvector( cl, ch, val );
  return( m );
}

/*
 * Allocate a matrix of floats with (rh-rl+1) rows and (ch-cl+1) columns
 * through space().  The result is always indexed from 0; rl and cl only
 * enter the size computation.  Release it with free_array().
 */
PRIVATE float **dpmatrix( int rl, int rh, int cl, int ch){
  const int nRows = rh - rl + 1;
  const int nCols = ch - cl + 1;
  float   **table;
  int       row = 0;

  table = (float **) space((nRows) * sizeof(float*));
  while(row < nRows){
    table[row] = (float*) space((nCols) * sizeof(float));
    row++;
  }
  return table;
}

/*
 * Look up the distribution parameters for a measure and turn the observed
 * value `d` into a p-value.
 *
 *   d       observed value (a distance for i = 1..3, a correlation for i = 4)
 *   gc      GC fraction (0..1) of the folded subsequence
 *   pos     1-based SNP position in the full sequence
 *   seqlen  length of the full sequence
 *   window  window size; selects the file
 *           <PATH>/lib/distParam/W<window>parameters.bin
 *   i       measure: 1 = RNAplfold d_max, 2 = RNAplfold d#,
 *                    3 = RNAfold d_max,   4 = RNAfold r_min
 *
 * The file is read as an array of pvalueArray records, 8 GC bins per
 * position; `pos` is first mapped to a position inside a window of 2*window.
 *
 * Returns the p-value, or 0 when the selected parameters are 0 or the record
 * cannot be read (a message is written to stderr in the latter case).
 * Terminates the program if the parameter file cannot be opened.
 *
 * Fixes: the result of fopen() was not checked (NULL was handed to
 * fseek/fread/fclose), the file name was built with an unbounded sprintf(),
 * and fseek()/fread() failures as well as a negative record index went
 * unnoticed.
 */
PRIVATE float Pvalue(float d, float gc, int pos, int seqlen,int window,int i)
{
  FILE *fin;
  char parFile[1024];
  int written;
  float pvalue=0;
  int tmppos,diff,counter;
  float tmp;
  pvalueArray *record;

  written=snprintf(parFile,sizeof parFile,"%s/lib/distParam/W%dparameters.bin",PATH,window);
  if(written<0 || (size_t)written>=sizeof parFile)
  {
    fprintf(stderr,"Path of the parameter file for window %d is too long\n",window);
    exit(EXIT_FAILURE);
  }
  fin=fopen(parFile,"rb");
  if(fin==NULL)
  {
    fprintf(stderr,"Cant read/find file %s\n",parFile);
    exit(EXIT_FAILURE);
  }
  gc*=100;

  record=(pvalueArray*) space(sizeof(pvalueArray));

  //find the snp position inside the window the parameters are stored for
  if(pos>window && seqlen>=(window+window))
  {
    tmppos=pos+window;
    if(tmppos<=seqlen){pos=window;}
    else{diff=tmppos-seqlen;diff+=window;pos=diff;}
  }
  else if(pos>window && seqlen<(window+window))
  {
    tmppos=seqlen-pos;
    pos=(window+window)-tmppos;
  }
  //find the record number for fseek: 8 GC bins per position
  if(gc<20){counter=1;}
  else if(gc>80){counter=8;}
  else{tmp=(int)gc;counter=(int)(tmp/10);}
  counter+=((pos-1)*8);
  counter--;

  if(counter<0
     || fseek(fin,(long)(sizeof(pvalueArray)*counter),SEEK_SET)!=0
     || fread(record,sizeof(pvalueArray),1,fin)!=1)
  {
    fprintf(stderr,"Cannot read parameter record %d from %s\n",counter,parFile);
    //an all-zero record selects none of the branches below
    memset(record,0,sizeof(pvalueArray));
  }

   // p-value for RNAplfold d_max
   if(i==1 && record->plfoldmu1!=0 && record->plfoldsig1!=0)
   {
     tmp=1-log(d);
     pvalue=exp(-(exp(-(tmp-record->plfoldmu1)/record->plfoldsig1)));
   }
   // p-value for RNAplfold d#
   if(i==2 && record->plfoldmu2!=0 && record->plfoldsig2!=0)
   {
     tmp=1-log(d);
     pvalue=exp(-(exp(-(tmp-record->plfoldmu2)/record->plfoldsig2)));
   }
   // p-value for RNAfold dmax
   if(i==3 && record->foldmu1!=0 && record->foldsig1!=0)
   {
     tmp=1-log(d);
     pvalue=exp(-(exp(-(tmp-record->foldmu1)/record->foldsig1)));
   }
   // p-value for RNAfold rmin: cumulative beta distribution at (d+1)/2
   if(i==4 && record->alpha!=0 && record->beta!=0)
   {
    int which = 1;
    double p = 0;
    double q = 0;
    double x = (d+1)/2;
    double y = 1-x;
    int status = 1;
    double bound = 0;

    cdfbet ( &which, &p, &q, &x, &y,  &record->alpha, &record->beta, &status, &bound );
    pvalue=p;
   }
  fclose(fin);
  free(record);
  return pvalue; 
}

PRIVATE FLT_OR_DBL* computeEnsembleEuclideanDistance(char *wtSeq, char *mutSeq){
  short *S_p, *S1_p;
  char *ptype_p;
  FLT_OR_DBL *qb_p,*qm_p,*q1k_p,*qln_p;
  FLT_OR_DBL *results; 
  results = (FLT_OR_DBL*) space(3*sizeof(FLT_OR_DBL));
  FLT_OR_DBL  zWtSeq;
  boltzmannPreFactor=1;
  pf_fold(wtSeq,NULL); //pf fold sequence
  get_pf_arrays(&S_p,&S1_p,&ptype_p,&qb_p,&qm_p,&q1k_p,&qln_p);//get folding arrays
  zWtSeq = qln_p[1];
  free_pf_arrays(); //free arrays, next folding
     
     
  FLT_OR_DBL  zMutSeq;
  pf_fold(mutSeq,NULL); //pf fold sequence
  get_pf_arrays(&S_p,&S1_p,&ptype_p,&qb_p,&qm_p,&q1k_p,&qln_p);//get folding arrays
  zMutSeq = qln_p[1];
  free_pf_arrays(); //free arrays, next folding


  FLT_OR_DBL hZWtSeq;
  boltzmannPreFactor=0.5;
  pf_fold(wtSeq,NULL); //pf fold sequence
  get_pf_arrays(&S_p,&S1_p,&ptype_p,&qb_p,&qm_p,&q1k_p,&qln_p);//get folding arrays
  hZWtSeq = qln_p[1];
  free_pf_arrays(); //free arrays, next folding

   
  FLT_OR_DBL hZMutSeq;
  boltzmannPreFactor=0.5;
  pf_fold(mutSeq,NULL); //pf fold sequence
  get_pf_arrays(&S_p,&S1_p,&ptype_p,&qb_p,&qm_p,&q1k_p,&qln_p);//get folding arrays
  hZMutSeq = qln_p[1];
  free_pf_arrays(); //free arrays, next folding
  
  FLT_OR_DBL hZMutWtSeq;
  char *seqArray[3];
  boltzmannPreFactor=0.5;
  seqArray[0]=wtSeq;
  seqArray[1]=mutSeq;
  seqArray[2]=NULL;
  strict=1;cv_fact=0;
  alipf_fold((const char **)seqArray, NULL, NULL);
  get_alipf_arrays(&S_p,&qb_p,&qm_p,&q1k_p,&qln_p);//get folding arrays
  hZMutWtSeq = qln_p[1];
  free_alipf_arrays();

  FLT_OR_DBL a,b,c,d,e;
  a=hZWtSeq/zWtSeq;a/=(zWtSeq);a/=exp(strlen(mutSeq)*log(pf_scale));
  b=hZMutSeq/zMutSeq;b/=(zMutSeq);b/=exp(strlen(mutSeq)*log(pf_scale));
  c=hZMutWtSeq/zWtSeq;c/=(zMutSeq);c/=exp(strlen(mutSeq)*log(pf_scale));
  results[0]=a;
  results[1]=b;
  results[2]=c;
  boltzmannPreFactor=1.0;
  return results;
}


