#!/usr/bin/perl

# RNAsnp-1.2 
# Copyright (C) 2013 Radhakrishnan Sabarianthan <sabari@rth.dk>
#
# Perl template program to access the web server RNAsnp for predicting the effect
# SNPs on local RNA secondary structure.  
#
# Usage: Type "./rnasnp.pl" in your terminal.

use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case bundling);
use HTTP::Request::Common qw(POST);
use LWP::UserAgent;
use HTML::TreeBuilder;
use Data::Dumper;

my %opts=(f => "", s => "", m => 1, w => 200,
          #params of mode 1#
	  M => 1, l => 50, c => 0.01,
          #params of mode 2#
      	  W => 200, L => 120, X => 20, Y => 120, c2 => 0.01,
	  #params of mode 3#
	  pvalue1 => 0.1, pvalue2 => 0.05, e => 200, h => 0);

GetOptions(\%opts, qw(f=s s=s m=i M=i w=i l=i c=f W=i L=i X=i Y=i pvalue1=f pvalue2=f e=i h)) or exit;

# Argument check
if($opts{f} eq "" || $opts{h}==1){
	print "RNAsnp-1.2\n\n";
	print "This script is used to access the RNAsnp web server for predicting SNPs effect on local RNA secondary structure and parse the output.\n\n";
	print "Usage: ./rnasnp.pl -f <seq_file> -s <snp_file> [OPTIONS]\n\n";

	print "\t-f <STRING>\tFile containing the input sequence in FASTA format\n";
	print "\t-s <STRING>\tFile containing the list of SNPs\n";
	print "\t-m <INT>\tmode of operation (default='1')\n";
	print "\t\t\t 1 - based on global folding (using RNAfold)\n";
	print "\t\t\t 2 - based on local folding (using RNAplfold)\n";
	print "\t\t\t 3 - to screen putative structure-disruptive SNP\n";

        print "\t[OPTIONS]\n";
        print "\t-w <INT>\tsize of flanking regions on either side of SNP (default='200')\n\n";
	print "\t-c <FLOAT>\tCut-off for the base pair probabilities (default='0.01')\n\n";
	print "\tParameters associated with (mode) -m 1:\n";
 	print "\t-M <INT>\tMeasures: 1 - distance; 2 - Correlation coefficient\n";
	print "\t-l <INT>\tMinimum length of the sequence interval (default='50')\n";
	print "\tParameters associated with (mode) -m 2:\n";
	print "\t-W <INT>\tAverage the pair probabilities over windows of size (default='200')\n";
	print "\t-L <INT>\tMaximum allowed base pair span \n";
	print "\t-X <INT>\tLength of the local structural element that we expect to have an effect (default='20')\n";
	print "\t-Y <INT>\tLength of the interval over which the local structural changes are evaluated (default='120')\n\n";
	print "\tParameters associated with (mode) -m 3:\n";
	print "\t--pvalue1 <FLOAT>\tP-value threshold to filter SNPs that are predicted using mode 2\n";
	print "\t--pvalue2 <FLOAT>\tP-value threshold to filter SNPs that are predicted using mode 1\n";
	print "\t-e <INT>\tMinimum length of flanking regions on either of SNP\n\n";
	print "Example: ./rnasnp.pl -f seq.txt -s snp.txt\n";
        exit;
}

# Check for input consistency

# read the input sequence file
open(IN,$opts{f}) or die "Error in opening the file $opts{f}\n";
my $sequence;
while(<IN>)
{
  if($_!~/^>/){chomp $_;$sequence.=$_;}
}
close IN;
$sequence.="\n";

# check the mode input value
unless($opts{m}==1 || $opts{m}==2 || $opts{m}==3)
{die "The value for -m must be either 1 or 2 or 3\n"; }

# check for the measure of mode 1
unless($opts{M} == 1 || $opts{M} ==2 )
{ die "The value for -M must be either 1 or 2\n"; }

# read the SNP file 
my $snp;
my $sleepSeconds; # variable to define the time gap to refresh the output page
if($opts{m} == 1 || $opts{m} == 2)
{
  open(IN,$opts{s}) or die "Error in openeing the file $opts{s}\n";
  while(<IN>)
  {
    if($_=~/[A-Z]\d{1,}[A-Z]/){$snp.=$_};
  }
  $sleepSeconds=15;
}

if($opts{m} ==3){$snp="XXX\n";$sleepSeconds=300;}

if($opts{w}%50 !=0)
{ die "The value for -w must be divisible by 50\n";}


# Set the values of for input fields
my %params = (
   RNASNP_af => $sequence,
   RNASNP_tf => $snp,
   RNASNP_mode => $opts{m},
   RNASNP_measureMode1 => $opts{M},
   RNASNP_winsizeFold1 => $opts{w},
   RNASNP_winsizeFold2 => $opts{w},
   RNASNP_l => $opts{l},
   RNASNP_c1 => $opts{c},
   RNASNP_c2 => $opts{c},
   RNASNP_W => $opts{W},
   RNASNP_L => $opts{L},
   RNASNP_X => $opts{X},
   RNASNP_Y => $opts{Y},
   RNASNP_pvalue1 => $opts{pvalue1},
   RNASNP_pvalue2 => $opts{pvalue2},
   RNASNP_e => $opts{e},
);

# Create a request
my $req = POST("http://rth.dk/resources/rnasnp/submit.php", [%params]);

# Retrieve the result for the request
my $ua = LWP::UserAgent->new;
my $res = $ua->request($req);

if ($res->is_success)
 {
	my $content = $res->content;
        # Retrieve the job id after submission
	my $tree = HTML::TreeBuilder->new;
	$tree->parse($content);
	my $jobid = $tree->find('b');
        my $jobID = $jobid->as_text;
        my $url="http://rth.dk/resources/rnasnp/results?jobid=$jobID";
        print "Job Id: $jobID\n\n";
        
        # find the status of the job
        my $res1=$ua->get($url);
        my $content1 = $res1->content;
        my $treeC = HTML::TreeBuilder->new;
        $treeC->parse($content1);
        my @p = $treeC->find('p');

        # if the status is running then refresh the page for every $sleepSeconds to 
        # get final output
        if($p[0]->as_text=~/running/ || $p[0]->as_text!~/Job ID:/)
        {
           do{
               sleep $sleepSeconds;
               my $res1=$ua->get($url);
               my $content1 = $res1->content;
               my $treeC = HTML::TreeBuilder->new;
               $treeC->parse($content1);
               @p = $treeC->find('p');
              }while($p[1]->as_text!~/Download/);
              # stop refreshing once the outputs are ready to Download

           # print the RNAsnp output of "Description" section
           my $csv="http://rth.dk/resources/rnasnp/output/$jobID/RNAsnp_results_$jobID.csv";
	   $csv=$ua->get($csv);
           my @res=$csv->content;
           foreach my $row (@res)
           {
             $row=~s/,/\t/g;
	     print $row;
           }

           # download all the outputs
           my $download="http://rth.dk/resources/rnasnp/output/$jobID/RNAsnp_results_$jobID.zip"; 
           my $down=$ua->get($download);
           open(OUT,">RNAsnp_results_$jobID.zip");
           print OUT $down->content;       
           close OUT;
           print "\nThe complete details of RNAsnp output is available in './RNAsnp_results_$jobID.zip'\n";
          }
          else{print $p[0],"\n";exit;}

}

else {print $res->status_line, "\n";}

print "\n";
