#!/usr/bin/perl -w

# ---- settings for the submitting (local) machine ----
# $SUBDIR     : local working directory; generated config/job files are
#               written here and the result files are fetched into it
# $RFAMMODELS : archive with the Rfam covariance models, uploaded to the server
# $PBSSUBMIT  : PBS submit helper script, uploaded to the server
# $RFAMSEED   : Rfam seed file the per-family search parameters are read from
my $SUBDIR     = '/home/users/seemann/nagnag';
my $RFAMMODELS = '/home/users/seemann/seq_data/rfamseq/Rfam.tar.gz';
my $PBSSUBMIT  = '/home/users/seemann/projects/tools/pbsubmit.pl';
my $RFAMSEED   = '/home/users/seemann/seq_data/rfamseq/Rfam.seed';

# ---- settings for the compute server ----
# $SERVER       : host that is reached through ssh/scp
# $SERVER_HOME  : server directory for the submit script, logs and results
# $RAVENNA_HOME : RaveNnA installation directory on the server
my $SERVER       = 'fe4.dcsc.sdu.dk';
my $SERVER_HOME  = '/people/disk1/seemann/ncRNA/ravenna';
my $RAVENNA_HOME = '/people/disk1/seemann/ncRNA/ravenna-0.2f';

# ---- PBS options, passed on verbatim to the submit script ----
my $QUEUE = '-q workq';               # format "-q <QUEUE>"
my $NODES = '-l nodes=1';             # format "-l nodes=<NUMBER>"
my $WALLT = '-l walltime=15:00:0';    # format "-l walltime=<h>:<min>:<s>"


# Print the usage text to STDOUT and terminate the program.
sub help {
    my @usage = (
        "Usage: ravenna.pl FILE [list of Rfam-Models | ALL]",
        "Starts RaveNnA to find known RNA-Genes in the input sequences.",
        "",
        "Arguments:",
        "  FILE                   Query sequences in gziped fasta format.",
        "  list of Rfam-Models    list of testing Rfam-Models (f.e. RF00001), optional",
        "  ALL\t\t\t \"ALL\" for all Rfam-Models, same like no second argument",
    );

    # the trailing empty element terminates the last line with a newline
    print join("\n", @usage, "");
    exit;
}

# Run a shell command and return its output, retrying when it fails.
#
# Arguments:
#   $server - host the command is run on through ssh; the literal string
#             "localhost" runs the command directly without ssh
#   $cmd    - command line; it is interpolated into a shell command, so
#             it must not contain unescaped double quotes
# Returns:
#   the lines the command printed to STDOUT (list)
# Dies:
#   when the command still fails after 10 attempts
sub sshcall {
    my $server = shift;
    my $cmd = shift;
    my $attempts = 10;   # same number of tries as before
    my $delay = 60;      # seconds between tries; matches the printed message
    my @res;

    foreach my $try ( 1 .. $attempts ) {
        if( $server ne "localhost" ) {
            @res = `ssh $server \"$cmd\"`;
        }
        else {
            @res = `$cmd`;
        }

        # $? is non-zero for a bad exit code, a signal, and a failed launch
        return @res if $? == 0;

        # no retry announcement (and no pointless wait) after the last try
        last if $try == $attempts;
        print "Try '$cmd' on '$server' again in $delay seconds!\n";
        sleep $delay;
    }

    die("'$cmd' can not execute on '$server'!\n");
}

# Copy a file with scp (or plain cp for "localhost"), retrying on failure.
#
# Arguments:
#   $path1 - source, either "path" or "host:path"
#   $path2 - destination, either "path" or "host:path"
# A "host:path" argument is split at its last colon.  When at least one
# side names a host and every named host is "localhost", the copy is done
# with cp on the bare paths; in all other cases both arguments are handed
# to scp unchanged.
# Dies:
#   when the copy still fails after 10 attempts
sub scpcall {
    my $path1 = shift;
    my $path2 = shift;
    my ($srcHost, $srcPath, $dstHost, $dstPath);
    my $attempts = 10;   # same number of tries as before
    my $delay = 60;      # seconds between tries; matches the printed message

    ($srcHost, $srcPath) = $path1 =~ /(.*):(.*)/ if $path1 =~ /:/;
    ($dstHost, $dstPath) = $path2 =~ /(.*):(.*)/ if $path2 =~ /:/;

    # host shown in the messages: the destination host wins, as before;
    # without any host the copy is a local one
    my $server = defined $dstHost ? $dstHost
               : defined $srcHost ? $srcHost
               :                    "localhost";

    # cp is only valid when no side lives on a real remote host
    my @hosts = grep { defined } ($srcHost, $dstHost);
    my $local = ( @hosts > 0 && !grep { $_ ne "localhost" } @hosts ) ? 1 : 0;

    # strip the "localhost:" prefix from whichever side carries it
    my $from = defined $srcPath ? $srcPath : $path1;
    my $to   = defined $dstPath ? $dstPath : $path2;

    foreach my $try ( 1 .. $attempts ) {
        if( $local ) {
            `cp $from $to`;
        }
        else {
            `scp $path1 $path2`;
        }

        # $? is non-zero for a bad exit code, a signal, and a failed launch
        return if $? == 0;

        # no retry announcement (and no pointless wait) after the last try
        last if $try == $attempts;
        print "Try copy to '$server' again in $delay seconds!\n";
        sleep $delay;
    }

    die("Can not copy to '$server'!\n");
}

# Run RaveNnA on the compute server for a set of Rfam families.
#
# Arguments (positional):
#   $fastafile    - local path of the query sequence file
#   $RFAMMODELS   - local path of the Rfam model archive
#   $PBSSUBMIT    - local path of the PBS submit script
#   $RFAMSEED     - local path of the Rfam seed file
#   $RAVENNA_HOME - RaveNnA directory on the server
#   $SUBDIR       - local working directory
#   $refFamilies  - array ref of Rfam accessions, or ("ALL")
#
# Steps: write and upload the RaveNnA config files, unpack the models and
# partition the query on the server, submit one PBS job per family, wait
# until the jobs have left the queue, fetch the '*.cmzasha*' results and
# resubmit every family without a result file with a doubled walltime.
#
# Uses the file-level settings $SERVER, $SERVER_HOME, $QUEUE, $NODES and
# $WALLT; $WALLT is modified when the walltime is doubled.
#
# NOTE(review): a family that never produces a '*.cmzasha.csv' file is
# resubmitted forever with a growing walltime - confirm this is intended.
sub run_ravenna {
    my $fastafile = shift;
    my $RFAMMODELS = shift;
    my $PBSSUBMIT = shift;
    my $RFAMSEED = shift;
    my $RAVENNA_HOME = shift;
    my $SUBDIR = shift;
    my $refFamilies = shift;

    my (@params, @proc);

    # publish the db in the RaveNnA config file
    _ravenna_write_file("$SUBDIR/ravenna.config.tab",
        "cmzashaExe\t$RAVENNA_HOME/src/release/cmzasha\n"
      . "InfernalBinDir\t$RAVENNA_HOME/NotByZasha/infernal-0.7/src\n"
      . "RfamDir\t$RAVENNA_HOME/data\n"
      . "RsearchMatrixDir\t$RAVENNA_HOME/NotByZasha/rsearch-1.1-zasha/matrices\n"
      . "PerlDir\t$RAVENNA_HOME/src\n"
      . "db\tESTs\t$RAVENNA_HOME/partFASTAs/partition.gz.list\t$RAVENNA_HOME/data/EmblIdAndOrganismCompact_Barrick3.tab\t$RAVENNA_HOME/data/default.heurcreationspec\n");

    # create the heuristic HMM training file
    _ravenna_write_file("$SUBDIR/default.heurcreationspec",
        "genomes\t$RAVENNA_HOME/data/E_coli_NC_000913.fasta\t1\t$RAVENNA_HOME/data/Bordetella_bronchiseptica.fasta\t1\t$RAVENNA_HOME/data/S_aureus.fasta\t1\n");

    # secure copy of all input data to ravenna home directory
    system(qq{scp $fastafile $SUBDIR/ravenna.config.tab $SUBDIR/default.heurcreationspec $RFAMMODELS $SERVER:$RAVENNA_HOME/data/})
        && warn("Copying the input data to '$SERVER' reported an error!\n");
    $fastafile =~ s/(.*)\/(.*)$/$2/;   # only the file name is needed from here on

    # uncompress the Rfam models
    system(qq{ssh $SERVER 'cd $RAVENNA_HOME/data; gunzip -c Rfam.tar.gz | tar xvf - >>$SERVER_HOME/stdout_ravenna.log; rm Rfam.tar.gz'}) && die("Can't create covariance models!\n");

    # secure copy of the pbs starting script 'pbsubmit.pl'
    system(qq{scp $PBSSUBMIT $SERVER:$SERVER_HOME})
        && warn("Copying '$PBSSUBMIT' to '$SERVER' reported an error!\n");

    # create a list of fasta files (named 'partition.gz.list') as db-input for RaveNnA whereas each file includes maximal 5.000.000 nucleotids
    system(qq{ssh $SERVER "mkdir -p $RAVENNA_HOME/partFASTAs; rm $RAVENNA_HOME/partFASTAs/*; $RAVENNA_HOME/src/release/cmzasha --partition-fasta $RAVENNA_HOME/data/$fastafile $RAVENNA_HOME/partFASTAs 5000000 >>$SERVER_HOME/stdout_ravenna.log; gzip $RAVENNA_HOME/partFASTAs/*.fasta"}) && die("Can't create the RaveNna database!\n");

    # get parameters for RaveNnA from 'Rfam.seed'
    @params = _ravenna_read_params($RFAMSEED, $refFamilies);

    # run RaveNnA for each covariance model in @params (known ncRNA-families)
    print "Start RaveNnA on $SERVER.\n";

    # whoami on running machine; the trailing newline must not end up in
    # the qstat command line
    my ($user) = sshcall($SERVER, "whoami");
    die("Can not determine the user name on '$SERVER'!\n") unless defined $user;
    chomp $user;

    # loop until the data is tested against all covariance models
    while( @params > 0 ) {
        foreach my $param ( @params ) {
            my ($acc, $threshold, $window, $mode) = split " ", $param;
            my $cmd = qq{perl $RAVENNA_HOME/src/ravenna.pl -configFile $RAVENNA_HOME/data/ravenna.config.tab -scoreThreshold $threshold -database ESTs -$mode -cmFileName $RAVENNA_HOME/data/$acc.cm -workDir /scratch $window};
            print "$cmd\n";

            # create the shell script 'pbs_ravenna.sh' to start the pbs job for one Rfam family
            _ravenna_write_file("$SUBDIR/pbs_ravenna.sh",
                "#!/bin/tcsh\ncd $RAVENNA_HOME\n$SERVER_HOME/pbsubmit.pl -o \"-N ravenna $NODES $WALLT\" $QUEUE -D -Q -B $RAVENNA_HOME -c \"cp $RAVENNA_HOME/src/ParallelBlock.pm $RAVENNA_HOME/src/RavennaConfigFile.pm .; $cmd; cp /scratch/*.cmzasha* $SERVER_HOME; rm -f ParallelBlock.pm RavennaConfigFile.pm\"\n");

            # start the pbs job on the server
            system(qq{scp $SUBDIR/pbs_ravenna.sh $SERVER:$SERVER_HOME})
                && warn("Copying 'pbs_ravenna.sh' to '$SERVER' reported an error!\n");
            system(qq{ssh $SERVER 'chmod u+x $SERVER_HOME/pbs_ravenna.sh; $SERVER_HOME/pbs_ravenna.sh'}) && die("RaveNnA finished with an error!\n");

            sleep 2;

            # remember the job just started: the last 'ravenna' job qstat lists
            my @listed = _ravenna_job_ids(sshcall($SERVER, "qstat -u$user"));
            push @proc, $listed[-1] if @listed;
        }

        # wait until all started RaveNnA processes are finished; every job
        # qstat still lists counts as running, not only the newest one
        while( @proc ) {
            my %active = map { $_ => 1 } _ravenna_job_ids(sshcall($SERVER, "qstat -u$user"));
            @proc = grep { $active{$_} } @proc;
            last unless @proc;
            sleep 60;
        }

        # fetch results (cmzasha.* files) from the server; this legitimately
        # fails when no job produced a result, so it is not checked
        system(qq{scp $SERVER:$SERVER_HOME/*.cmzasha* $SUBDIR});

        # test if all covariance models are used because some can be lost through the walltime
        foreach my $csv ( glob("$SUBDIR/*.cmzasha.csv") ) {
            last unless @params;
            # look for the accession in the file name only, not in the directory
            next unless $csv =~ m{(RF\d{5})[^/]*$};
            my $done = $1;
            @params = grep { (split /\t/, $_)[0] ne $done } @params;
        }

        # double the walltime
        if( @params > 0 ) {
            my ($hours, $min) = $WALLT =~ /walltime=(\d+):(\d+)/;
            $min = 2*($hours*60+$min);
            $hours = int($min/60);
            $min = $min % 60;
            $WALLT =~ s/-l walltime=\d+:\d+/-l walltime=$hours:$min/;
            my $left = @params;
            print "$left left covariance models will be tested again with a increased walltime of \'$WALLT\'.\n";
        }
    }

    # clean the server
    #system(qq{ssh $SERVER "rm -rf $SERVER_HOME"}) && die("Can't clean the server from RaveNnA!\n");
}

# Write $content to the file $path, replacing an existing file.
# Dies when the file cannot be opened or written.
sub _ravenna_write_file {
    my ($path, $content) = @_;

    # 'or' (not '||') so that the check applies to open() itself
    open(my $out, '>', $path) or die("Can not open the file '$path': $!\n");
    print $out $content;
    close($out) or die("Can not write the file '$path': $!\n");
}

# Read the RaveNnA search parameters of the requested families from the
# seed file.  Returns one string "ACCESSION\tTHRESHOLD\tWINDOW\tMODE" per
# family, MODE being "local" or "global".  With ("ALL") every family of the
# file is returned in file order, otherwise the requested families in the
# requested order (duplicates once, unknown ones with a warning).
sub _ravenna_read_params {
    my ($seedfile, $refFamilies) = @_;
    my $all = ( @$refFamilies > 0 && $$refFamilies[0] eq "ALL" ) ? 1 : 0;
    my (%paramOf, @order, $acc, $ga);

    open(my $in, '<', $seedfile) or die("Can not open the file '$seedfile': $!\n");
    while( my $row = <$in> ) {
        if( $row =~ /^\#=GF AC\s*(RF\d{5})/ ) {
            # a new family starts: forget the values of the previous one
            $acc = $1;
            $ga = undef;
        }
        elsif( $row =~ /^\#=GF GA\s*(-?\d+(?:\.\d+)?)/ ) {
            $ga = $1;
        }
        elsif( $row =~ /^\#=GF BM\s*cmsearch (--local )?-W (\d*)/ ) {
            next unless defined $acc && defined $ga;
            next if exists $paramOf{$acc};   # first cmsearch line wins
            $paramOf{$acc} = "$acc\t$ga\t$2\t" . ($1 ? "local" : "global");
            push @order, $acc;
        }
    }
    close($in);

    return map { $paramOf{$_} } @order if $all;

    my (@params, %seen);
    foreach my $family ( @$refFamilies ) {
        next if $seen{$family}++;
        if( exists $paramOf{$family} ) {
            push @params, $paramOf{$family};
        }
        else {
            warn("No search parameters for '$family' found in '$seedfile'!\n");
        }
    }
    return @params;
}

# Return the job ids (first run of digits) of all qstat output lines that
# mention 'ravenna', in the order of the lines given.
sub _ravenna_job_ids {
    my @ids;
    foreach my $row ( @_ ) {
        push @ids, $1 if $row =~ /ravenna/ && $row =~ /(\d+)./;
    }
    return @ids;
}


#################################################
# main

# without any argument there is no query file: show the usage text
# (help() terminates the program)
unless( @ARGV ) {
    help();
}

# the first argument names the query file, everything after it names
# the Rfam models; no model at all means "ALL"
$file = shift @ARGV;
@models = @ARGV;
push @models, "ALL" unless @models;

# report what is going to be searched, then start the run
print "Query Fasta File:\t $file\n",
      "Rfam Models:\t\t @models\n";
run_ravenna($file, $RFAMMODELS, $PBSSUBMIT, $RFAMSEED, $RAVENNA_HOME, $SUBDIR, \@models);


