#!/bin/bash

# Extract the MAF alignment for one genomic window and run rnabound on it
# twice: once on the PETfold reliability matrix computed from the alignment,
# and once on the RNAfold dot plot computed from the hg38 sequence alone.
#
# Usage: <script> chr wstart wend strand winsize flanking bpthr
#   chr wstart wend  window coordinates, handed to mafsInRegion as one BED line
#   strand           passed through to scripts/selected_one_maf.py
#   winsize          selects dataset/alignment/out_<winsize>_nogaps_0.75.maf
#   flanking         passed to rnabound_benchmark.pl as --flanking
#   bpthr            passed to rnabound_benchmark.pl as --pnull
#
# All project paths (dataset/, scripts/, results/) are resolved relative to the
# directory the script is started from.
#
# The individual analysis steps are best-effort: a failure in the PETfold part
# does not prevent the RNAfold part from running, so `set -e` is intentionally
# not used. Only failures that would make the script write to the wrong place
# (scratch or output directory cannot be created) abort the run.

if [ "$#" -lt 7 ]; then
  printf 'Usage: %s chr wstart wend strand winsize flanking bpthr\n' "${0##*/}" >&2
  exit 2
fi

# input parameters
chr=$1
wstart=$2
wend=$3
strand=$4
winsize=$5
flanking=$6
bpthr=$7

path=$PWD

# load internal modules
module load hg # load module for mafsInRegion script

# path for the local PETfold and RNAfold
export PETFOLDBIN=/home/projects/rth/rnabound/multiz_mapping/scripts/PETfold/bin/ # set path for the PETfold dependencies

PETFOLDPATH=/home/projects/rth/rnabound/multiz_mapping/scripts/PETfold/bin/
RNAFOLDPATH=/home/users/sabari/miniconda3/envs/viennarna241/bin/

# One private scratch directory holds every intermediate file. It is removed
# by the EXIT trap, so nothing is left behind when the script stops early.
workdir=$(mktemp -d) || {
  printf 'error: cannot create scratch directory\n' >&2
  exit 1
}
trap 'rm -rf -- "$workdir"' EXIT

filename=$workdir/region.maf     # MAF blocks extracted for the window
fastafile=$workdir/region.fasta  # alignment selected from the MAF, as fasta
tmpfold=$workdir/rnafold         # working directory for RNAfold
mkdir -- "$tmpfold" || exit 1

# basename shared by all output files of this window
ofilename="${chr}_${wstart}_${wend}"

# Run rnabound_benchmark.pl and place the gzipped result in an output folder.
#   $1  input option for the perl script (--relibmat or --dotfile)
#   $2  input file belonging to that option
#   $3  output folder receiving ${ofilename}_segments.txt.gz
# The result is first written inside the private scratch directory and then
# moved, instead of using a fixed, predictable name directly under /tmp.
run_rnabound() {
  local input_opt=$1 input_file=$2 dest_dir=$3
  local staged=$workdir/${ofilename}_segments.txt.gz

  perl "$path/scripts/rnabound_benchmark.pl" "$input_opt" "$input_file" \
    --flanking "$flanking" --pnull "$bpthr" | gzip >"$staged"
  mv -- "$staged" "$dest_dir/"
}

## PETfold analysis
## ================
# output folder (-p: no error if it already exists, creates results/ if needed)
outdir=$path/results/petfold_rnabound_window_${winsize}_${flanking}_${bpthr}
mkdir -p -- "$outdir" || exit 1

# extract maf sequence
printf '%s\t%s\t%s\n' "$chr" "$wstart" "$wend"
mafsInRegion <(printf '%s\t%s\t%s\n' "$chr" "$wstart" "$wend") stdout \
  "$path/dataset/alignment/out_${winsize}_nogaps_0.75.maf" 2>/dev/null >"$filename"

# prepare fasta sequence. If there is more than one maf for a region, then
# select the one with maximum length
python "$path/scripts/selected_one_maf.py" "$filename" "$strand" >"$fastafile"

# run petfold on the extracted alignment
"$PETFOLDPATH/PETfold" -f "$fastafile" -r "$outdir/${ofilename}_pp.txt" >/dev/null

run_rnabound --relibmat "$outdir/${ofilename}_pp.txt" "$outdir"
cp -- "$fastafile" "$outdir/${ofilename}.fasta"

# run single sequence analysis using RNAfold
# ==========================================
# output folder
outdir=$path/results/rnafold_rnabound_window_${winsize}_${flanking}_${bpthr}
mkdir -p -- "$outdir" || exit 1

# extract only human sequence: take the line following each hg38 header, drop
# header lines and strip every '-' character
grep -A1 "hg38" "$fastafile" | grep -v ">" | sed 's/\-//g' >"$tmpfold/${ofilename}_seq.txt"

# RNAfold is run inside the scratch directory because dot.ps is picked up from
# its working directory. The subshell keeps the caller's directory untouched,
# and a failed cd stops RNAfold from running somewhere else.
(
  cd "$tmpfold" || exit 1
  "$RNAFOLDPATH/RNAfold" -p <"${ofilename}_seq.txt" >out.txt
)
mv -- "$tmpfold/dot.ps" "$outdir/${ofilename}.ps"

run_rnabound --dotfile "$outdir/${ofilename}.ps" "$outdir"

# Step failures are reported on stderr by the tools themselves; the exit
# status stays 0 as before so existing batch drivers are unaffected.
exit 0
