#!/usr/bin/perl



use FindBin; # locate this script
use lib "$FindBin::Bin/metaRNAmodules_mapping/";
use lib "$FindBin::Bin/metaRNAmodules_model/";  # use the parent directory
use lib "$FindBin::Bin/RNAmodules/";
use mainMapping;
use utils;
use Cwd qw(abs_path);
use strict;
use Getopt::Long;
use warnings;
use Data::Dumper;




## Set Environment variables
##
## Every external tool is located through an environment variable that names
## its installation directory. The first missing variable or missing file is
## reported on STDOUT and ends the script.

## return the directory stored in the given environment variable, or report and stop
my $dirFromEnvOrQuit = sub {
	my ($toolLabel, $envName) = @_;
	my $dir = $ENV{$envName};
	unless(defined $dir){
		print "Could not find $toolLabel. Please set the Environment variable: $envName\n";
		exit;
	}
	return $dir;
};

## return the given file path if it exists, or report (optionally with a custom message) and stop
my $fileOrQuit = sub {
	my ($toolPath, $customMessage) = @_;
	unless(-e $toolPath){
		print defined $customMessage ? $customMessage : "Could not find the executable $toolPath\n\n";
		exit;
	}
	return $toolPath;
};

## Infernal: cmalign
my $infernalPathi = $dirFromEnvOrQuit->("CMALIGN", "INFERNALPATH");
my $cmalign = $fileOrQuit->($infernalPathi."/cmalign");

## ClustalW (this check has its own wording of the error message)
my $clustalPathi = $dirFromEnvOrQuit->("CLUSTALW", "CLUSTALWPATH");
my $clustalw = $fileOrQuit->(
	$clustalPathi."/clustalw",
	"Could not find the executable clustalw2 at $clustalPathi/clustalw\n\n"
);

## RMDetect: three scripts from one directory, checked in this order
my $rmDirName = $dirFromEnvOrQuit->("RMDETECT", "RMDETECTPATH");
my $rmdetect = $fileOrQuit->($rmDirName."/rmdetect.py");
my $rmcluster = $fileOrQuit->($rmDirName."/rmcluster.py");
my $rmbuild = $fileOrQuit->($rmDirName."/rmbuild.py");

## Multiperm
my $multipermPath = $dirFromEnvOrQuit->("MULTIPERM", "MULTIPERMPATH");
my $multiperm = $fileOrQuit->($multipermPath."/multiperm");




## global variables

## program name, version string, help flag
my $prog = "metaRNAmodules";
my $metaRNAmodulesVersion = "metaRNAmodules-0.1.1.0";
my ($metaRNAmodulesV, $help);

## script location and directories
my $absPath = "";
my ($scriptPath, $rootPath);
my $rootPathPrefix = "./";
my $tmpDir = "/tmp/";
my $putModPath = $FindBin::Bin."/RNAmodules/";

## values set from the command line (see GetOptions) and the defaults used otherwise
my ($outdir, $modelName, $modelVersion, $ali2scan, $putModTool, $exam, $y);
my $outdirSuffix = "_Out/";
my $modelNameName = "MYMODEL_fr3d";
my $modelVersionName = "1";
my $ali2scanName = "RFAMseed10.1_STOCKHOLM/RF00015.seed.sto.gz";
my $putModToolDef = "n";
my $exam_small = 1;
my $exam_all = 2;

## FR3D example directories and putative module lists
my $fredExamples_small = "FR3D_examples_small/";
my $fredExamples_all = "FR3D_examples_all/";
my $fr3dFiName = "_basepairs_FR3D.txt.gz";
my $putativeModulesFileName_small = "PutativeModules_small.txt";
my $putativeModulesFileName_all = "PutativeModules_all.txt";
my $putativeModulesFileName = "PutativeModules.txt";
my ($fr3dpath, $putativeModulesFile);

## temporary module files below the output directory
my $tmpModuleOutDirName = "Tmp_Modules/";
my $tmpModuleOutDir = "";
my $tmpModuleName = "moduleClass_";
my $tmpModuleNameSuffix = ".mod";
my $modFileSep = "Instances of motif:";
my $uniqueModuleNameSuffix = ".module";

## library directories and helper tools below the root path
my $rnamodulesLibName = "RNAmodules/";
my $mappingLibName = "metaRNAmodules_mapping/";
my $modelLibName = "metaRNAmodules_models/";
my ($rnamodulesLib, $mappingLib, $modelLib);
my $libName = "lib/";
my $stk2alnToolName = "stk2aln";
my $statsName = "stats";
my $getKeywordsScriptName = "getKeywords.sh";

## Rfam data directories and files below the root path
my $cleanedRfamAliDirName = "Rfam_clean/";
my $cleanedSeedAliFiZipName = "Rfam.seed.rr95.90.30.gz";
my $structureTableName = "_strucTab.txt.gz";
my $keyFileName = "rfam_keywords.txt.gz";
my $rfamFullGappedDirName = "RFAMfull10.1_STOCKHOLM/";
my $cmDirName = "RFAMseed10.1CovarianceModels/";
my $cleanedAliDirName = "RFAMseed10.1CleanedAlignments/";
my $rfamAliDirName = "RFAMseed10.1_STOCKHOLM/";
my ($cleanedRfamAliDir, $structureTableDir, $rfamFullGappedDir);
my ($cmDir, $cleanedAliDir, $rfamAliDir);

## alignment file name suffixes
my $modCleanRfamAliName = "_mod_clean.stk";
my $modCleanRefseqRfamAliName = "_mod_clean_refseq.stk";
my $cmalignAliName = ".seed.10.1.cleaned_mod_full.sto";
my $modAliName = "_mod.stk";

## output names for the test run and for the shuffled run
my $rmdoutName = "test.rmdout";
my $rmcoutName = "test.summary.rmcout";
my $clusterDirName = "clusters_test/";
my $clusterSummary = "cluster_test.summary";
my $rmdoutNameS = "shuffled.rmdout";
my $rmcoutNameS = "shuffled.summary.rmcout";
my $clusterDirNameS = "clusters_shuffled/";
my $clusterSummaryS = "cluster_shuffled.summary";
my $shuffleDir = "ShuffledAlignments/";
my $shuffNum = "25";

## settings collected for the mapping module
my %infoHash;

## usage text, one list element group per output line
my $helptext = join("",
	"\nusage:\n",
	"perl metaRNAmodules [options]\n",
	"\noptions:\n\n",
	"-o, --outdir\t\tDirectory containing the output of metaRNAmodules [default: ./", $prog, "_Out]\n",
	"-n, --name\t\tName of the new model [default: ", $modelNameName, "]\n",
	"-x, --x\t\t\tVersion number of the new model [default: ", $modelVersionName, ".0]\n",
	"-a, --ali2scan\t\tFile containing an alignment [mandatory: STOCKHOLM format]\n",
	"-p, --putativeModule\tRun PutativeModule y/n [default: n]\n",
	"-e, --exam\t\tUse example [1=small, 2=all, default=1]\n",
	"-h, --help\t\tPrint help file and exit\n",
	"-V, --version\t\tPrint version and exit\n\n"
);


##############
## parse options
##
## --help and --version print their text and exit from inside their handlers.
## Unknown or malformed options abort with the usage text.
## 'y' is accepted although the usage text does not list it; its value is
## only stored in %infoHash further down.

GetOptions(
	'outdir=s'         => \$outdir,
	'name=s'           => \$modelName,
	'x:f'              => \$modelVersion,
	'ali2scan=s'       => \$ali2scan,
	'putativeModule=s' => \$putModTool,
	'exam=i'           => \$exam,
	'help'             => sub { HelpMessage() },
	'version'          => sub { showVersion() },
	'y=i'              => \$y,
) or die "$helptext";




## get super directory of given outdir
if(! defined $rootPath){
	$scriptPath = abs_path($0);
	getRootPath();
}
## all relative paths used from here on are meant relative to $rootPath,
## so continuing after a failed chdir would read and write in the wrong place
chdir $rootPath or die "\nCan't change into directory $rootPath: $!\n\n";

######################
## set default options

## the output directory always lives below $rootPath
if(! defined $outdir){
	$outdir = $rootPath.$prog.$outdirSuffix;
}
else{
	$outdir = $rootPath.$outdir."/";
}
if(! defined $cleanedRfamAliDir){
	$cleanedRfamAliDir = $rootPath.$cleanedRfamAliDirName.$cleanedSeedAliFiZipName;
}
if(! defined $rfamFullGappedDir){
	$rfamFullGappedDir = $rootPath.$rfamFullGappedDirName;
}
if(! defined $modelName){
	$modelName = $modelNameName;
}
if(! defined $modelVersion){
	$modelVersion = $modelVersionName;
}
## only an alignment given by the user is checked for its file suffix
if(! defined $ali2scan){
	$ali2scan = $rootPath.$ali2scanName;
}
else{
	checkFormat($ali2scan);
}
if(! defined $putModTool){
	$putModTool = $putModToolDef;
}
elsif($putModTool ne "n" && $putModTool ne "y"){
		print "\nPlease check option -p, --putativeModule [y, n]\n\n";
		exit(0);
}
if(! defined $exam){
	$exam = $exam_small;
}
elsif($exam != 1 && $exam != 2){
		print "\nPlease check option -e, --exam [1, 2]\n\n";
		exit(0);
}


## set file names either to example small or all
## (the FR3D directory follows -e alone; the module list depends on -e only
## when the scan is not run, otherwise the common file name is used)
{
	my $toolChoiceKnown = ($putModTool eq "n" || $putModTool eq "y");
	my $smallExample = ($exam == 1);
	my $allExamples = ($exam == 2);

	if($toolChoiceKnown && ($smallExample || $allExamples)){
		$fr3dpath = $rootPath.($smallExample ? $fredExamples_small : $fredExamples_all);
		if($putModTool eq "y"){
			$putativeModulesFile = $rootPath.$putativeModulesFileName;
		}
		else{
			$putativeModulesFile = $rootPath.($smallExample ? $putativeModulesFileName_small : $putativeModulesFileName_all);
		}
	}
}


## set path names for libraries
($modelLib, $rnamodulesLib, $mappingLib) =
	map { $rootPath.$_ } ($modelLibName, $rnamodulesLibName, $mappingLibName);
$tmpModuleOutDir = $outdir.$tmpModuleOutDirName;
my $keywordsScript = $mappingLib.$getKeywordsScriptName;
my $keyFile = $rootPath.$cleanedRfamAliDirName.$keyFileName;
my ($stk2alnTool, $stats) =
	map { $rootPath.$libName.$_ } ($stk2alnToolName, $statsName);
($cmDir, $cleanedAliDir, $rfamAliDir, $structureTableDir) =
	map { $rootPath.$_ } ($cmDirName, $cleanedAliDirName, $rfamAliDirName, $cleanedRfamAliDirName);

## fill hash with infos
## (existing entries are kept; the keys below are added or overwritten)
%infoHash = (
	%infoHash,

	## run settings and output locations
	"outdir" => $outdir,
	"fr3dpath" => $fr3dpath,
	"putativeModulesFile" => $putativeModulesFile.".gz",
	"rootPath" => $rootPath,
	"progName" => $prog,
	"tmpModuleOutDir" => $tmpModuleOutDir,
	"tmpModuleName" => $tmpModuleName,
	"tmpModuleNameSuffix" => $tmpModuleNameSuffix,
	"version" => $modelVersion,
	"modelname" => $modelName,
	"modFileSep" => $modFileSep,
	"modAliName" => $modAliName,

	## data files and directories
	"keywordScript" => $keywordsScript,
	"keywordFile" => $keyFile,
	"cleanedRfamAliZipFi" => $cleanedRfamAliDir,
	"strucTabDir" => $structureTableDir,
	"strucTabName" => $structureTableName,
	"tmp" => $tmpDir,
	"rfamFullGappedDir" => $rfamFullGappedDir,
	"fr3dFiName" => $fr3dFiName,
	"uniqueModuleNameSuffix" => $uniqueModuleNameSuffix,
	"cmDir" => $cmDir,
	"cleanedAliDir" => $cleanedAliDir,
	"rfamAliDir" => $rfamAliDir,

	## external tools and alignment names
	"stk2alnTool" => $stk2alnTool,
	"cmalign" => $cmalign,
	"cmalignAliName" => $cmalignAliName,
	"clustalw" => $clustalw,
	"modCleanRfamAli" => $modCleanRfamAliName,
	"modCleanRefseqRfamAli" => $modCleanRefseqRfamAliName,
	"rmbuild" => $rmbuild,
	"rmdetect" => $rmdetect,
	"rmcluster" => $rmcluster,
	"ali2scan" => $ali2scan,

	## output names of the test and shuffled runs
	"rmdoutName" => $rmdoutName,
	"rmcoutName" => $rmcoutName,
	"clusterDirName" => $clusterDirName,
	"clusterSummary" => $clusterSummary,
	"rmdoutNameS" => $rmdoutNameS,
	"rmcoutNameS" => $rmcoutNameS,
	"clusterDirNameS" => $clusterDirNameS,
	"clusterSummaryS" => $clusterSummaryS,
	"shuffleDir" => $shuffleDir,
	"multiperm" => $multiperm,
	"shuffNum" => $shuffNum,
	"stats" => $stats,
	"y" => $y,
);


###########################################################################################
## check if rootPath is writable (to create output directory and module file), if yes start scanning and mapping
if(-w $rootPath){
	
	## Always start from a fresh output directory that everybody may write to:
	## an existing one is removed first. The directory name contains user input
	## (-o), so the commands are run in list form, which bypasses the shell.
	if(-d $outdir){ system('rm', '-rf', $outdir); }
	system('mkdir', $outdir);
	system('chmod', 'a+w', $outdir);
	
	my $putativeModulesFileZipped = $putativeModulesFile.".gz";
	if($putModTool eq "y"){
		#################
		## print information
		print STDOUT "\n\n\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\n";
		print STDOUT "\n\#\# Start scanning FR3D files....\n";
		print STDOUT "\#\# FR3D path: $fr3dpath\n";
		print STDOUT "\#\# FR3D results: $putativeModulesFile\n";
		
		## the scan needs the FR3D files uncompressed
		unzipDirFiles($fr3dpath);
		
		######################
		## run FR3D scan
		print STDOUT "\#\# Scanning FR3D\n";
		print STDOUT "\#\# ...this may take some minutes...please wait...\n\n";
		## the shell is still needed here for the output redirection
		system($putModPath."/PutativeModules ".$fr3dpath." > ".$putativeModulesFile);
		
		######################
		## zip putativeModuleFile
		
		if(-e $putativeModulesFile){
			## remove an archive left over from an earlier scan so that gzip can
			## create the new one from the fresh result file
			if(-e $putativeModulesFileZipped){ unlink $putativeModulesFileZipped; }
			system('gzip', '-9', $putativeModulesFile);
		}
		## compress the FR3D files again
		zipDirFiles($fr3dpath);
	}
	
	####################################################
	## start mapping only if size of putativeModulesFileZipped  > 0
	if(-e $putativeModulesFileZipped){
		
		## die only when stat itself fails; a size of 0 is a valid answer and is
		## reported below instead of being mistaken for an error
		my @archiveInfo = stat($putativeModulesFileZipped);
		die "stat($putativeModulesFileZipped): $!\n" unless @archiveInfo;
		my $size = $archiveInfo[7];
		if($size > 0){
			
			###############
			## start mapping
			&mainMapping::map(\%infoHash);
		}
		else{ print "$putativeModulesFile is empty\n\n"; }
	}
	else{ print STDOUT "\nNo Putative Module file found! Please check if ./PutativeModules_all.txt.gz and ./PutativeModules_small.txt.gz exist!"; }
}
else{ print STDOUT "\nPath not writable: $rootPath ! Can't create output directory!\n\n"; }




#################
## local subroutines

## Decompress every gzip archive that lies directly inside a directory.
##
## Parameter: $fr3dpath - directory to scan (not recursive).
## Entries whose name starts with "." are skipped, as is everything that does
## not end in ".gz" or is not a plain file. Dies if the directory cannot be
## opened. gzip is called with the full path of each file, in list form, so
## the working directory is left alone and file names never reach a shell.
sub unzipDirFiles{
	
	my ($fr3dpath) = @_;
	opendir(my $dirHandle, $fr3dpath) or die "\nCan't open $fr3dpath\n\n";
	my @fr3dArr = readdir($dirHandle);
	closedir($dirHandle);
	foreach my $fred (@fr3dArr){
		next if $fred =~ /^\./;
		next if $fred !~ /\.gz$/;
		my $archive = $fr3dpath."/".$fred;
		next unless -f $archive;
		system('gzip', '-d', $archive);
	}
}

## Compress (gzip -9) every plain file that lies directly inside a directory.
##
## Parameter: $fr3dpath - directory to scan (not recursive).
## Entries whose name starts with "." are skipped, as is everything that
## already ends in ".gz" or is not a plain file. Dies if the directory cannot
## be opened. gzip is called with the full path of each file, in list form, so
## the working directory is left alone and file names never reach a shell.
sub zipDirFiles{
	
	my ($fr3dpath) = @_;
	opendir(my $dirHandle, $fr3dpath) or die "\nCan't open $fr3dpath\n\n";
	my @fr3dArr = readdir($dirHandle);
	closedir($dirHandle);
	foreach my $fred (@fr3dArr){
		next if $fred =~ /^\./;
		next if $fred =~ /\.gz$/;
		my $plainFile = $fr3dpath."/".$fred;
		next unless -f $plainFile;
		system('gzip', '-9', $plainFile);
	}
}


sub HelpMessage {
	
	## print the usage text and end the script with exit status 0
	print "$helptext";
	exit 0;
}

sub showVersion {
	
	## print the version string followed by an empty line and end the script with exit status 0
	print "$metaRNAmodulesVersion\n\n";
	exit 0;
}

sub getRootPath {
	
	## Append the directory part of $scriptPath to $rootPath: every "/"-separated
	## component except the last one, each followed by "/".
	## Takes no arguments; reads and writes the file-level variables.
	my @components = split(/\//, $scriptPath);
	pop @components;   # drop the last component
	foreach my $component (@components){
		$rootPath .= $component."/";
	}
}


## Check that an alignment file name carries a STOCKHOLM suffix.
##
## Parameter: $ali - file name or path given by the user.
## Accepted endings: .sto, .stk, .sto.gz, .stk.gz
## Returns normally for an accepted name; otherwise prints a hint and ends
## the script. Only the name is inspected, not the file content.
sub checkFormat {

	my ($ali) = @_;
	## anchored at the end of the string, so ".sto"/".stk" somewhere in the
	## middle of the name (e.g. "x.stockholm.fasta") is no longer accepted
	if($ali !~ /\.(?:sto|stk)(?:\.gz)?\z/){
		print "\nPlease check format of alignment!\nmetaRNAmodules requires STOCKHOLM format.\nSuffix of file should be .sto or .stk\n\n";
		exit;
	}
}
