#!/usr/bin/perl


package mainMapping;


use FindBin;                 # locate this script
use lib "$FindBin::Bin/metaRNAmodules_mapping/";
use lib "$FindBin::Bin/metaRNAmodules_model/";
use lib "$FindBin::Bin/lib/";  # use the parent directory
use pipeline;
use utils;
use strict;
use Getopt::Long;
use warnings;
use Data::Dumper;


##############################################################################
## PACKAGE for mapping the FR3D instances onto Rfam and starting the metaRNAmodules pipeline
## INPUT: a hash reference with path and name details, including the putativeModulesFile



## map($infoHash)
##
## Entry point of the mapping step. Splits the putativeModulesFile into one
## gzipped file per class (via createTmpModuleFiles), then reads every class
## file line by line and starts the pipeline for each line that has the FR3D
## instance format.
##
## INPUT:   $infoHash - hash reference; keys read here: putativeModulesFile,
##          outdir, tmpModuleOutDir, tmpModuleName, tmpModuleNameSuffix, tmp.
## EFFECTS: removes and rebuilds tmpModuleOutDir; sets $$infoHash{"tmpClassFile"}
##          for every class file and $$infoHash{"instTmpDir"} for every
##          instance; prints progress to STDOUT.
## RETURNS: the return value of utils::cleanTmp.
## DIES:    if tmpModuleOutDir or an unpacked class file cannot be opened.
##
## NOTE(review): directory values are concatenated directly with file names,
## so they are assumed to end with a "/" - confirm against the caller.
sub map {

	my ($infoHash) = @_;
	my $moFi = $$infoHash{"putativeModulesFile"};
	my $outdir = $$infoHash{"outdir"};
	my $tmpModuleOutDir = $$infoHash{"tmpModuleOutDir"};
	my $tmpModuleNameSuffix = $$infoHash{"tmpModuleNameSuffix"};
	my $tmpModuleName = $$infoHash{"tmpModuleName"};
	my $tmpi = $$infoHash{"tmp"};
	my $tmpModuDir = utils::getTemporaryDirectory($$infoHash{"tmp"});

	#################
	## print information
	print STDOUT "\n\#\# Start mapping FR3D output....\n";
	print STDOUT "\#\# Input: $moFi\n";
	print STDOUT "\#\# Output: $outdir\n";

	## split putativeModulesFile according to secondary structure classes, save as temporary files;
	## anything left at tmpModuleOutDir is removed first so only fresh class files are processed.
	## List-form system() is used so that paths are never interpreted by a shell.
	if(-d $tmpModuleOutDir || !-e $moFi){
		system("rm", "-rf", $tmpModuleOutDir) == 0
			or warn "\nCould not remove $tmpModuleOutDir in mainMapping\n";
	}
	createTmpModuleFiles($infoHash, $tmpModuDir);

	## collect the class files
	opendir(my $classDirHandle, $tmpModuleOutDir) or die "\nCan't open $tmpModuleOutDir in mainMapping: $!\n\n";
	my @modArr = readdir($classDirHandle);
	closedir($classDirHandle);

	## class file name: <tmpModuleName><class number><tmpModuleNameSuffix>.gz
	## (name parts are quoted so that characters like "." are matched literally)
	my $classFileRe = qr/\A\Q$tmpModuleName\E(\d+)\Q$tmpModuleNameSuffix\E\.gz\z/;

	## correct instance format:       Instance: 2OZB F    9 F   26 / F   15 F   23
	my $instanceRe = qr{
		^Instance: \s+
		(\w{4}) \s+                         # first capture: PDB id
		(                                   # second capture: rest of the instance
			\w+ \s+ \d+ \s+ \w+ \s+ \d+
			\s+ / \s+
			\w+ \s+ \d+ \s+ \w+ \s+ \d+
			.*
		)
	}x;

	## class files are read line by line; pin the record separator so this
	## does not depend on whatever global value is currently in effect
	local $/ = "\n";

	## process each class file
	foreach my $mod (@modArr){
		next unless $mod =~ $classFileRe;
		my $class = $1;

		## copy the class file into a temporary directory and unpack it there
		my $classFile = $tmpModuleOutDir.$mod;
		(my $unzippClassFile = $mod) =~ s/\.gz\z//;
		my $tmpDir = utils::getTemporaryDirectory($$infoHash{"tmp"});
		my $unzippedPath = $tmpDir.$unzippClassFile;
		my $zippedCopy = $tmpDir.$mod;

		if(!-e $unzippedPath){
			if(!-e $zippedCopy){
				system("cp", $classFile, $tmpDir) == 0
					or warn "\nCould not copy $classFile to $tmpDir in mainMapping\n";
			}
			system("gzip", "-d", $zippedCopy) == 0
				or warn "\nCould not unpack $zippedCopy in mainMapping\n";
		}
		$$infoHash{"tmpClassFile"} = $unzippedPath;

		## open class file, process each module instance
		open(my $classFh, "<", $unzippedPath) or die "\nCan't open $unzippedPath in mainMapping: $!\n\n";
		while(my $line = <$classFh>){
			next unless $line =~ $instanceRe;
			my $instance = $1." ".$2;
			chomp($line);

			my $instTmpDir = utils::getTemporaryDirectory($$infoHash{"tmp"});
			$$infoHash{"instTmpDir"} = $instTmpDir;

			#################
			## print information
			print STDOUT "\n\n\n\#\# ".$line."\n";

			#############
			## start pipeline
			pipeline::startPipeline($infoHash, $instance, $class);
			system("rm", "-rf", $instTmpDir) == 0
				or warn "\nCould not remove $instTmpDir in mainMapping\n";
		}
		close($classFh);
		## NOTE(review): $tmpDir is not removed here; presumably utils::cleanTmp
		## below takes care of it - confirm.
	}
	return utils::cleanTmp($tmpi);
}




## createTmpModuleFiles($infoHash, $tmpModuDir)
##
## Copies the putativeModulesFile into $tmpModuDir (unpacking it there when its
## name ends in ".gz"), splits the copy into records on $$infoHash{"modFileSep"}
## and writes every record whose second line contains a digit to its own file
##     <tmpModuleOutDir><tmpModuleName><N><tmpModuleNameSuffix>
## which is then gzipped. N counts the written records starting at 1. The first
## line of a record (the number of instances) is not written. $tmpModuDir is
## removed at the end.
##
## INPUT:   $infoHash   - hash reference; keys read here: putativeModulesFile,
##                        modFileSep, tmpModuleOutDir, tmpModuleName,
##                        tmpModuleNameSuffix.
##          $tmpModuDir - working directory for the (unpacked) copy.
## RETURNS: nothing.
## DIES:    if a directory cannot be created, the working copy cannot be read
##          or a class file cannot be written.
##
## NOTE(review): directory values are concatenated directly with file names,
## so they are assumed to end with a "/" - confirm against the caller.
sub createTmpModuleFiles {

	my ($infoHash, $tmpModuDir) = @_;
	my $moFi = $$infoHash{"putativeModulesFile"};
	my $modFileSep = $$infoHash{"modFileSep"};
	my $tmpModuleOutDir = $$infoHash{"tmpModuleOutDir"};
	my $tmpModuleName = $$infoHash{"tmpModuleName"};
	my $tmpModuleNameSuffix = $$infoHash{"tmpModuleNameSuffix"};
	my $moduleCount = 0;

	###########################
	## copy putativeModulesFile into the working directory and unzip it there
	## (list-form system() keeps the paths away from the shell)
	my $mofiNa = (split(/\//, $moFi))[-1];
	my $tmpModuFiZi = $tmpModuDir.$mofiNa;
	## only a trailing ".gz" is an archive suffix; leave the rest of the path alone
	(my $tmpModuFi = $tmpModuFiZi) =~ s/\.gz\z//;

	if(! -d $tmpModuDir){
		mkdir($tmpModuDir) or -d $tmpModuDir or die "\nCan't create $tmpModuDir: $!\n\n";
	}
	if(-e $moFi && ! -e $tmpModuFiZi){
		system("cp", $moFi, $tmpModuDir) == 0
			or warn "\nCould not copy $moFi to $tmpModuDir\n";
		if($tmpModuFiZi =~ /\.gz$/ && ! -e $tmpModuFi){
			system("gzip", "-d", $tmpModuFiZi) == 0
				or warn "\nCould not unpack $tmpModuFiZi\n";
		}
	}

	## create TMP directory
	if(! -d $tmpModuleOutDir){
		mkdir($tmpModuleOutDir) or -d $tmpModuleOutDir or die "\nCan't create $tmpModuleOutDir: $!\n\n";
	}

	##################################################
	## open putativeModulesFile and split according to sec. struc. classes
	## store each class file in the tmp directory
	open(my $modulesFh, "<", $tmpModuFi) or die "\nCan't open $tmpModuFi: $!\n\n";
	{
		## set file separator for this block only; the caller's value comes
		## back automatically, also when one of the die() calls below fires
		local $/ = $modFileSep;

		while(my $record = <$modulesFh>){
			chomp($record);

			## split one class and do not save first line (which is the number of instances);
			## a record without a second line has nothing to save
			my @filArr = split(/\n/, $record);
			next unless scalar @filArr > 1 && $filArr[1] =~ /\d+/;

			$moduleCount++;
			## open temporary file and store class
			my $name = $tmpModuleOutDir.$tmpModuleName.$moduleCount.$tmpModuleNameSuffix;
			open(my $classFh, ">", $name) or die "\nCan't write $name: $!\n\n";
			foreach my $classLine (@filArr[1 .. $#filArr]){
				print {$classFh} $classLine."\n";
			}
			print {$classFh} "\n";
			close($classFh) or die "\nCan't finish writing $name: $!\n\n";

			system("gzip", "-9", $name) == 0
				or warn "\nCould not compress $name\n";
		}
	}
	close($modulesFh);

	## the working copy is no longer needed
	system("rm", "-rf", $tmpModuDir) == 0
		or warn "\nCould not remove $tmpModuDir\n";
	return;
}


1;
