#!/usr/bin/perl


package models_build;



use FindBin;
use lib "$FindBin::Bin/metaRNAmodules_model/";
use lib "$FindBin::Bin/lib/";
use prepareModelBuilding;
use useRMdetectPackage;
use utils;
use strict;
use warnings;
use Data::Dumper;









## startBuild($fam, $moHashRef, $strucTabHashRef, $mofile, $infoHash)
##
## Entry point for building the model of one module: the motif file is parsed
## with getMotifInfo() and the parsed record is handed to processMotifs().
##
##   $fam             - family identifier, forwarded to processMotifs()
##   $moHashRef       - hash ref; only the key "uniqueModuleName" is read here
##   $strucTabHashRef - accepted for the caller's convenience, not used here
##   $mofile          - path of the motif file, forwarded to getMotifInfo()
##   $infoHash        - hash ref; "uniqueOutDir", "instTmpDir" and
##                      "uniqueModuleNameSuffix" are read here and the whole
##                      ref is forwarded to processMotifs()
##
## Returns the list ($defFi, $modelFi) exactly as produced by processMotifs().
sub startBuild {

	my ($fam, $moHashRef, $strucTabHashRef, $mofile, $infoHash) = @_;

	my $modelDir   = $infoHash->{"uniqueOutDir"};
	my $tmpDir     = $infoHash->{"instTmpDir"};
	my $moduleName = $moHashRef->{"uniqueModuleName"};
	my $nameSuffix = $infoHash->{"uniqueModuleNameSuffix"};

	print STDOUT "\#\# Build FR3D model\n";

	## parse the motif description first, then build from the parsed record
	my $motifRecord = getMotifInfo($mofile, $tmpDir, $moduleName, $nameSuffix);
	my ($defFi, $modelFi) = processMotifs($fam, $motifRecord, $modelDir, $tmpDir, $infoHash);

	return($defFi, $modelFi);
}





## processMotifs($fam, $moHashRef, $newModelDir, $newTmpRand, $infoHash)
##
## Prepares the input files for one module and runs the model build.
##
##   $fam         - family identifier; used as file name prefix
##   $moHashRef   - hash ref of the parsed motif; the keys "bps" and "gpos"
##                  are read
##   $newModelDir - directory holding the gzipped alignment and receiving the
##                  results
##   $newTmpRand  - temporary working directory
##   $infoHash    - hash ref; the keys "modelname", "version",
##                  "modCleanRfamAli", "modCleanRefseqRfamAli" and "rmbuild"
##                  are read
##
## All paths are built by plain string concatenation, so both directory
## arguments are expected to carry their trailing "/".
##
## Steps:
##   1. make sure an uncompressed copy of the alignment exists in the tmp dir
##   2. read the gapped reference sequence with getGappedSeq()
##   3. if a sequence was found, call
##      prepareModelBuilding::prepareFiles4Modelling() and, when the def file
##      exists afterwards, useRMdetectPackage::buildModel()
##   4. move the result files to $newModelDir with copyModels()
##
## Returns the list ($defFi, $modelFi): the paths of the def and model file
## inside $newModelDir. The paths are returned whether or not the files were
## actually created.
sub processMotifs {

	my ($fam, $moHashRef, $newModelDir, $newTmpRand, $infoHash) = @_;

	my $modelName = $$infoHash{"modelname"};
	my $x = $$infoHash{"version"};
	my $stoFiName = $$infoHash{"modCleanRfamAli"};
	my $newAliFiName = $$infoHash{"modCleanRefseqRfamAli"};
	my $rmbuild = $$infoHash{"rmbuild"};
	my $bps = $$moHashRef{"bps"};
	my $gpos = $$moHashRef{"gpos"};

	## the gapped position is split on "/" into two ranges and each range
	## on "-" into its two bounds
	my ($gap1, $gap2) = split(/\//, $gpos);
	my ($g11, $g12) = split(/\-/, $gap1);
	my ($g21, $g22) = split(/\-/, $gap2);

	## set model names
	my $lcModelName = lc($modelName);
	my $version = $x.".0";
	my $name = $modelName."_".$version;

	## The uncompressed path is built directly instead of stripping ".gz"
	## from the compressed one: a global substitution would also remove a
	## ".gz" that is part of a directory or family name.
	my $stoFi = $newModelDir.$fam.$stoFiName.".gz";
	my $tmpStoFi = $newTmpRand.$fam.$stoFiName;
	my $tmpStoGz = $tmpStoFi.".gz";

	if(! -e $tmpStoFi){
		## list-form system(): no shell is involved, so paths containing
		## blanks or shell metacharacters are passed through unchanged
		if(! -e $tmpStoGz){
			system("cp", $stoFi, $newTmpRand) == 0
				or warn "Could not copy $stoFi to $newTmpRand\n";
		}
		system("gzip", "-d", $tmpStoGz) == 0
			or warn "Could not decompress $tmpStoGz\n";
	}

	my $newAliFi = $newTmpRand.$fam.$newAliFiName;

	## get gapped sequence
	my $gapSeq = getGappedSeq($tmpStoFi, $newTmpRand);

	my $tmpdefFi = $newTmpRand.$modelName."_".$version.".def";
	my $tmpmodelFi = $newTmpRand.$lcModelName."_".$version.".model";
	my $tmpdataFi = $newTmpRand.$modelName."_".$version.".data";
	my $defFi = $newModelDir.$modelName."_".$version.".def";
	my $modelFi = $newModelDir.$lcModelName."_".$version.".model";

	## remembers whether the build step was really started, so that the
	## success message is not printed right after "Could not build model"
	my $buildStarted = 0;

	## run preparation of def-files and RMBuild
	if($gapSeq ne ""){

		prepareModelBuilding::prepareFiles4Modelling($fam, $newTmpRand, $tmpStoFi, $newAliFi, $g11, $g12, $g21, $g22, $gapSeq, $tmpdefFi, $modelName, $version, $bps);

		if(-e $tmpdefFi){
			useRMdetectPackage::buildModel($rmbuild, $newTmpRand, $newAliFi, $tmpdefFi, $tmpmodelFi, $name);
			$buildStarted = 1;
		}
		else{ print "\n\nCould not build model\n"; }

		## whatever was produced is still moved out of the tmp dir
		if(-d $newTmpRand){
			copyModels($newTmpRand, $newModelDir, $newAliFi, $tmpdefFi, $tmpmodelFi, $tmpdataFi);
			if($buildStarted){
				print STDOUT "\#\# model has been build: ".$modelName."_".$version."\n";
			}
		}
	}
	else{ print "\n\nCould not prepare model building\n"; }
	return($defFi, $modelFi);
}






## copyModels($newTmpRand, $newModelDir, $newAliFi, $tmpdef, $tmpmodFi, $tmpdataFi)
##
## Compresses the alignment file with "gzip -9" and moves the resulting
## ".gz" file together with the def, model and data file into $newModelDir.
##
##   $newTmpRand  - temporary directory (accepted, not used in this sub)
##   $newModelDir - destination directory
##   $newAliFi    - alignment file; gzipped in place before it is moved
##   $tmpdef      - def file to move
##   $tmpmodFi    - model file to move
##   $tmpdataFi   - data file to move
##
## The external commands are started with list-form system(), i.e. without a
## shell, so paths containing blanks or shell metacharacters are handled
## correctly. A failing command only triggers a warning; the remaining files
## are still processed.
##
## Returns nothing.
sub copyModels {

	my ($newTmpRand, $newModelDir, $newAliFi, $tmpdef, $tmpmodFi, $tmpdataFi) = @_;

	system("gzip", "-9", $newAliFi) == 0
		or warn "Could not gzip $newAliFi\n";

	## gzip replaces the input file by <name>.gz, so that is what gets moved
	foreach my $file ($newAliFi.".gz", $tmpdef, $tmpmodFi, $tmpdataFi){
		system("mv", $file, $newModelDir) == 0
			or warn "Could not move $file to $newModelDir\n";
	}
	return;
}






## getGappedSeq($tmpstofi, $tmp)
##
## Collects the gapped sequence of the entry whose line starts with "full"
## (case-insensitive) from an alignment file.
##
##   $tmpstofi - path of the uncompressed alignment file
##   $tmp      - accepted for interface compatibility, not used in this sub
##
## Lines starting with "#" and empty lines are skipped. For every remaining
## line that starts with "full", the second whitespace separated column is
## appended to the result, so a sequence spread over several lines is
## concatenated in file order.
##
## Returns the concatenated sequence, or "" when the file does not exist or
## contains no matching line. Dies when an existing file cannot be opened.
sub getGappedSeq {

	my ($tmpstofi, $tmp) = @_;
	my $seq = "";

	## a missing file is not fatal: processMotifs() checks for ""
	return($seq) unless(-e $tmpstofi);

	## three-argument open with a lexical handle: the file name is taken
	## literally and no package-wide handle is left behind
	open(my $ali, "<", $tmpstofi) or die "\nCan't open $tmpstofi\n\n";
	while(my $line = <$ali>){
		next if($line =~ /^\#/);
		next if($line =~ /^$/);
		chomp($line);
		next unless($line =~ /^full/i);

		my (undef, $se) = split(/\s+/, $line);
		## a line holding only the identifier contributes nothing
		$seq .= $se if(defined($se));
	}
	close($ali);
	return($seq);
}





## getMotifInfo($mofile, $newTmpRand, $uniqueName, $uniqueModuleNameSuffix)
##
## Parses a motif description file into a hash.
##
##   $mofile                 - path of the (gzipped) motif file
##   $newTmpRand             - temporary directory; paths are built by plain
##                             concatenation, so it is expected to end in "/"
##   $uniqueName             - base name of the motif file
##   $uniqueModuleNameSuffix - suffix appended to the base name
##
## The file that is parsed is $newTmpRand.$uniqueName.$uniqueModuleNameSuffix.
## If $mofile exists and that file is missing, $mofile is copied into the
## temporary directory (unless a ".gz" copy is already there) and the ".gz"
## copy is decompressed with "gzip -d".
##
## Recognised lines and the hash keys they fill:
##   PDB:                              -> "pdb"
##   RFAM:                             -> "fam"
##   CLASS:                            -> "class"
##   ORG:                              -> "org"
##   Ungapped seed alignment position: -> "upos"
##   Gapped seed alignment position:   -> "gpos"
##   Putative Module:                  -> "moseq"
##   Module representation:            -> "morep"
##   Dot-Bracket notation:             -> "dobra"
##   BP <n>: <a> <b> <type>            -> collected in "bps" as "<b>*<a>",
##                                        entries joined by "+"
## Lines containing "*" are skipped completely. "bps" is always set ("" when
## no BP line was found); the other keys exist only if their line was found.
##
## Returns a reference to the hash. Dies when the file cannot be opened.
sub getMotifInfo {

	my ($mofile, $newTmpRand, $uniqueName,$uniqueModuleNameSuffix) = @_;
	my %moHash;
	my @basePairs;

	## The uncompressed path is built directly instead of stripping ".gz"
	## from the compressed one: a global substitution would also remove a
	## ".gz" occurring anywhere else in the path.
	my $tmpmofi = $newTmpRand.$uniqueName.$uniqueModuleNameSuffix;
	my $tmpmofiGz = $tmpmofi.".gz";

	if(-e $mofile && ! -e $tmpmofi){
		## list-form system(): no shell is involved, so paths containing
		## blanks or shell metacharacters are passed through unchanged
		if(! -e $tmpmofiGz){
			system("cp", $mofile, $newTmpRand) == 0
				or warn "Could not copy $mofile to $newTmpRand\n";
		}
		system("gzip", "-d", $tmpmofiGz) == 0
			or warn "Could not decompress $tmpmofiGz\n";
	}

	open(my $fi, "<", $tmpmofi) or die "\nCan't open $tmpmofi\n\n";
	while(my $line = <$fi>){
		chomp($line);

		if($line =~ /\*+/){
			## lines with an asterisk carry no field that is stored
			next;
		}
		elsif($line =~ /^PDB\:[\s\t]+(\d{1}[\w\d]{3})/){
			$moHash{"pdb"} = $1;
		}
		elsif($line =~ /^RFAM\:[\s\t]+(RF0\d+)/){
			$moHash{"fam"} = $1;
		}
		elsif($line =~ /^CLASS\:[\s\t]+(\d+)/){
			$moHash{"class"} = $1;
		}
		elsif($line =~ /^ORG\:[\s\t]+([\w\d\.\-\/]+)/){
			$moHash{"org"} = $1;
		}
		elsif($line =~ /^Ungapped seed alignment position:[\s\t]+([\d\-\/]+)/){
			$moHash{"upos"} = $1;
		}
		elsif($line =~ /^Gapped seed alignment position:[\s\t]+([\d\-\/]+)/){
			$moHash{"gpos"} = $1;
		}
		elsif($line =~ /^Putative Module\:[\s\t]+([\w\&]+)/){
			$moHash{"moseq"} = $1;
		}
		elsif($line =~ /^Module representation\:[\s\t]+(\<[\w\d\:\,]+\>)/){
			$moHash{"morep"} = $1;
		}
		elsif($line =~ /^Dot-Bracket notation\:[\s\t]+([\(\)\.\<\>\{\}]+[\(\)\.\<\>\{\}\d]*)/){
			$moHash{"dobra"} = $1;
		}
		elsif($line =~ /^BP[\s\t]+\d+\:[\s\t]+(\d+)[\s\t]+(\d+)[\s\t]+[\(\)A-Za-z]+/){
			## the second position is written first
			push(@basePairs, $2."*".$1);
		}
	}
	close($fi);

	$moHash{"bps"} = join("+", @basePairs);
	return(\%moHash);
}


1;



