#!/usr/bin/perl


package getRfamSeqInfo;



use lib "$FindBin::Bin/lib/";  # use the parent directory
use utils;
use strict;
use warnings;
use Data::Dumper;




# getRfamEmblInfo(\%strucEntry, $classFiZip, $rfamFullGappedDir, $tmpDir)
#
# Entry point: fetches the Rfam full-alignment sequence information for one
# structure entry.
#
# Parameters:
#   $strucEntryHashRef  - hash ref; the keys "org", "fam",
#                         "pdbEmblSeqPosStart" and "pdbEmblSeqPosStop" are read.
#   $classFiZip         - path of a (possibly gzip-compressed) file; if it ends
#                         in ".gz" and no decompressed copy exists next to it,
#                         it is decompressed in place with `gzip -d`.
#   $rfamFullGappedDir  - directory holding the full gapped alignments
#                         (passed through to copyFullRfamAli2Tmp).
#   $tmpDir             - working directory for the decompressed alignment
#                         (passed through to copyFullRfamAli2Tmp).
#
# Returns the hash ref produced by getFullUngappedRfamSeqAndEMBLPos.
# Side effects: prints a progress line to STDOUT; may decompress $classFiZip.
sub getRfamEmblInfo {

	my ($strucEntryHashRef, $classFiZip, $rfamFullGappedDir, $tmpDir) = @_;

	print STDOUT "\#\# Get Rfam sequence infos\n";

	# Only a trailing ".gz" is the compression suffix; anchor the match so a
	# ".gz" elsewhere in the path is left alone.
	(my $classFiUz = $classFiZip) =~ s/\.gz\z//;
	if(-e $classFiZip && !-e $classFiUz){
		# List-form system() bypasses the shell, so blanks or shell
		# metacharacters in the path cannot alter the command.
		system('gzip', '-d', $classFiZip) == 0
			or warn "\nCan't decompress $classFiZip in getRfamSeqInfo\n\n";
	}

	my $org = $strucEntryHashRef->{"org"};
	my $fam = $strucEntryHashRef->{"fam"};
	my $pdbEmblStart = $strucEntryHashRef->{"pdbEmblSeqPosStart"};
	my $pdbEmblStop = $strucEntryHashRef->{"pdbEmblSeqPosStop"};

	# Only the alignment path is needed here; the second return value
	# (the tmp directory) is ignored.
	my ($tmpFullStockholmAli) = copyFullRfamAli2Tmp($fam, $tmpDir, $rfamFullGappedDir);

	return(getFullUngappedRfamSeqAndEMBLPos($fam, $org, $pdbEmblStart, $pdbEmblStop, $tmpFullStockholmAli));
}





# copyFullRfamAli2Tmp($fam, $tmpRandDir, $rfamFullGappedDir [, $mof])
#
# Makes a decompressed copy of a family's full alignment available in
# $tmpRandDir.
#
# Parameters:
#   $fam                - family identifier; matched literally as a substring
#                         of the file names in $rfamFullGappedDir.
#   $tmpRandDir         - destination directory. It is concatenated directly
#                         with the file name, so it must end with a path
#                         separator.
#   $rfamFullGappedDir  - source directory; same trailing-separator
#                         requirement as $tmpRandDir.
#   $mof                - accepted for interface compatibility; not used.
#
# Returns ($tmpFullStockholmAli, $tmpRandDir): the path of the decompressed
# alignment inside $tmpRandDir, and $tmpRandDir itself. If no file name
# contains $fam, a warning is issued and $tmpRandDir is returned in both
# slots (the value the unguarded path concatenation yields for a missing
# file name), so callers must not expect a readable alignment in that case.
#
# Dies if $rfamFullGappedDir cannot be opened. A failing cp/gzip only warns.
sub copyFullRfamAli2Tmp {

	my ($fam, $tmpRandDir, $rfamFullGappedDir, $mof) = @_;

	opendir(my $dirHandle, $rfamFullGappedDir) or die "\nCan't open $rfamFullGappedDir in getRfamSeqInfo\n\n";
	# \Q..\E: the identifier is data, not a pattern. Sorting makes the choice
	# deterministic when several file names contain $fam.
	my @famStkArr = sort grep { /\Q$fam\E/ } readdir($dirHandle);
	# Directory handles must be released with closedir(), not close().
	closedir($dirHandle);

	my $fullStockholmAliZip = $famStkArr[0];
	if(!defined $fullStockholmAliZip){
		warn "\nNo alignment for $fam found in $rfamFullGappedDir in getRfamSeqInfo\n\n";
		return($tmpRandDir, $tmpRandDir);
	}

	my $tmpFullStockholmAliZip = $tmpRandDir.$fullStockholmAliZip;
	# Strip only the trailing compression suffix, never a ".gz" that happens
	# to occur inside the directory or file name.
	(my $tmpFullStockholmAli = $tmpFullStockholmAliZip) =~ s/\.gz\z//;

	if(!-e $tmpFullStockholmAli){
		# List-form system() avoids shell interpretation of the paths.
		if(system('cp', $rfamFullGappedDir.$fullStockholmAliZip, $tmpRandDir) != 0){
			warn "\nCan't copy $rfamFullGappedDir$fullStockholmAliZip to $tmpRandDir in getRfamSeqInfo\n\n";
		}
		elsif($tmpFullStockholmAliZip ne $tmpFullStockholmAli
			&& system('gzip', '-d', $tmpFullStockholmAliZip) != 0){
			# The name comparison skips gzip for files that carry no
			# ".gz" suffix and are therefore already in their final form.
			warn "\nCan't decompress $tmpFullStockholmAliZip in getRfamSeqInfo\n\n";
		}
	}
	return($tmpFullStockholmAli, $tmpRandDir);
}







# getFullUngappedRfamSeqAndEMBLPos($fam, $org, $pdbEmblStart, $pdbEmblStop,
#                                  $tmpFullStockholmAli)
#
# Scans an alignment file for rows of the form "<name>/<pos1>-<pos2> <seq>"
# whose line starts with $org and whose coordinate range encloses
# [$pdbEmblStart, $pdbEmblStop].
#
# Parameters:
#   $fam                  - accepted for interface compatibility; not used.
#   $org                  - literal prefix the row must start with.
#   $pdbEmblStart/Stop    - range that the row's coordinates must enclose.
#   $tmpFullStockholmAli  - path of the alignment file to read.
#
# Returns a hash ref. When at least one row qualified it holds:
#   fullUngapSeq         - concatenation of the qualifying rows' sequences
#                          with the characters . _ - , removed, upper-cased
#   rfamEmblSeqPosStart  - smaller coordinate of the last qualifying row
#   rfamEmblSeqPosStop   - larger coordinate of the last qualifying row
#   rfamEmblStrand       - "+" if pos1 <= pos2 in that row, otherwise "-"
# When no row qualified the hash is empty.
#
# Dies if the alignment file cannot be opened.
sub getFullUngappedRfamSeqAndEMBLPos {

	my ($fam, $org, $pdbEmblStart, $pdbEmblStop, $tmpFullStockholmAli) = @_;
	my $rfamEmblStart = 0;
	my $rfamEmblStop = 0;
	my $rfamStrand = "";
	my $gapSeq = "";

	open(my $stkFh, "<", $tmpFullStockholmAli) or die "\nCan't open $tmpFullStockholmAli in getRfamSeqInfo\n\n";
	while(my $fasLine = <$stkFh>){

		# Skip "#=" markup lines.
		next if($fasLine =~ /^\#=/);
		# \Q..\E: the "." in a versioned accession must match literally,
		# not act as a regex wildcard.
		next unless($fasLine =~ /^\Q$org\E/);
		chomp($fasLine);

		# Rows that do not have the "name/pos1-pos2 seq" shape carry no
		# usable coordinates and are skipped.
		next unless($fasLine =~ m{^[^/]*/(\d+)-(\d+)\s+(\S+)});
		my ($tmpRfamEmbl1, $tmpRfamEmbl2, $seq) = ($1, $2, $3);

		# Coordinates listed in descending order mark the minus strand.
		my $tmpRfamStrand = ($tmpRfamEmbl1 <= $tmpRfamEmbl2) ? "+" : "-";
		my ($tmpRfamEmblStart, $tmpRfamEmblStop) = ($tmpRfamStrand eq "+")
			? ($tmpRfamEmbl1, $tmpRfamEmbl2)
			: ($tmpRfamEmbl2, $tmpRfamEmbl1);

		if(($tmpRfamEmblStart <= $pdbEmblStart) && ($tmpRfamEmblStop >= $pdbEmblStop)){
			$gapSeq .= $seq;
			$rfamEmblStart = $tmpRfamEmblStart;
			$rfamEmblStop = $tmpRfamEmblStop;
			# Record the strand only for an accepted row, so it always
			# belongs to the same row as the reported start/stop.
			$rfamStrand = $tmpRfamStrand;
		}
	}
	close($stkFh);

	my %rfamHash;
	if(length($gapSeq) > 0){
		# Remove the gap characters . _ - , and normalise to upper case.
		(my $ungapSeq = $gapSeq) =~ tr/._\-,//d;
		$rfamHash{"fullUngapSeq"} = uc($ungapSeq);
		$rfamHash{"rfamEmblSeqPosStart"} = $rfamEmblStart;
		$rfamHash{"rfamEmblSeqPosStop"} = $rfamEmblStop;
		$rfamHash{"rfamEmblStrand"} = $rfamStrand;
	}
	return(\%rfamHash);
}


1;
