#!/usr/bin/perl


package align2cleanedRfam;


use FindBin;
use lib "$FindBin::Bin/lib/";
use utils;
use strict;
use warnings;
use Data::Dumper;



## Package to align the full Rfam sequence to the seed alignment
## Input:
## written: 09/03/2012 - corinna



# Write the full-length sequence of one structure-table entry to a FASTA
# file in the temp directory and hand it to cmalign() so that it is aligned
# to the cleaned seed alignment of the family.
#
# Arguments:
#   $fam         - family identifier; used as prefix of the FASTA file name
#                  and passed on to cmalign()
#   $infoHash    - hash ref; the keys read here are "instTmpDir", "cmDir",
#                  "cleanedAliDir", "cmalign" and "cmalignAliName"
#   $strucTabRef - hash ref; the keys read here are "rfamEmblSeqPosStart",
#                  "rfamEmblSeqPosStop", "org" and "fullUngapSeq"
#
# Returns the value returned by cmalign() (path of the new alignment file,
# or "" if cmalign() did not write one).
# Dies if the FASTA file cannot be opened or closed.
sub alignFullSeq2CleanedSeedAli {

	my ($fam, $infoHash, $strucTabRef) = @_;
	my $tmpDir = $$infoHash{"instTmpDir"};
	my $cmDir = $$infoHash{"cmDir"};
	my $cleanedAliDir = $$infoHash{"cleanedAliDir"};
	my $cmalignDir = $$infoHash{"cmalign"};
	my $cmalignAliName = $$infoHash{"cmalignAliName"};
	my $rfamEmblStart = $$strucTabRef{"rfamEmblSeqPosStart"};
	my $rfamEmblStop = $$strucTabRef{"rfamEmblSeqPosStop"};
	my $org = $$strucTabRef{"org"};
	my $ungapSeq = $$strucTabRef{"fullUngapSeq"};

	print STDOUT "\#\# Align full sequence to seed alignment\n";

	# The path is built by plain concatenation; $tmpDir is presumably
	# expected to end with a "/" (TODO confirm against the caller).
	my $faout = $tmpDir.$fam."_new.fas";

	# Three-argument open with a lexical handle: the file name is never
	# parsed for a mode and no package-global handle is left behind.
	open(my $fas, '>', $faout) or die "\nCan't open $faout in align2cleanedRfam::line44: $!\n\n";
	print {$fas} "> full_".$org."_".$rfamEmblStart."-".$rfamEmblStop."\n";
	print {$fas} $ungapSeq."\n";
	# Buffered write errors only surface at close, so check it.
	close($fas) or die "\nCan't close $faout in align2cleanedRfam: $!\n\n";

	my $newAliPath = cmalign($fam, $faout, $tmpDir, $cmalignDir, $cmDir, $cleanedAliDir, $cmalignAliName);

	return($newAliPath);
}




# Align the sequences of a FASTA file to the cleaned seed alignment of a
# family by running the external cmalign program, and write the filtered
# result together with the header of the cleaned alignment to a new file.
#
# Arguments:
#   $fam            - family identifier; matched literally against the file
#                     names in $cmDir and $cleanedAliDir
#   $fastaFi        - path of the FASTA file holding the sequence(s) to align
#   $randout        - working directory; used as a plain string prefix, so
#                     it is presumably expected to end with "/"
#   $cmalignDir     - path of the cmalign executable (despite the name it is
#                     run as a command, not used as a directory)
#   $cmDir          - directory (string prefix) holding the covariance models
#   $cleanedAliDir  - directory (string prefix) holding the cleaned alignments
#   $cmalignAliName - suffix appended to $randout.$fam to name the output file
#
# Returns the path of the written alignment file, or "" if no model or
# alignment file matches $fam or one of the input files of cmalign is
# missing.
# Dies if one of the two directories cannot be read, if getHeader() cannot
# open the staged alignment, or if the output file cannot be written.
sub cmalign {

	my ($fam, $fastaFi, $randout, $cmalignDir, $cmDir, $cleanedAliDir, $cmalignAliName) = @_;
	my $newAli = "";
	my $newalistk = "";

	### locate covariance model and cleaned rfam alignment
	my $coMoZip = _firstDirEntryMatching($cmDir, $fam,
		"Can't find $cmDir in align2cleanedRfam::line66\n\n");
	my $cleanedRfamAliZip = _firstDirEntryMatching($cleanedAliDir, $fam,
		"Can't find $cleanedAliDir in align2cleanedRfam::line79\n\n");

	# Without a match the names would be undef and every path below would
	# collapse to the bare directory; give up the same way as when the
	# cmalign inputs are missing.
	if(!defined($coMoZip) || !defined($cleanedRfamAliZip)){
		warn "No file matching '$fam' found in $cmDir or $cleanedAliDir; skipping cmalign\n";
		return($newalistk);
	}

	### cp covariance model to /tmp
	# Only a trailing ".gz" is stripped, which is the name gunzip produces.
	(my $coMo = $coMoZip) =~ s/\.gz\z//;
	if((-e $cmDir.$coMoZip) && (!-e $randout.$coMo) && (!-e $randout.$coMoZip)){
		_copyAndGunzip($cmDir, $coMoZip, $randout);
	}

	### cp cleaned rfam alignment to /tmp
	(my $cleanedRfamAli = $cleanedRfamAliZip) =~ s/\.gz\z//;
	# NOTE(review): unlike the covariance model above, only the unzipped
	# name is tested here (the original tested it twice); a left-over
	# zipped copy in $randout is therefore overwritten and unpacked again.
	if((-e $cleanedAliDir.$cleanedRfamAliZip) && (!-e $randout.$cleanedRfamAli)){
		_copyAndGunzip($cleanedAliDir, $cleanedRfamAliZip, $randout);
	}
	$newAli .= getHeader($randout.$cleanedRfamAli)."\n\n";

	### run cmalign
	my $cmPath = $randout.$coMo;
	my $seedAliPath = $randout.$cleanedRfamAli;
	if((-e $cmPath) && (-e $seedAliPath) && (-e $fastaFi)){

		# List-form pipe open: the arguments reach cmalign unchanged, no
		# shell is involved. A failing cmalign is reported but, as before,
		# still results in an output file that holds only the header.
		my @aliArr = ();
		if(open(my $cmalignOut, '-|', $cmalignDir, '--withali', $seedAliPath, $cmPath, $fastaFi)){
			@aliArr = <$cmalignOut>;
			close($cmalignOut) or warn "$cmalignDir did not finish cleanly (status $?)\n";
		}
		else{
			warn "Can't run $cmalignDir: $!\n";
		}

		foreach my $aliLine (@aliArr){
			chomp($aliLine);

			# Skip plain "# ..." comment lines, "#=GF" lines, lines starting
			# with whitespace and anything shorter than five characters
			# (blank lines, the "//" terminator).
			next if(($aliLine =~ /^\#[\s\t]+/) || (length($aliLine) < 5) || ($aliLine =~ /^\s/) || ($aliLine =~ /^\#=GF/));

			if($aliLine =~ /^\#=GC[\s\t]+RF/){
				# the RF annotation line is followed by a blank line
				$newAli .= $aliLine."\n\n";
			}
			else {
				$newAli .= $aliLine."\n";
			}
		}

		# Drop one trailing newline (the blank line after the last RF line),
		# but make sure "//" always starts on a line of its own.
		chomp($newAli);
		$newAli .= "\n" if(($newAli ne "") && ($newAli !~ /\n\z/));
		$newAli .= "//\n\n";

		$newalistk = $randout.$fam.$cmalignAliName;
		open(my $na, '>', $newalistk) or die "\nCan't open $newalistk  in align2cleanedRfam::line177: $!\n\n";
		print {$na} $newAli;
		close($na) or die "\nCan't close $newalistk in align2cleanedRfam: $!\n\n";
	}
	return($newalistk);
}

# Return the first entry of directory $dir (in readdir order) whose name
# contains $fam literally, or undef if there is none. Dies with $errMsg if
# the directory cannot be opened.
sub _firstDirEntryMatching {

	my ($dir, $fam, $errMsg) = @_;
	opendir(my $dh, $dir) or die $errMsg;
	my @hits = grep { /\Q$fam\E/ } readdir($dh);
	closedir($dh);
	return($hits[0]);
}

# Copy $srcDir.$zipName into $dstDir and gunzip the copy there. Failures
# are only reported; the callers check afterwards whether the file exists.
sub _copyAndGunzip {

	my ($srcDir, $zipName, $dstDir) = @_;
	system('cp', $srcDir.$zipName, $dstDir) == 0
		or warn "cp of $srcDir$zipName to $dstDir failed (status $?)\n";
	system('gunzip', '-d', $dstDir."/".$zipName) == 0
		or warn "gunzip of $dstDir/$zipName failed (status $?)\n";
	return;
}




# Collect the header of an alignment file.
#
# Every line starting with "#" is kept in file order, except lines starting
# with "#=GC". A kept line that contains "STOCKHOLM" after at least one
# other character is followed by two additional newlines.
#
# Argument: path of the alignment file to read.
# Returns the collected lines as one string ("" if no line qualifies).
# Dies if the file cannot be opened.
sub getHeader {

	my $tmpcleanedRfamAli = shift;
	my $head = "";

	# Three-argument open: the path is taken literally (a two-argument open
	# would strip surrounding whitespace and interpret mode characters).
	open(my $ai, '<', $tmpcleanedRfamAli) or die "\nCan't open $tmpcleanedRfamAli in align2cleanedRfam: $!\n\n";

	# A lexical line variable keeps the caller's $_ untouched.
	while(my $line = <$ai>){
		next unless $line =~ /^\#/;
		next if $line =~ /^\#\=GC/;
		$head .= $line;
		$head .= "\n\n" if $line =~ /.+STOCKHOLM.*/;
	}
	close($ai);
	return($head);
}



1;



