#!/usr/bin/perl -w
##########################################################################################################################
# A perl program to visualize deepBlockAlign results
# Usage: perl plotClusterAlign.pl -q <query id> -s <subject id> -c <query file> -d <subject file>
# Output: a pdf file with block group and block alignments.
# Example: perl plotClusterAlign.pl -q cluster_186 -s cluster_174 -c GSM450601.clusters -d GSM450601.clusters
##########################################################################################################################

use strict;
use warnings;

use GD::Simple;
use Getopt::Long;

############################################################################################################
#Parse input options
# Command-line values: the query/subject cluster ids and the cluster files they are read from.
# File-scoped lexicals are used instead of the obsolete `use vars` package globals.
my ($queid, $subid, $quefile, $subfile, $help);

# -h and -help are aliases for the same flag. GetOptions returns false when the
# command line contains unknown or malformed options; show the usage text then
# instead of continuing with a partially parsed command line.
GetOptions ("q=s"    => \$queid,
	    "s=s"    => \$subid,
	    "c=s"    => \$quefile,
	    "d=s"    => \$subfile,
	    "help|h" => \$help) || usage();

# All four arguments are mandatory; usage() prints the help text and exits.
usage() if($help || !$queid || !$subid || !$quefile || !$subfile);

############################################################################################################
#Retrieve query and subject clusters and put them in query.cluster and subject.cluster files
# Scratch and bookkeeping variables shared with the rest of the script.
my @temp=(); my $i=(); my $queSize=(); my $subSize=(); my $queGenomicStart=(); my $subGenomicStart=();
my $queChr=(); my $subChr=(); my $queType=(); my $subType=();

# Wrap a value in single quotes for /bin/sh so that spaces or shell
# metacharacters in ids and file names cannot alter the grep commands below.
my $shellQuote = sub { my ($arg) = @_; $arg =~ s/'/'\\''/g; return "'$arg'"; };

# An id may be given as "<cluster id>|<second id>"; the part before the first "|" is
# what gets looked up in the cluster file.
# A plain ternary is used because "my ... if COND" has undefined behaviour (see perlsyn).
chomp($queid);
@temp = split(/\|/, $queid);
my $query_id = "$temp[0]";
my $query_ncRNAid = defined($temp[1]) ? "$temp[1]" : undef;
my @query_coor=();

chomp($subid);
@temp = split(/\|/, $subid);
my $subject_id = "$temp[0]";
my $subject_ncRNAid = defined($temp[1]) ? "$temp[1]" : undef;
my @subject_coor=();

my $queryIdArg = $shellQuote->($query_id);
my $queryFileArg = $shellQuote->($quefile);
my $subjectIdArg = $shellQuote->($subject_id);
my $subjectFileArg = $shellQuote->($subfile);

# Look up the line(s) that mention each id as a whole word.
my $query_header = `grep -w -e $queryIdArg -- $queryFileArg`;
if(!$query_header) { print "\nError: Unable to retrieve $queid from $quefile\n\n"; exit(1); }

my $subject_header = `grep -w -e $subjectIdArg -- $subjectFileArg`;
if(!$subject_header) { print "\nError: Unable to retrieve $subid from $subfile\n\n"; exit(1); }

# Header fields used below (0-based, whitespace separated): 1 = chromosome, 2 and 3 = start
# and end, 6 = number of lines to copy after the header, 9 = type shown in the report.
@temp = split('\s+', $query_header);
if(!defined($temp[6]) || $temp[6]!~/^[0-9]+$/) { print "\nError: Unexpected header format for $queid in $quefile\n\n"; exit(1); }
if(system("grep -w -A $temp[6] -e $queryIdArg -- $queryFileArg > query.cluster")!=0) { print "\nError: Unable to write query.cluster\n\n"; exit(1); }
$queSize = $temp[3]-($temp[2]-1);
$queGenomicStart=$temp[2];
$queChr=$temp[1];
$queType=$temp[9];

@temp=split('\s+', $subject_header);
if(!defined($temp[6]) || $temp[6]!~/^[0-9]+$/) { print "\nError: Unexpected header format for $subid in $subfile\n\n"; exit(1); }
if(system("grep -w -A $temp[6] -e $subjectIdArg -- $subjectFileArg > subject.cluster")!=0) { print "\nError: Unable to write subject.cluster\n\n"; exit(1); }
$subSize = $temp[3]-($temp[2]-1);
$subGenomicStart=$temp[2];
$subChr=$temp[1];
$subType=$temp[9];

############################################################################################################
#Perform the alignment
# Run the aligner on the two extracted clusters and keep its output lines.
my $score=();
@temp = `./deepBlockAlign.x -q query.cluster -s subject.cluster -p 1`;
# The score is the last whitespace-delimited token of the final output line.
# No output at all, a final line starting with "#", or a final line without any
# token all mean that no alignment score is available.
if(@temp && $temp[-1]!~/^\#/ && $temp[-1]=~m/([^\s]+)$/) { $score=$1; }
else { print "\nError: No alignment computed for $query_id and $subject_id\n\n"; exit(1); }

############################################################################################################
#Read the profile alignment file
# Slurp the profile alignment file; the rest of the script works on its lines in @data.
# NOTE(review): presumably written by the deepBlockAlign.x run above - confirm.
open(my $profileFh, "<", "/tmp/nprcluster.txt") || die "Unable to open /tmp/nprcluster.txt: $!";
my @data=<$profileFh>; close($profileFh);

############################################################################################################
#Determine the aligned clusters and shift required on x-axis for plotting the cluster alignment
# %blockAln maps a query block number to the subject block number it is aligned with.
my %blockAln=(); @temp=();
my @deepBlockAlign = grep { $_=~/^ALIGNMENT/ } @data;
foreach my $alnLine (@deepBlockAlign) {
	# Keep only the block numbers: drop the keyword, everything from the first "(" on, and all whitespace.
	$alnLine=~s/^ALIGNMENT//;
	$alnLine=~s/\(.+//g;
	$alnLine=~s/\s+//g;

	if($alnLine=~/\:/) {
		# Paired block alignment: four numbers; the 1st maps to the 3rd and the 2nd to the 4th.
		@temp=split("[\-\:]+", $alnLine);
		$blockAln{$temp[0]}=$temp[2];
		$blockAln{$temp[1]}=$temp[3];
	}
	else {
		# Unpaired block alignment: two numbers separated by "-".
		@temp=split("[\-]", $alnLine);
		$blockAln{$temp[0]}=$temp[1];
	}
}

# The aligned pair with the lowest subject block number is the anchor pair. The cluster
# named in $shiftCluster is the one that gets shifted along the x-axis: the subject when
# the query block number is greater than or equal to the subject block number, otherwise the query.
# All three stay undefined when no ALIGNMENT line was found.
my $qBlock=(); my $sBlock=(); my $shiftCluster=();

my ($anchorBlock) = sort { $blockAln{$a} <=> $blockAln{$b} } keys(%blockAln);
if(defined($anchorBlock)) {
	$qBlock=$anchorBlock;
	$sBlock=$blockAln{$anchorBlock};
	$shiftCluster = ($qBlock >= $sBlock) ? "subject" : "query";
}

# The last two lines of the profile file hold the block coordinates of the query and of
# the subject, one whitespace-separated entry per block.
if(scalar(@data) < 2) { print "\nError: Incomplete alignment profile, block coordinates are missing\n\n"; exit(1); }
# split(' ') ignores leading whitespace, so entry N is always block N+1.
my @queryCoor = split(' ', $data[-2]);
my @subjectCoor = split(' ', $data[-1]);

#Derive anchor coordinate for plotting blocks for the second cluster
my $qBlockCoor=(); my $sBlockCoor=(); my $qBlockShift=0; my $sBlockShift=0;
# Without any aligned block pair there is nothing to anchor on; both shifts stay 0.
if(defined($shiftCluster)) {
	# The anchor coordinate is the leading integer of the aligned block's entry.
	my $qAnchorEntry = $queryCoor[$qBlock-1];
	my $sAnchorEntry = $subjectCoor[$sBlock-1];
	if(defined($qAnchorEntry) && $qAnchorEntry=~m/^([0-9]+)/) { $qBlockCoor=$1; }
	if(defined($sAnchorEntry) && $sAnchorEntry=~m/^([0-9]+)/) { $sBlockCoor=$1; }
	if(!defined($qBlockCoor) || !defined($sBlockCoor)) {
		print "\nError: Unable to locate coordinates of aligned blocks $qBlock and $sBlock\n\n"; exit(1);
	}

	# Shift one cluster so that both anchor blocks start at the same x position.
	if($shiftCluster=~/subject/) { $sBlockShift = $qBlockCoor-$sBlockCoor; }
	elsif($shiftCluster=~/query/) { $qBlockShift = $sBlockCoor-$qBlockCoor; }
}

############################################################################################################
#Plot the block alignments
#unless(-d "clusterAln") { system("mkdir clusterAln"); }

# Plot commands are the profile lines that neither start with a digit nor with ALIGNMENT.
my @plot_cmd = grep { $_!~/^[0-9]+/ && $_!~/^ALIGNMENT/ } @data;

# The report name is derived from the first token of the first plot command.
my ($firstPlotTarget) = @plot_cmd ? ($plot_cmd[0]=~m/^([^\s]+)/) : ();
if(!defined($firstPlotTarget)) { print "\nError: No block alignment plot commands found in /tmp/nprcluster.txt\n\n"; exit(1); }

# Remove ">", turn the first "_<digits>#" into "--", drop a trailing "_<digit>.pdf" and
# replace "/" by "-" so that the result can be used as a file name.
# NOTE(review): the ".pdf" pattern accepts a single digit only, unlike the "#" pattern - confirm this is intended.
my $cluster_aln_pdf = $firstPlotTarget; $cluster_aln_pdf=~s/\>//g; $cluster_aln_pdf=~s/\_[0-9]+\#/--/;
$cluster_aln_pdf=~s/\_[0-9]\.pdf//; $cluster_aln_pdf=~s/\//-/g;

# Copy the leading run of digit-initial lines to the file that is passed to the R plotting script.
open(my $profileOut, ">", "/tmp/nprtemp.txt") || die "Unable to write /tmp/nprtemp.txt: $!";
foreach my $profileLine (@data) {
	last if($profileLine!~/^[0-9]+/);
	print $profileOut $profileLine;
}
close($profileOut) || die "Unable to write /tmp/nprtemp.txt: $!";

# Names of the per-block-pair plots (first token of each plot command), in plotting order.
my @block_aln_pdf=();

foreach my $plotArgs (@plot_cmd) {
	# Same clean-up as for the report name: remove ">", first "#" becomes "--", "/" becomes "-".
	$plotArgs=~s/\>//g;
	$plotArgs=~s/\#/--/;
	$plotArgs=~s/\//-/g;

	# A line without a leading token names no output file, so there is nothing to plot for it.
	next unless($plotArgs=~m/^([^\s]+)/);
	push(@block_aln_pdf, $1);

	my $returnCode = system("R --no-save --vanilla --slave < plotblockAlign.r --args /tmp/nprtemp.txt $plotArgs");
	if($returnCode!=0) {
		# -1: the shell could not be started; exit status 127: the shell could not find R.
		# Any other failure comes from the redirect or from the R script itself.
		if($returnCode==-1 || ($returnCode>>8)==127) { print "\nError: Missing pre-requisite, R language\n\n"; }
		else { print "\nError: R failed while plotting $block_aln_pdf[-1] (exit status ".($returnCode>>8).")\n\n"; }
		exit(1);
	}
}
############################################################################################################
#Create the tex file and Plot cluster alignments.

# Split the report name "<query>--<subject>" (underscores shown as dashes) into the two
# cluster labels used in the heading.
my $qsDescription=$cluster_aln_pdf;
$qsDescription=~s/\_/-/g;
my($qCluster, $sCluster) = split(/\-\-/, $qsDescription);
$qsDescription=~s/\-\-/\\hspace{1mm}with\\hspace{1mm}/;

# Third field of the last coordinate entry of each cluster.
my $qCanvasWidth=(); my $sCanvasWidth=(); my $img=();
@temp=();
@temp=split("\:", $queryCoor[-1]);
$qCanvasWidth=$temp[2];
@temp=split("\:", $subjectCoor[-1]);
$sCanvasWidth=$temp[2];

# Drawing state shared by the query and subject plotting sections that follow.
$i=();
my $x1=0; my $x2=0;
my $y1=(); my $y2=();
my $max_x2=0;
my $default_y1=5;
my $max_y2=10;
my $mod_x1=(); my $mod_x2=(); my $mod_y1=(); my $mod_y2=();
my $nBlock=();
my $start_x=1000;
my $end_x=0;

# The .tex file stays open on OUTFILE; the following sections append to it.
open(OUTFILE, ">$cluster_aln_pdf.tex") || die $!;

# Document preamble, heading with both cluster names and the score, start of the TikZ picture.
print OUTFILE <<LATEX;
\\documentclass[a4paper, 10pt]{article}
\\usepackage[hmargin=0.5cm, vmargin=1cm]{geometry}
\\usepackage{graphicx}
\\usepackage{color}
\\usepackage{tikz}
\\usepackage{subfigure}
\\definecolor{dark-red}{RGB}{100,0,0}
\\definecolor{dark-green}{RGB}{0,100,0}
\\definecolor{dark-blue}{RGB}{0,0,100}
\\begin{document}
\\begin{center} \\Large{deepBlockAlign\\hspace{1mm}v1.0} \\end{center}
\\normalsize \\textbf{Block\\hspace{1mm}group\\hspace{1mm}alignment:}\\hspace{1mm}\\emph{{\\color{dark-red}\\underline{$qCluster}}\\hspace{1mm}with\\hspace{1mm}{\\color{dark-blue}\\underline{$sCluster}}}\\hspace{1mm}(Score:\\hspace{1mm}$score) \\linebreak \\linebreak
\\begin{center} \\begin{tikzpicture}[scale=1]
LATEX

# Draw the query blocks (dark red). A block whose start lies left of the right-most end
# seen so far is stacked one row (5 mm) above the previous block; otherwise it goes on
# the base row between 5 mm and 10 mm.
foreach my $qIdx (0 .. $#queryCoor) {
	# Entries are "x1:y1:x2:y2"; the x values are moved by the query shift.
	my($x1, $y1, $x2, $y2) = split("\:", $queryCoor[$qIdx]);
	$x1 = ($x1+$qBlockShift);
	$x2 = ($x2+$qBlockShift);

	# The label "Block N" is placed at the horizontal and vertical middle of the rectangle.
	$nBlock = $qIdx+1;
	$mod_x1 = $x1;
	$mod_x2 = ($x1+$x2)/2;

	my $rectangle;
	if($x1 >= $max_x2) {
		# No overlap: base row.
		$rectangle = "\\draw [thick,white,fill=dark-red] ($x1 mm, 5 mm) rectangle ($x2 mm, 10 mm);";
		$mod_y1 = (5+10)/2;
		$default_y1=5;
	}
	else {
		# Overlap: one row above the previously drawn block.
		$y1 = $default_y1+5;
		$rectangle = "\\draw [thick,white,fill=dark-red] ($x1 mm, $y1 mm) rectangle ($x2 mm, $y1 mm+5 mm);";
		$mod_y1 = ($y1+($y1+5))/2;
		$default_y1 = $y1;
	}
	$mod_y2 = $mod_y1;

	print OUTFILE $rectangle, "\\draw [white] ($mod_x1 mm, $mod_y1 mm)($mod_x2 mm, $mod_y2 mm) node {Block $nBlock};\n";

	# Track the horizontal extent for the axis and the height reached so far.
	# NOTE(review): on the base row $y1 is still the value read from the coordinate entry - confirm that is intended.
	$start_x = $x1 if($x1 < $start_x);
	$end_x = $x2 if($x2 > $end_x);
	$max_x2 = $x2 if($x2 > $max_x2);
	$max_y2 = $y1+5 if($y1+5 > $max_y2);
}

# Axis line at 4 mm below the query blocks, with a tick every 20 mm.
print OUTFILE "\\draw ($start_x mm, 4 mm) -- coordinate (x axis mid) ($end_x mm, 4 mm);\n",
	"\\foreach \\x in {$start_x,".($start_x+20).",...,$end_x}\n",
	"\\draw (\\x mm, 4 mm) -- (\\x mm, 3 mm)\n",
	"node[anchor=north] {};\n";

# Tick labels: the first one carries the chromosome and is moved 4 mm to the left,
# the label value grows by 20 per tick.
for(my $tick=$start_x; $tick<=$end_x; $tick+=20) {
	my $isFirstTick = ($tick==$start_x);
	my $labelX = $isFirstTick ? ($tick-4) : $tick;
	my $labelText = $isFirstTick ? "$queChr:$queGenomicStart" : "$queGenomicStart";
	print OUTFILE "\\draw [black] ($labelX mm, 2 mm) ($labelX mm, 1 mm) node {$labelText};\n";
	$queGenomicStart += 20;
}
#print OUTFILE "\\draw [black] (-15 mm, 8 mm) (-10 mm, 8 mm) node {$queType};\n";

# Leave a gap above the query track and reset the drawing state for the subject track.
$max_y2 = $max_y2+5;
$max_x2=0;
$default_y1=$max_y2+5;
$start_x=1000;
$end_x=0;

# Draw the subject blocks (dark blue). The base row is 5 mm above $max_y2; a block whose
# start lies left of the right-most end seen so far is stacked one row (5 mm) above the
# previous block.
foreach my $sIdx (0 .. $#subjectCoor) {
	# Entries are "x1:y1:x2:y2"; the x values are moved by the subject shift.
	($x1, $y1, $x2, $y2) = split("\:", $subjectCoor[$sIdx]);
	$x1 = ($x1+$sBlockShift);
	$x2 = ($x2+$sBlockShift);

	# $default_y1 ends up holding the lower edge of the row this block is drawn on.
	if($x1 < $max_x2) {
		$y1 = $default_y1+5;
		$default_y1 = $y1;
	}
	else {
		$default_y1 = $max_y2+5;
	}

	# The label "Block N" is placed at the horizontal and vertical middle of the rectangle.
	$nBlock = $sIdx+1;
	$mod_x1 = $x1;
	$mod_x2 = ($x1+$x2)/2;
	$mod_y1 = ($default_y1+($default_y1+5))/2;
	$mod_y2 = $mod_y1;

	print OUTFILE "\\draw [thick,white,fill=dark-blue] ($x1 mm, $default_y1 mm) rectangle ($x2 mm, $default_y1 mm + 5 mm);",
		"\\draw [white] ($mod_x1 mm, $mod_y1 mm)($mod_x2 mm, $mod_y2 mm) node {Block $nBlock};\n";

	# Track the horizontal extent for the axis.
	$start_x = $x1 if($x1 < $start_x);
	$end_x = $x2 if($x2 > $end_x);
	$max_x2 = $x2 if($x2 > $max_x2);
}

# Axis line 1 mm below the subject base row, with a tick every 20 mm.
$default_y1 = $max_y2+4;

print OUTFILE "\\draw ($start_x mm, $default_y1 mm) -- coordinate (x axis mid) ($end_x mm, $default_y1 mm);\n",
	"\\foreach \\x in {$start_x,".($start_x+20).",...,$end_x}\n",
	"\\draw (\\x mm,$default_y1 mm) -- (\\x mm,".($default_y1-1)." mm)\n",
	"node[anchor=north] {};\n";

# Tick labels: the first one carries the chromosome and is moved 4 mm to the left,
# the label value grows by 20 per tick.
my $subLabelY1 = $default_y1-2;
my $subLabelY2 = $default_y1-3;
for(my $tick=$start_x; $tick<=$end_x; $tick+=20) {
	my $isFirstTick = ($tick==$start_x);
	my $labelX = $isFirstTick ? ($tick-4) : $tick;
	my $labelText = $isFirstTick ? "$subChr:$subGenomicStart" : "$subGenomicStart";
	print OUTFILE "\\draw [black] ($labelX mm,$subLabelY1 mm) ($labelX mm, $subLabelY2 mm) node {$labelText};\n";
	$subGenomicStart += 20;
}
#print OUTFILE "\\draw [black] (-15 mm, ". ($default_y1+5) ." mm) (-10 mm, ". ($default_y1+5) ." mm) node {$subType};\n";

# Close the TikZ picture that holds both block tracks.
print OUTFILE <<LATEX;
\\end{tikzpicture}
\\end{center}
LATEX

############################################################################################################
#print OUTFILE "\\normalsize \\textbf{Aligned\\hspace{1mm}blocks:}\\hspace{1mm}";
# Underscores must be escaped before the type names are written into the table header.
foreach my $typeName ($queType, $subType) { $typeName=~s/\_/\\\_/g; }

# Table of aligned block pairs, ordered by query block number.
print OUTFILE "\\begin{table}[h] \\caption{Aligned Blocks} \\centering\n";
print OUTFILE "\\begin{tabular}{| c |  c |} \\hline\n";
print OUTFILE "\\color{dark-red}{$queType} & \\color{dark-blue}{$subType} \\\\ \\hline\n";
foreach my $queryBlock (sort { $a <=> $b } keys(%blockAln)) {
	print OUTFILE "Block $queryBlock & Block $blockAln{$queryBlock} \\\\ \\hline\n";
}
print OUTFILE "\\end{tabular} \\end{table}";

# One sub-figure per block alignment plot, then the end of the document.
print OUTFILE "\\normalsize \\textbf{Block\\hspace{1mm}alignment:} \\linebreak \\linebreak\n";
print OUTFILE "\\begin{figure}[htp]\n\\begin{center}\n";
foreach my $blockPlot (@block_aln_pdf) {
	print OUTFILE "\\subfigure{\\includegraphics[scale=0.35]{$blockPlot}}\n";
}
print OUTFILE "\\end{center}\n\\end{figure}\n";
print OUTFILE "\\end{document}";

############################################################################################################

#Create final output file (.pdf)
# Close the .tex file first so that its complete content is on disk before pdflatex reads it.
close(OUTFILE) || die "Unable to finish writing $cluster_aln_pdf.tex: $!";

# The file name is single-quoted so that shell metacharacters in it cannot alter the command.
(my $texFileArg = "$cluster_aln_pdf.tex") =~ s/'/'\\''/g;
my $pdflatexStatus = system("pdflatex '$texFileArg' > /tmp/tmp.txt");
if($pdflatexStatus != 0) { print STDERR "\nError: pdflatex failed for $cluster_aln_pdf.tex (its output is in /tmp/tmp.txt)\n"; }

#Delete intermediate files.
print "\nDeleting intermediate files..... ";
# unlink removes exactly the named files without going through a shell.
foreach my $intermediateFile ("$cluster_aln_pdf.aux", "$cluster_aln_pdf.log", "$cluster_aln_pdf.tex",
                              @block_aln_pdf, "query.cluster", "subject.cluster") {
	unlink($intermediateFile) || warn "Unable to delete $intermediateFile: $!\n";
}
print "Done\n\n";

# Report a failed PDF build to the caller through the exit status.
exit(1) if($pdflatexStatus != 0);

############################################################################################################
# Print the program description and the command-line help to STDERR, then
# terminate the script with exit status -1. Takes no arguments and does not return.
sub usage {
        print STDERR <<'USAGE';

Program: plotClusterAlign.pl (Visualize deepBlockAlign results)
Author: Center for non-coding RNA in Technology and Health, IBHV, University of Copenhagen, Denmark
Version: 1.0
Contact: sachin@rth.dk
Usage: plotClusterAlign.pl -q <query id> -s <subject id> -c <query file> -d <subject file>
 -q <query id>        query cluster's id
 -s <subject id>      subject cluster's id
 -c <file>            query cluster file
 -d <file>            subject cluster file

USAGE
        exit(-1);
}
############################################################################################################
# End of the main script flow.
exit;
