#!/usr/bin/env perl   

#  -*- perl -*-

use strict;
use warnings FATAL => qw ( all );
use Data::Dumper;
use Formats;
use Errors;

# STEM_VARIATION
# Colors alignment according to differences from
# a reference sequence (first RNA sequence in the alignment):
#
# Green  = no change
# Yellow = semi-conservative basechange
# Orange = conservative basechange
# Red    = mismatch
#
# Ebbe Sloth Andersen, September 2005. Email: esa@mb.au.dk

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<<

my ( $entries, $file, $header );

( $file ) = @ARGV;

( $header, $entries ) = &Formats::read_col ( $file );
&Errors::check_col ( $entries, { "TYPE" => ["RNA"] } );
$entries = &stem_variation ( $entries );
$header .= "; stem_variation.pl was run on this file.";
&Formats::write_col ( $header, $entries );

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<

sub stem_variation
{
    # Ebbe Sloth Andersen, September 2005.

    my ( $entries ) = @_;

    # Returns an array of hashes.

    my ( @rnas,
	 $rna,
	 %colors,
	 $colorpos,
	 @basepairs,
	 $basepair,
	 $entry,
	 @entnts,
	 @entbps,
	 @refnts,
	 @refbps,
	 $ref,
	 @ref,
	 @bps,
	 $nt1,
	 $nt2,
	 $bp,
	 $i,           # counting numbers
	 $length,
	 $num,
	 $r,
	 $e,
	 @e,
	 @re,
	 @pm,
	 $pm,
         @pms,
         $p,
         @r,
         @g,
         @b,
	 );

    # Define colors
    %colors = (
	       "red"       => [1.00, 0.30, 0.30],
	       "orange"    => [1.00, 0.70, 0.30],
	       "yellow"    => [1.00, 1.00, 0.30],
	       "green"     => [0.50, 1.00, 0.50],
	       "blue"      => [0.50, 0.50, 1.00],
	       "grey"      => [0.70, 0.70, 0.70],
	       "white"     => [1.00, 1.00, 1.00]
	       );

    # initialize
    @refnts = ( );
    @bps = ( );
    @entnts = ( );
    @ref = ( );
    @refbps = ( );
    $nt2 = "x";
    $i = 0;
    @rnas = ( );

    # make colors for each position by comparing each RNA with reference.
    
    @rnas = grep { $_->{'TYPE'} eq "RNA" } @{ $entries };
    $ref = $rnas[0];
    @bps = split ( /,/, $ref->{'align_bp'} );
    @refnts = split ( /,/, $ref->{'residue'} );

    # find all reference base pairs
    
    foreach $nt1 ( @refnts ) {  
        
        if ( $nt1 =~ /^(T|A|U|G|C)/ && $bps[$i] ne "." ) {
            $nt2 = $refnts[$bps[$i]-1];
        }
        push ( @refbps, "$nt1$nt2" );
        $nt2 = "x";
        $i++;   
    }

    # color rnas

    foreach $rna ( @rnas ) {
        
        @entnts = ( );
        @entbps = ( );
        @bps = ( );
        @entnts = split ( /,/, $rna->{'residue'} );   # looks at each RNA entry
        @bps = split ( /,/, $rna->{'align_bp'} );

        $nt2 = "x";
        $i = 0;
        foreach $nt1 ( @entnts ) {                    # and makes an array of the pairs
            if ( $nt1 =~ /^[TAUGC]/ && $bps[$i] ne "." ) {
                $nt2 = $entnts[$bps[$i]-1];
            }
            push ( @entbps, "$nt1$nt2" );
            $nt2 = "x";
            $i++;   
        }

        @r = ( );
        @g = ( );
        @b = ( );

        if ( defined $rna->{'color_r'} ) {            # get existing colors
            @r = split ( /,/, $rna->{'color_r'} );
            @g = split ( /,/, $rna->{'color_g'} );
            @b = split ( /,/, $rna->{'color_b'} );
        } else {
            foreach $nt1 ( @entnts ) {                # make white if no color entries
                push @r, 1;
                push @g, 1;
                push @b, 1;
            }
        }
        $rna->{'color_r'} = "";
        $rna->{'color_g'} = "";
        $rna->{'color_b'} = "";
        
        # compare with refbps
        $i = 0;
        foreach $e ( @entbps ) {
            $r = $refbps[$i];
            $colorpos = "white";              

            if ( $r =~ /^[.-]/ && $e =~ /^[TUCAG]/ ) { 
                $colorpos = "yellow";                         # insert
            }

            if ( ($e =~ /^[TUCAG]/) && ($r =~ /^[tucag]/) ) {
                $colorpos = "yellow";                         # new base pair
            } elsif ( ($e =~ /^[tucag]/) && ($r =~ /^[TUCAG]/) ) {
                $colorpos = "blue";                           # mismatch
            }
            
            if ( ($e =~ /^[TUCAG]/) && ($r =~ /^[TUCAG]/) ) {

                @e = ( );
                @re = ( );
                @e = split(//, $e);
                @re = split(//, $r);
                
                if ( ($e[0] ne $re[0]) && ($e[1] ne $re[1]) ) {
                    $colorpos = "red";                        # coordinated basechanges
                }
                if ( ($e[0] ne $re[0]) && ($e[1] eq $re[1]) ) {
                    $colorpos = "orange";                     # semi-conservatory basechange
                }
            }
            if ( $colorpos eq "white" or $colorpos eq "green" ) {
                $rna->{'color_r'} .= "$r[$i],";
                $rna->{'color_g'} .= "$g[$i],";
                $rna->{'color_b'} .= "$b[$i],";
            } else {
                $rna->{'color_r'} .= "$colors{$colorpos}[0],";
                $rna->{'color_g'} .= "$colors{$colorpos}[1],";
                $rna->{'color_b'} .= "$colors{$colorpos}[2],";
            }
            $i++;
        }
    }   
    return $entries;
}
