#!/usr/bin/env perl   

#  -*- perl -*-

#use strict;
#use warnings FATAL => qw ( all );
use Data::Dumper;

# COL2ALIS
# Makes converts col-file to ali-file that can be loaded into RnaViz. 
# If col-file contains colors then a annotation line is added to the ali-file.
# 
# Ebbe Sloth Andersen, September 2005. Email: esa@mb.au.dk

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<<

my ( @entries, $file, @print, $line, $header );

( $file ) = @ARGV;

@entries = &read_col ( $file );

#print Dumper ( @entries );
#exit;

&write_alis ( @entries );

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<

sub write_alis
{
    # WRITE_ALIS v.1 

    my ( @entries ) = @_;

    # Returns nothing.

    my ( $entry,
	 @rnas,
	 $rna,
	 $n,
	 $i,
	 $residue,
	 @residue,
         @pm,
         $pm,
         @pmres,
         $length,
         $length2,
         $prev,     # for individual structures 
         $m, 
         @m, 
         $pmres, 
         $bulge, 
         $stem, 
         $bpn, 
         $labels, 
         $loop,
         $l,       # for helix numbering 
         $one, 
         $stemstart, 
         $symbol, 
         $o, 
         @label, 
         $length3,
         @r,         # for coloring mask
         @g, 
         @b, 
         $color,
         $c,
         @helix,
         $helix,
         $gaps,
	 );

    # get pairingmask
    @pm = grep { $_->{'TYPE'} eq "pairingmask" } @entries;
    $pm = $pm[0]; 
    @pmres = split(/,/, $pm->{'residue'});
    # get RNAs
    @rnas = grep { $_->{'TYPE'} eq "RNA" } @entries;
    $entry = $rnas[0]; 
    @residue = split(/,/, $entry->{'residue'});
    $length2 = scalar @residue;
    $c = 0;
    # write rna structures
    foreach $entry ( @rnas ) {
        $c++;
        open ( OUT, ">$c\_$entry->{'ENTRY'}.ali" );
        # write header
        print OUT "           1         $length2\n\n";
        # write numbers
        $n = 0;
        for ( $i=0; $i<=$length2; $i++ ) {
            if ( $i >= 10000 ) {
                if ( $n == 10 ) {
                    print OUT "               $i";
                    $n=0;
                }
                $n++;
            } elsif ( $i >=1000 ) {
                if ( $n == 10 ) {
                    print OUT "                $i";
		$n=0;
                }
                $n = $n + 1;
            } elsif ( $i >=100 ) {
                if ( $n == 10 ) {
                    print OUT "                 $i";
                    $n=0;
                }
                $n = $n + 1;
            } else {
                if ( $n == 10 ) {
                    print OUT "                  $i";
                    $n=0;
                }
                $n = $n + 1;
            }
        }
        print OUT "\n";
# write ruler
        $n = 0;
        for ( $i=0; $i<$length2; $i++ ) {
            if ( $n < 4 ) { print OUT " ."; }
            if ( $n == 4 ) { print OUT " |"; }
            if ( $n == 4 ) { $n = -1; }
            $n++;
        }
        print OUT "\n";

# print OUT aligned rna structures
	@residue = ( );
	if ( defined $entry->{'residue'} )   { @residue = split(/,/, $entry->{'residue'}); }
        
        $prev = "-";
        $stem = "false";
        $loop = 0;
        $bulge = "false";
        $n = 0;
        $m = 0;
        $bpn = 0;
        $labels = 0;
        $length = 0;
        @m = ( );

	foreach $residue ( @residue ) {
            #print "n\tres\tpmr\tpre\len\n";

            if ( $pmres[$n] eq $prev && $labels == $loop/2 ) {
                $prev = "";;
            }
            
            # if new label or label shift
            if ( $pmres[$n] ne $prev && $pmres[$n] ne "-" ) {

                $bpn = 0; # restart base count
                $m = 0;   # restart base count
                $labels = 0;

                # get amount of labels in current stem
                @m = ( );
                @m = grep { $_ eq $pmres[$n] } @pmres;
                $length = scalar @m;
                $loop = $length;
            #print "$n\t$residue\t$pmres[$n]\t$prev\t$length\n";
                
                # count paired bases in this stemside
                for ($i=$n;$i<$length/2+$n;$i++) {
                    if ( $pmres[$i] eq $pmres[$n] ) {                    
                        if ( $residue[$i] =~ /^[AUCGT]/ ) {
                            $bpn++;
                        }
                    } else {
                         $length = $length + 2;                  # before it only counted gaps
                    }                                           # but it should read through 
                }                                               # any stem within a stem 
            }                                                   # or pseudoknot

            # make history and count labels
            if ( $pmres[$n] ne "-") {
                $prev = $pmres[$n];
                $labels++;
            }

# insert symbols inbetween
            # by analysing the current residue
            # and then printing the symbol before the residue
            
            if ( $residue[$n] =~ /^[AUCGT]/ ) {
                if ( $m == 0 ) { # and new stemside
                    if ( $residue[$n-1] =~ /^[AUCGT]/ && $n != 0 ) {
                        print OUT "\^"; $stem = "true";         # resume with new stem
                    } else {
                        print OUT "\["; $stem = "true";         # start stem
                    }
                } elsif ( $residue[$n-1] =~ /^[aucgt-]/ && $bulge eq "true" ) {
                    print OUT "\}"; $bulge = "false";                 
                } else {
                    print OUT " ";
                }
                $m++;
            } else { # if gap or unpaired
                if ( $m == 0 ) { # and new stem
                    if ( $residue[$n-1] =~ /^[AUCGT]/ && $n != 0 ) { # and previous was stem
                        print OUT "\]";  $stem = "false";
                    } else {
                        print OUT " ";
                    }
                } elsif ( $m == $bpn && $m != 0 && $stem eq "true" ) { # or end of stem
                    print OUT "\]"; $stem = "false"; $bulge = "false";
                } elsif ( $stem eq "true" && $bulge eq "false" && $residue[$n] =~ /^[aucgt]/ ) {
                    print OUT "\{"; $bulge = "true";
                } elsif ( $residue[$n] =~ /^[AUCGT]/ && $bulge eq "true" ) { # end stem
                    print OUT "\}"; $bulge = "false";
                } else {
                    print OUT " "; 
                }
            }
            
# print sequence
	    # if ( defined $entry->{'residue'} ) { print "$residue[$n]"; }
            if ( defined $entry->{'residue'} ) { print OUT uc("$residue[$n]"); }
            $n++;
	}
        
        if ( $m == $bpn && $m != 0 && $stem eq "true" ) { # end stem
            print OUT "\]"; $stem = "false"; $bulge = "false";
        } else {
            print OUT " ";
        }
        print OUT "      $entry->{'ENTRY'}\n";
        
# write helix numbers 
        my $length4 = 0;
        $length3 = 0;
        $prev = "-";
        my $prev2 = "-";
        $n = 0;
        $o = 0;
        $stemstart = 0;
        @label = ( );
        @m = ( );
        @helix = ( );
        $helix = "";         
        $gaps = 0;        
        print OUT " ";
        my $halfway = 0;
        @helix = @pmres;
        # variable control
        #print "before\t\t\t\t\t\tafter\n";
        #print "n\tresidue\thelix\tprev\t\tprint\tstart\thalf\n";
        foreach $helix ( @helix ) {

            # variable control
            #print "$n\t$residue[$n]\t$helix\t$prev\t\t";

            if ( $pmres[$n] eq $prev2 && $pmres[$n] ne "-" ) { # investigate if we go beyond half length
                @m = ( );
                @m = grep { $_ eq $helix } @pmres;    # should look at @pmres since @helix changes
                $length3 = scalar @m;                 # here we get number of labels
                $halfway = $stemstart + $length3/2 + $gaps;
                if ( $n >= $halfway ) {                # and reset if current position
                    $prev = "-";
                    $prev2 = "-";
                    $gaps = 0;
                }   
             }
            # count gaps in helix regions in pairingmask
            if ( $pmres[$n] eq "-" && $prev2 ne "-" ) {
                $gaps++;
            }

            if ( $pmres[$n] ne $prev2 && $pmres[$n] ne "-" ) {  # if new stem
                $stemstart = $n;
            }

            if ( $helix ne $prev && $helix ne "-" ) {  # if new stem
                $gaps = 0;                
                push ( @label, $helix );
                @m = ( );
                @m = grep { $_ eq $helix } @label;
                $length4 = scalar @m;
                if ( $length4 == 1 ) {
                    if ( $residue[$n] =~ /^[AUCGT]/ ) {
                        print OUT "$helix ";
                        #print "$helix\t";
                    } else {             # now we got a gap in the seq
                        print OUT "- "; 
                        #print "-\t";
                        $helix = "-";
                        pop @label
                    }
                } else {
                    if ( $residue[$n] =~ /^[AUCGT]/ ) {
                        print OUT "$helix'";
                        #print "$helix'\t";
                    } else { 
                        #print "-\t";
                        print OUT "- ";
                        $helix = "-";
                        pop @label
                    }
                }
            } else {
                #print "-\t";
                print OUT "- ";
            }
            
            # make history
            if ( $helix ne $prev && $helix ne "-" ) {
                $prev = $helix;
            }
            if ( $pmres[$n] ne $prev2 && $pmres[$n] ne "-" ) {
                $prev2 = $pmres[$n];
            }

            $n++;

            # variable control
            #print "$stemstart\t$halfway\t$length3\t$length4\t$gaps\n";

        }
        print OUT " H    Helix numbering\n";

# get colors from each sequence and write color mask
        $color = "";
        $i = 0;
        @r = ( );
        @g = ( );
        @b = ( );
        if ( defined $entry->{'color_r'} )   { @r = split(/,/, $entry->{'color_r'}); }
        if ( defined $entry->{'color_g'} )   { @g = split(/,/, $entry->{'color_g'}); }
        if ( defined $entry->{'color_b'} )   { @b = split(/,/, $entry->{'color_b'}); }
        foreach $residue ( @residue ) {

            if ( $r[$i] == 1.00 && $g[$i] == 0.50 && $b[$i] == 0.50 ) { $color .= " a"; } # r
            elsif ( $r[$i] == 1.00 && $g[$i] == 0.70 && $b[$i] == 0.20 ) { $color .= " b"; } # o
            elsif ( $r[$i] == 1.00 && $g[$i] == 1.00 && $b[$i] == 0.50 ) { $color .= " c"; } # y
            elsif ( $r[$i] == 0.50 && $g[$i] == 1.00 && $b[$i] == 0.10 ) { $color .= " d"; } # g
            elsif ( $r[$i] == 0.70 && $g[$i] == 1.00 && $b[$i] == 1.00 ) { $color .= " e"; } # c
            elsif ( $r[$i] == 0.60 && $g[$i] == 0.60 && $b[$i] == 1.00 ) { $color .= " f"; } # b
            elsif ( $r[$i] == 1.00 && $g[$i] == 0.70 && $b[$i] == 1.00 ) { $color .= " g"; } # p
            elsif ( $r[$i] == 0.70 && $g[$i] == 0.70 && $b[$i] == 0.70 ) { $color .= " h"; } # g

            elsif ( $r[$i] == 1.00 && $g[$i] == 0.20 && $b[$i] == 0.20 ) { $color .= " i"; } # r
            elsif ( $r[$i] == 1.00 && $g[$i] == 0.50 && $b[$i] == 0.00 ) { $color .= " j"; } # o
            elsif ( $r[$i] == 1.00 && $g[$i] == 1.00 && $b[$i] == 0.00 ) { $color .= " k"; } # y
            elsif ( $r[$i] == 0.00 && $g[$i] == 1.00 && $b[$i] == 0.00 ) { $color .= " l"; } # g
            elsif ( $r[$i] == 0.50 && $g[$i] == 1.00 && $b[$i] == 1.00 ) { $color .= " m"; } # c
            elsif ( $r[$i] == 0.30 && $g[$i] == 0.30 && $b[$i] == 1.00 ) { $color .= " n"; } # b
            elsif ( $r[$i] == 1.00 && $g[$i] == 0.50 && $b[$i] == 1.00 ) { $color .= " o"; } # p
            elsif ( $r[$i] == 0.50 && $g[$i] == 0.50 && $b[$i] == 0.50 ) { $color .= " p"; } # g

            elsif ( $r[$i] == 0.80 && $g[$i] == 0.00 && $b[$i] == 0.00 ) { $color .= " q"; } # r
            elsif ( $r[$i] == 0.80 && $g[$i] == 0.50 && $b[$i] == 0.00 ) { $color .= " r"; } # o
            elsif ( $r[$i] == 0.80 && $g[$i] == 1.00 && $b[$i] == 0.00 ) { $color .= " s"; } # y
            elsif ( $r[$i] == 0.00 && $g[$i] == 0.80 && $b[$i] == 0.00 ) { $color .= " t"; } # g
            elsif ( $r[$i] == 0.50 && $g[$i] == 1.00 && $b[$i] == 1.00 ) { $color .= " u"; } # c
            elsif ( $r[$i] == 0.50 && $g[$i] == 0.50 && $b[$i] == 1.00 ) { $color .= " v"; } # b
            elsif ( $r[$i] == 0.50 && $g[$i] == 0.50 && $b[$i] == 0.90 ) { $color .= " w"; } # p
            elsif ( $r[$i] == 0.30 && $g[$i] == 0.30 && $b[$i] == 0.30 ) { $color .= " x"; } # g

            elsif ( $r[$i] == 0.60 && $g[$i] == 0.00 && $b[$i] == 0.00 ) { $color .= " y"; } # r
            elsif ( $r[$i] == 0.60 && $g[$i] == 0.30 && $b[$i] == 0.00 ) { $color .= " A"; } # o
            elsif ( $r[$i] == 0.60 && $g[$i] == 0.60 && $b[$i] == 0.00 ) { $color .= " B"; } # y
            elsif ( $r[$i] == 0.00 && $g[$i] == 0.60 && $b[$i] == 0.00 ) { $color .= " C"; } # g
            elsif ( $r[$i] == 0.30 && $g[$i] == 0.80 && $b[$i] == 0.80 ) { $color .= " D"; } # c
            elsif ( $r[$i] == 0.30 && $g[$i] == 0.30 && $b[$i] == 0.80 ) { $color .= " E"; } # b
            elsif ( $r[$i] == 0.30 && $g[$i] == 0.30 && $b[$i] == 0.70 ) { $color .= " F"; } # p
            elsif ( $r[$i] == 0.20 && $g[$i] == 0.20 && $b[$i] == 0.20 ) { $color .= " G"; } # g

            elsif ( $r[$i] == 0.50 && $g[$i] == 1.00 && $b[$i] == 0.50 ) { $color .= " H"; } # g1
            elsif ( $r[$i] == 0.70 && $g[$i] == 1.00 && $b[$i] == 0.70 ) { $color .= " I"; } # g2
            elsif ( $r[$i] == 0.90 && $g[$i] == 1.00 && $b[$i] == 0.90 ) { $color .= " J"; } # g3
#            elsif ( $r[$i] == 0.50 && $g[$i] == 0.50 && $b[$i] == 1.00 ) { $color .= " K"; } # b1
            elsif ( $r[$i] == 0.70 && $g[$i] == 0.70 && $b[$i] == 1.00 ) { $color .= " L"; } # b2
            elsif ( $r[$i] == 0.90 && $g[$i] == 0.90 && $b[$i] == 1.00 ) { $color .= " M"; } # b3

            else { $color .= " X"; }

            $i++;
        }    
        print OUT "$color       mask\n";

        close ( OUT ); 
    }
}

sub read_col
{
    # READ_COL v1.1: Reads column format into data structure with an array of hashes.
    # Reads any number of columns with any kind of data.
   
    my ( $fname ) = @_;

    # Returns an array of hashes. The key corresponds to the header.
    # The value contains body data points seperated by comma.

    my ( $line,        # input lines
	 @entries,     # the output array of hashes
	 @cols, 
	 $TYPE,        
	 @COL,         # any number of columns
	 $ENTRY, 
	 @data, 
	 $i,           # counting numbers
	 $length,
	 $counter );

    # initialize
    $counter = 0;	    
    @data = ( );
    @entries = ( );
    @COL = ( );

    if ( not open FILE, "< $fname" ) {
	die "file not found!";
    }

    while ( $line = <FILE> )
    {
	if ( $line =~ /^; \*+/ )            # finds footer
	{
	    push @entries, 
	    {
		"TYPE" => $TYPE,
		"ENTRY" => $ENTRY,
	    };
	    if ( $TYPE eq "TREE" ) {         # this must be done
		shift ( @data );             # because of wierd
		$length = 6;                 # spacing in TREE file
	    }
	    for ( $i=0; $i<$length; $i++ ) {
		map { $entries[$counter]->{$COL[$i]} = $data[$i] } @entries;
	    }
	    $counter = $counter + 1;
	    @data = ( );
	    @COL = ( );
	}
	elsif ( $line =~ /^;/ )            # reads header
	{
	    if ( $line =~ /^; TYPE\s+(\S+)/ ) {
		$TYPE = $1;
	    } elsif ( $line =~ /^; COL\s+\S+\s+(\S+)/ ) {
		push ( @COL, $1 );
	    } elsif ( $line =~ /^; ENTRY\s+(\S+)/ ) { 
		$ENTRY = $1;
	    }
	}
	else                               # reads body
	{
	    @cols = split(/\s+/, $line);
	    $length = scalar @cols;
	    
	    # add sequences on an array

	    for ( $i=0; $i<$length; $i++ ) {
		$data[$i] .= "$cols[$i],";
	    }
	}   
    } 
    close FILE;
    return wantarray ? @entries : \@entries;
}
