#!/usr/bin/env perl   

#  -*- perl -*-

use strict;
use warnings FATAL => qw ( all );
use Data::Dumper;

# FITALI2PM
# Ebbe Sloth Andersen, 2006.

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<<

my ( @entries, $file, $line, $header );

( $file ) = @ARGV;

@entries = &read_col ( $file );

#print Dumper ( @entries );
#exit;

@entries = &fitali2pm ( @entries );

$header = "This col file was generated by the program make_groups.";

&write_col ( @entries );

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<

sub fitali2pm
{
    # Ebbe Sloth Andersen, 2006.

    my ( @entries ) = @_;

    # Sorted entries.

    my ( $entry,
         @align_bp,
         @residue,
         @align_bp2,
         $residue,
         $i,
         $a,
	 );

    $a = 1;
    foreach $entry ( @entries ) {
        if ( $entry->{TYPE} eq "pairingmask" ) {
            $entry->{ENTRY} = "pairingmask_$a";
            @align_bp = ( );
            @align_bp = split(/,/, $entry->{'align_bp'});
            $a++;
        } else {
            @residue = ( );
            @residue = split(/,/, $entry->{'residue'});
            @align_bp2 = ( );
            @align_bp2 = @align_bp;
            $i = 0;
            foreach $residue ( @residue ) {
                $residue = uc($residue);
            }
            foreach $residue ( @residue ) {
                if ( $align_bp[$i] eq "." ) {
                    $residue = lc($residue);
                } else {
                    if ( $residue eq "A" && $residue[$align_bp[$i]-1] eq "U" ) {
                    } elsif ( $residue eq "U" && $residue[$align_bp[$i]-1] eq "A" ) { 
                    } elsif ( $residue eq "G" && $residue[$align_bp[$i]-1] eq "C" ) { 
                    } elsif ( $residue eq "C" && $residue[$align_bp[$i]-1] eq "G" ) { 
                    } elsif ( $residue eq "G" && $residue[$align_bp[$i]-1] eq "U" ) { 
                    } elsif ( $residue eq "U" && $residue[$align_bp[$i]-1] eq "G" ) { 
                    } else {
                        $residue = lc($residue);
                        $align_bp2[$i] = ".";                 
                    }                   
                }     
                $i++;            
            }
            $entry->{residue} = join ( ",", @residue );
            $entry->{align_bp} = join ( ",", @align_bp2 );
        }
    }
    #print Dumper ( @entries );
    #exit;

    return @entries;
}

sub write_col
{
    # WRITE_COL_v2: outputs column file.
    # Ebbe Sloth Andersen, April 2005.

    my ( @entries ) = @_;

    # Returns nothing.

    my ( $entry,
	 $n,
	 $label,
	 @label,       # general variables 
	 @residue,
	 @seqpos,
	 @alignpos,
	 @align_bp,
	 @certainty,
	 @color_r, 
	 @color_g, 
	 @color_b, 
	 @number,      # special TREE variables 
	 @name,
	 @uplen,
	 @child,
	 @brother,
	 );

    # print header
    print "; $header \n";
    print "; ========== \n";
    
    # write sequences and masks
    foreach $entry ( @entries )
    { 
	# print header
	print "; TYPE\t$entry->{'TYPE'}\n";
	$n = 1;
	if ( defined $entry->{'label'} )     { print "; COL $n\tlabel\n"; $n++; }
	if ( defined $entry->{'residue'} )   { print "; COL $n\tresidue\n"; $n++; }
	if ( defined $entry->{'seqpos'} )    { print "; COL $n\tseqpos\n"; $n++; }
	if ( defined $entry->{'alignpos'} )  { print "; COL $n\talignpos\n"; $n++; }
	if ( defined $entry->{'align_bp'} )  { print "; COL $n\talign_bp\n"; $n++; }
	if ( defined $entry->{'certainty'} ) { print "; COL $n\tcertainty\n"; $n++; }
	if ( defined $entry->{'color_r'} )   { print "; COL $n\tcolor_r\n"; $n++; }
	if ( defined $entry->{'color_g'} )   { print "; COL $n\tcolor_g\n"; $n++; }
	if ( defined $entry->{'color_b'} )   { print "; COL $n\tcolor_b\n"; $n++; }
	if ( defined $entry->{'number'} )    { print "; COL $n\tnumber\n"; $n++; }
	if ( defined $entry->{'name'} )      { print "; COL $n\tname\n"; $n++; }
	if ( defined $entry->{'uplen'} )     { print "; COL $n\tuplen\n"; $n++; }
	if ( defined $entry->{'child'} )     { print "; COL $n\tchild\n"; $n++; }
    	if ( defined $entry->{'brother'} )   { print "; COL $n\tbrother\n"; $n++; }
	print "; ENTRY\t" . substr $entry->{'ENTRY'}, 0, 30; print "\n";
	if ( $entry->{'TYPE'} eq "TREE" ) { print "; root\t1\n"; }
	print "; ----------\n";
	
	# empty arrays
	@label = ( );
	@residue = ( );
	@seqpos = ( );
	@alignpos = ( );
	@align_bp = ( );
	@certainty = ( );
	@color_r = ( );
	@color_g = ( );
	@color_b = ( );
	@number = ( );
	@name = ( );
	@uplen = ( );
	@child = ( );
	@brother = ( );

        # split hashes
	if ( defined $entry->{'label'} )     { @label = split(/,/, $entry->{'label'}); }
	if ( defined $entry->{'residue'} )   { @residue = split(/,/, $entry->{'residue'}); }
	if ( defined $entry->{'seqpos'} )    { @seqpos = split(/,/, $entry->{'seqpos'}); }
	if ( defined $entry->{'alignpos'} )  { @alignpos = split(/,/, $entry->{'alignpos'}); }
	if ( defined $entry->{'align_bp'} )  { @align_bp = split(/,/, $entry->{'align_bp'}); }
	if ( defined $entry->{'certainty'} ) { @certainty = split(/,/, $entry->{'certainty'}); }	 
	if ( defined $entry->{'color_r'} )   { @color_r = split(/,/, $entry->{'color_r'}); }
	if ( defined $entry->{'color_g'} )   { @color_g = split(/,/, $entry->{'color_g'}); }
	if ( defined $entry->{'color_b'} )   { @color_b = split(/,/, $entry->{'color_b'}); }
	if ( defined $entry->{'number'} )    { @number = split(/,/, $entry->{'number'}); }
	if ( defined $entry->{'name'} )      { @name = split(/,/, $entry->{'name'}); }
	if ( defined $entry->{'uplen'} )     { @uplen = split(/,/, $entry->{'uplen'}); }
	if ( defined $entry->{'child'} )     { @child = split(/,/, $entry->{'child'}); }
	if ( defined $entry->{'brother'} )   { @brother = split(/,/, $entry->{'brother'}); }
	
        # print sequence
	$n = 0;    
	foreach $label ( @label ) 
	{
	    if ( defined $entry->{'label'} )     { print "$label\t"; }
	    if ( defined $entry->{'residue'} )   { print "$residue[$n]\t"; }
	    if ( defined $entry->{'seqpos'} )    { print "$seqpos[$n]\t"; }
	    if ( defined $entry->{'alignpos'} )  { print "$alignpos[$n]\t"; }
	    if ( defined $entry->{'align_bp'} )  { print "$align_bp[$n]\t"; }
	    if ( defined $entry->{'certainty'} ) { print "$certainty[$n]\t"; }
	    if ( defined $entry->{'color_r'} )   { print "$color_r[$n]\t"; }
	    if ( defined $entry->{'color_g'} )   { print "$color_g[$n]\t"; }
	    if ( defined $entry->{'color_b'} )   { print "$color_b[$n]\t"; }
	    if ( defined $entry->{'number'} )    { print "$number[$n]\t"; }
	    if ( defined $entry->{'name'} )      { print "$name[$n]\t"; }
	    if ( defined $entry->{'uplen'} )     { print "$uplen[$n]\t"; }
	    if ( defined $entry->{'child'} )     { print "$child[$n]\t"; }
	    if ( defined $entry->{'brother'} )   { print "$brother[$n]\t"; }
	    print "\n";
	    $n++;
	}
	
        # print footer
	print "; **********\n";
    }
}

sub read_col
{
    # READ_COL v1.1: Reads column format into data structure with an array of hashes.
    # Reads any number of columns with any kind of data.
   
    my ( $fname ) = @_;

    # Returns an array of hashes. The key corresponds to the header.
    # The value contains body data points seperated by comma.

    my ( $line,        # input lines
	 @entries,     # the output array of hashes
	 @cols, 
	 $TYPE,        
	 @COL,         # any number of columns
	 $ENTRY, 
	 @data, 
	 $i,           # counting numbers
	 $length,
	 $counter );

    # initialize
    $counter = 0;	    
    @data = ( );
    @entries = ( );
    @COL = ( );

    if ( not open FILE, "< $fname" ) {
	die "file not found!";
    }

    while ( $line = <FILE> )
    {
	if ( $line =~ /^; \*+/ )            # finds footer
	{
	    push @entries, 
	    {
		"TYPE" => $TYPE,
		"ENTRY" => $ENTRY,
	    };
	    if ( $TYPE eq "TREE" ) {         # this must be done
		shift ( @data );             # because of wierd
		$length = 6;                 # spacing in TREE file
	    }
	    for ( $i=0; $i<$length; $i++ ) {
		map { $entries[$counter]->{$COL[$i]} = $data[$i] } @entries;
	    }
	    $counter = $counter + 1;
	    @data = ( );
	    @COL = ( );
	}
	elsif ( $line =~ /^;/ )            # reads header
	{
	    if ( $line =~ /^; TYPE\s+(\S+)/ ) {
		$TYPE = $1;
	    } elsif ( $line =~ /^; COL\s+\S+\s+(\S+)/ ) {
		push ( @COL, $1 );
	    } elsif ( $line =~ /^; ENTRY\s+(\S+)/ ) { 
		$ENTRY = $1;
	    }
	}
	else                               # reads body
	{
	    @cols = split(/\s+/, $line);
	    $length = scalar @cols;
	    
	    # add sequences on an array

	    for ( $i=0; $i<$length; $i++ ) {
		$data[$i] .= "$cols[$i],";
	    }
	}   
    } 
    close FILE;
    return wantarray ? @entries : \@entries;
}
