#!/usr/bin/env perl   

#  -*- perl -*-

use strict;
use warnings FATAL => qw ( all );
use Data::Dumper;

# FASTA2TXT
# Ebbe Sloth Andersen, September 2005.

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<<

my ( @entries, $file, $line, $header );

( $file ) = @ARGV;

@entries = &read_fasta ( $file );

#print Dumper ( @entries );
#exit;

$header = "; generated by fasta2col2";

&write_col ( @entries );

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<

sub write_col
{
    # WRITE_COL_v2: outputs column file.
    # Ebbe Sloth Andersen, April 2005.

    my ( @entries ) = @_;

    # Returns nothing.

    my ( $entry,
	 $n,
	 $label,
	 @label,       # general variables 
	 @residue,
         $residue,
	 @seqpos,
	 @alignpos,
	 @align_bp,
	 @certainty,
	 @color_r, 
	 @color_g, 
	 @color_b, 
	 @number,      # special TREE variables 
	 @name,
	 @uplen,
	 @child,
	 @brother,
	 );

    # print header
    print "; $header \n";
    print "; ========== \n";
    
    # write sequences and masks
    foreach $entry ( @entries )
    { 
	# print header
	print "; TYPE\t$entry->{'TYPE'}\n";
	$n = 1;
	if ( defined $entry->{'label'} )     { print "; COL $n\tlabel\n"; $n++; }
	if ( defined $entry->{'residue'} )   { print "; COL $n\tresidue\n"; $n++; }
	if ( defined $entry->{'seqpos'} )    { print "; COL $n\tseqpos\n"; $n++; }
	if ( defined $entry->{'alignpos'} )  { print "; COL $n\talignpos\n"; $n++; }
	if ( defined $entry->{'align_bp'} )  { print "; COL $n\talign_bp\n"; $n++; }
	if ( defined $entry->{'certainty'} ) { print "; COL $n\tcertainty\n"; $n++; }
	if ( defined $entry->{'color_r'} )   { print "; COL $n\tcolor_r\n"; $n++; }
	if ( defined $entry->{'color_g'} )   { print "; COL $n\tcolor_g\n"; $n++; }
	if ( defined $entry->{'color_b'} )   { print "; COL $n\tcolor_b\n"; $n++; }
	if ( defined $entry->{'number'} )    { print "; COL $n\tnumber\n"; $n++; }
	if ( defined $entry->{'name'} )      { print "; COL $n\tname\n"; $n++; }
	if ( defined $entry->{'uplen'} )     { print "; COL $n\tuplen\n"; $n++; }
	if ( defined $entry->{'child'} )     { print "; COL $n\tchild\n"; $n++; }
    	if ( defined $entry->{'brother'} )   { print "; COL $n\tbrother\n"; $n++; }
	print "; ENTRY\t" . substr $entry->{'ENTRY'}, 0, 30; print "\n";
	if ( $entry->{'TYPE'} eq "TREE" ) { print "; root\t1\n"; }
	print "; ----------\n";
	
	# empty arrays
	@label = ( );
	@residue = ( );
	@seqpos = ( );
	@alignpos = ( );
	@align_bp = ( );
	@certainty = ( );
	@color_r = ( );
	@color_g = ( );
	@color_b = ( );
	@number = ( );
	@name = ( );
	@uplen = ( );
	@child = ( );
	@brother = ( );

        # split hashes
	if ( defined $entry->{'label'} )     { @label = split(/,/, $entry->{'label'}); }
	if ( defined $entry->{'residue'} )   { @residue = split(/,/, $entry->{'residue'}); }
	if ( defined $entry->{'seqpos'} )    { @seqpos = split(/,/, $entry->{'seqpos'}); }
	if ( defined $entry->{'alignpos'} )  { @alignpos = split(/,/, $entry->{'alignpos'}); }
	if ( defined $entry->{'align_bp'} )  { @align_bp = split(/,/, $entry->{'align_bp'}); }
	if ( defined $entry->{'certainty'} ) { @certainty = split(/,/, $entry->{'certainty'}); }	 
	if ( defined $entry->{'color_r'} )   { @color_r = split(/,/, $entry->{'color_r'}); }
	if ( defined $entry->{'color_g'} )   { @color_g = split(/,/, $entry->{'color_g'}); }
	if ( defined $entry->{'color_b'} )   { @color_b = split(/,/, $entry->{'color_b'}); }
	if ( defined $entry->{'number'} )    { @number = split(/,/, $entry->{'number'}); }
	if ( defined $entry->{'name'} )      { @name = split(/,/, $entry->{'name'}); }
	if ( defined $entry->{'uplen'} )     { @uplen = split(/,/, $entry->{'uplen'}); }
	if ( defined $entry->{'child'} )     { @child = split(/,/, $entry->{'child'}); }
	if ( defined $entry->{'brother'} )   { @brother = split(/,/, $entry->{'brother'}); }
	
        # print sequence
	$n = 0;    
	foreach $residue ( @residue ) 
	{
	    if ( defined $entry->{'label'} )     { print "$label[$n]\t"; }
	    if ( defined $entry->{'residue'} )   { print "$residue[$n]\t"; }
	    if ( defined $entry->{'seqpos'} )    { print "$seqpos[$n]\t"; }
	    if ( defined $entry->{'alignpos'} )  { print "$alignpos[$n]\t"; }
	    if ( defined $entry->{'align_bp'} )  { print "$align_bp[$n]\t"; }
	    if ( defined $entry->{'certainty'} ) { print "$certainty[$n]\t"; }
	    if ( defined $entry->{'color_r'} )   { print "$color_r[$n]\t"; }
	    if ( defined $entry->{'color_g'} )   { print "$color_g[$n]\t"; }
	    if ( defined $entry->{'color_b'} )   { print "$color_b[$n]\t"; }
	    if ( defined $entry->{'number'} )    { print "$number[$n]\t"; }
	    if ( defined $entry->{'name'} )      { print "$name[$n]\t"; }
	    if ( defined $entry->{'uplen'} )     { print "$uplen[$n]\t"; }
	    if ( defined $entry->{'child'} )     { print "$child[$n]\t"; }
	    if ( defined $entry->{'brother'} )   { print "$brother[$n]\t"; }
	    print "\n";
	    $n++;
	}
	
        # print footer
	print "; **********\n";
    }
}

sub write_txt
{
    # WRITE_TXT v1.2: outputs txt file.
    # Ebbe Sloth Andersen, October 2005.

    my ( @entries ) = @_;

    # Returns nothing.

    my ( $entry,       # running through entries array
	 $TYPE,        # for translation
	 $ENTRY,
	 $residue,     # for split and join
	 @residue,
         $length,      # longest name
         @length,
         $former,
	 );

    # find length of longest name
    $former = 0;
    $length = 10;
    foreach $entry ( @entries ) {
        $length = length $entry->{'ENTRY'};
        if ( $length > $former ) {
            $former = $length;
        }
    }
    $length = $former + 2;

    # print it
    foreach $entry ( @entries ) {
	@residue = ( );
	$ENTRY = $entry->{'ENTRY'};
	if ( defined $entry->{residue} ) {
	    @residue = split ( /,/, $entry->{residue} );
	    $residue = join ( "", @residue );
	    printf "%-" . "$length" . "s ", $ENTRY;
	    print "$residue\n";
	}
    }
}

sub read_fasta
{
    # READ_FASTA v1.1
    # Reads fasta format into data structure with an array of hashes.
    # Ebbe Sloth Andersen, August 2005.
   
    my ( $fname ) = @_;

    # Returns an array of hashes. The key corresponds to the header.
    # The value contains body data points seperated by comma.

    my ( $line,        # input lines
	 @entries,     # the output array of hashes
	 @cols, 
	 $TYPE,        
	 @COL,         # any number of columns
	 $ENTRY, 
	 @data, 
	 $i,           # counting numbers
	 $length,
	 $counter,
	 @res,
	 $res,
	 $now,
	 $former,
	 $residue,
         $ENTRY_former,
	 );

    # initialize
    $counter = 0;	    
    @data = ( );
    @entries = ( );
    @COL = ( );
    $former = "";
    $ENTRY = "";
    $residue = "";

    if ( not open FILE, "< $fname" ) {
	die "file not found!";
    }

    while ( $line = <FILE> )
    {
	chomp $line;
	if ( $line =~ /^\>lcl\|(\S+)/ ) {            # finds name
	    $ENTRY = $1;
	    $i++;
	} elsif ( $line =~ /^\>(\S+)/ ) {  
	} else {                                # reads sequence
	    $residue .= $line;
	}
	if ( $i == 2 ) {
	    if ( $residue =~ m/\(/ ) {
		$TYPE = "arbitrary";
	    } else {
		$TYPE = "RNA";
	    }
	    @res = split ( //, $residue );
	    $residue = "";	    
	    foreach $res ( @res ) {
		$residue .= "$res,";
	    }
	    push @entries, 
	    {
		"TYPE" => $TYPE,
		"ENTRY" => $ENTRY_former,
		"residue" => $residue,
	    };
	    $i = 1;
	    $residue = "";
	}
        $ENTRY_former = $ENTRY;
    }

    if ( $residue =~ m/\(/ ) {            # then print the last one
        $TYPE = "arbitrary";
    } else {
        $TYPE = "RNA";
    }
    @res = split ( //, $residue );
    $residue = "";	    
    foreach $res ( @res ) {
	$residue .= "$res,";
    }
    push @entries, 
    {
	"TYPE" => $TYPE,
	"ENTRY" => $ENTRY,
	"residue" => $residue,
    };

    close FILE;
    return wantarray ? @entries : \@entries;
}
