#!/usr/bin/env perl   

#  -*- perl -*-

use strict;
use warnings FATAL => qw ( all );
use Data::Dumper;
use Formats;
use Errors;

# SEPARATE
#
# separate1 output.col region.col -r1-10,40-50 > left.col
#
# region.col has the aligned sequences
# output.col has the unaligned sequences
#
# Ebbe Sloth Andersen, 2007.

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<<

# Command line arguments: two col files followed by the region option.
# $regions has to remain a file-level lexical declared ahead of the
# subroutine section, because separate1 reads it directly.
my ( $file1, $file2, $regions ) = @ARGV;

# Load both col files.
my $entries1 = &Formats::read_col ( $file1 );
my $entries2 = &Formats::read_col ( $file2 );

# Input validation is currently switched off:
#&Errors::check_col ( $entries1, { "TYPE" => ["RNA","pairingmask"],
#						         "COL"  => ["certainty","align_bp"] } );
#&Errors::check_col ( $entries2, { "TYPE" => ["RNA","pairingmask"],
#						         "COL"  => ["certainty","align_bp"] } );

# Trim the entries of the first file and write the result out.
my $header  = "This file was made by separate1";
my $entries = &separate1 ( $entries1, $entries2 );
&Formats::write_col ( $header, $entries );

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<<

# separate1 ( $entries1, $entries2 [, $region_spec] )
#
# Trims every sequence row of $entries1 so that it holds as many non-gap
# residues as the matching row of $entries2 has inside the leading
# region, pads all trimmed rows with "-" to a common number of columns
# and cuts the first entry of $entries1 (handled as the pairingmask) to
# that same number of columns.
#
# Arguments:
#   $entries1     array ref of hash refs; the first element is taken off
#                 as the pairingmask, the remaining ones are the rows to
#                 trim (labelled "foldalign" in the original code).
#                 The 'residue' value is a comma separated list.
#   $entries2     array ref of hash refs (labelled "input"); row $i here
#                 is matched with row $i of $entries1 after the
#                 pairingmask has been removed.
#   $region_spec  optional; "-rF1-T1,F2-T2" or "-rF2-T2".  Defaults to
#                 the file-level $regions command line argument.
#
# Returns $entries1, which is modified in place.
#
# Dies when rows have to be processed but no usable region specification
# is available, or when $entries2 has more rows than $entries1 has
# sequence rows.
#
# NOTE(review): only the width T1-F1+1 of the first range is used and the
# columns are always counted from the start of the row; the second range
# is parsed but never used, and the single-range form therefore yields a
# width of one column.  This is kept from the original - confirm that it
# is intended.
sub separate1
{
    my ( $entries1, $entries2, $region_spec ) = @_;

    # Keep old two-argument callers working: fall back to the option
    # given on the command line.
    $region_spec = $regions if not defined $region_spec;

    my ( $f1, $t1 );
    if ( defined $region_spec ) {
        if ( $region_spec =~ /^\-r(\d+)\-(\d+)\,(\d+)\-(\d+)/ ) {
            ( $f1, $t1 ) = ( $1, $2 );
        } elsif ( $region_spec =~ /^\-r(\d+)\-(\d+)$/ ) {
            ( $f1, $t1 ) = ( 0, 0 );
        }
    }

    my $row_count = scalar @{ $entries2 };

    # Without a parsed region the width below would be computed from
    # undefined values; report the real problem instead.
    if ( $row_count > 0 and not defined $f1 ) {
        die "separate1: missing or malformed region argument, "
          . "expected -rF-T or -rF-T,F-T\n";
    }
    my $width = $row_count > 0 ? $t1 - $f1 + 1 : 0;

    # The first entry is set aside while the rows are processed, so that
    # row $i of both entry lists line up.
    my $mask = shift @{ $entries1 };

    if ( $row_count > scalar @{ $entries1 } ) {
        die "separate1: the second file has more rows ($row_count) than "
          . "the first file has sequence rows\n";
    }

    my @trimmed;          # one array ref of residues per processed row
    my $max_cols = 0;     # widest trimmed row, counted in columns

    for my $i ( 0 .. $row_count - 1 ) {
        my @fold  = split ( /,/, $entries1->[$i]->{'residue'} );   # foldalign
        my @input = split ( /,/, $entries2->[$i]->{'residue'} );   # input

        # Count the non-gap residues in the leading region of the input
        # row, never reading past the end of a short row.
        my $last = $width - 1;
        $last = $#input if $last > $#input;
        my $wanted = grep { $_ ne "-" } @input[ 0 .. $last ];

        # Copy foldalign columns, gaps included, until that many non-gap
        # residues have been collected or the row is exhausted.
        my @kept;
        my $seen = 0;
        for my $res ( @fold ) {
            last if $seen >= $wanted;
            push @kept, $res;
            $seen++ if $res ne "-";
        }

        # Every row gets a result, also when nothing was kept, so that it
        # is padded like the others instead of being skipped.
        push @trimmed, \@kept;
        my $cols = scalar @kept;
        $max_cols = $cols if $cols > $max_cols;
    }

    # Pad all rows to the same number of columns and write them back.
    # Widths are counted in residues, not characters, so residues longer
    # than one character do not upset the padding.  Rows start at index 1
    # once the pairingmask is put back in front.
    for my $i ( 0 .. $#trimmed ) {
        my $kept    = $trimmed[$i];
        my $missing = $max_cols - scalar @{ $kept };
        push @{ $kept }, ( "-" ) x $missing;
        $entries1->[$i]->{'residue'} = join ( "", map { "$_," } @{ $kept } );
    }

    # Cut the pairingmask to the same number of columns; a mask that is
    # already shorter is left as it is.
    if ( defined $mask ) {
        my @mask_cols = split ( /,/, $mask->{'residue'} );
        $#mask_cols = $max_cols - 1 if scalar @mask_cols > $max_cols;
        $mask->{'residue'} = join ( "", map { "$_," } @mask_cols );
        unshift @{ $entries1 }, $mask;
    }

    return $entries1;
}
