#!/usr/bin/env perl   

#  -*- perl -*-

use strict;
use warnings;
use Data::Dumper;

# HOBOHM: Reduction of alignments
# Ebbe Sloth Andersen, November 2005.

# >>>>>>>>>>>>>>>>>> RUN PROGRAM <<<<<<<<<<<<<<<<<<<

my ( @entries, 
     $entry, 
     $count, 
     $length, 
     $i,
     $ali_file,
     $cutoff,
     );

( $ali_file ) = @ARGV;

@entries = &read_txt ( $ali_file );

$count = pop @entries;

#&identity ( @entries );

foreach $entry ( @entries ) {
	print ">$entry->{'id'}\n";
	print "$entry->{'seq'}\n";
}

#print Dumper ( @entries );
#exit;

# >>>>>>>>>>>>>>>>>> SUBROUTINES <<<<<<<<<<<<<<<<<<<<<<

sub identity
{
    # Ebbe Sloth Andersen, November 2005.

    my ( @entries ) = @_;

    my ( $entry,
         $entry2,
         $length,
         $i,
         $nt1,
         $nt2,
         @identity,
         $same,
         @matrix,
         $list,
         @list,
         );

    $length = length $entries[0]->{'seq'};

    foreach $entry ( @entries ) {
        foreach $entry2 ( @entries ) {
            for ( $i = 0; $i < $length; $i++ ) {
                $nt1 = substr ( $entry->{'seq'}, $i, 1 );
                $nt2 = substr ( $entry2->{'seq'}, $i, 1 );
                if ( $nt1 eq $nt2 ) {
                    $same++;
                }
            }
            if ( $same/$length > $cutoff ) {
                $list = "$entry->{'id'}  $entry2->{'id'}  " . $same/$length . "\n";            
                push @list, $list;
            }            
            $same = 0;
        }
    }

#    print Dumper (@list);
#    exit;

    # hobohm algorithm 2 perl version.
    # Jans Perl code. Modified by Ebbe.

    my %a;
    my %b;

    foreach $list ( @list ) {
        my @F=split(/\s+/,$list);
        if($F[0] ne $F[1] && ! defined $b{$F[0],$F[1]}) {
            $a{$F[0]} .= $F[1] . " ";
            $a{$F[1]} .= $F[0] . " ";
            $b{$F[0],$F[1]} = 1;
            $b{$F[1],$F[0]} = 1;
        }
    }
    my %c;
    foreach my $i ( sort keys %a) { 
        foreach my $j ( sort keys %a) {  
            if(defined $b{$i,$j}) { 
                $c{$i}++; 
            }
        }
    }
    @list = ( );
    A: while(1) {
        my $k="";
        $c{$k}=0;
        foreach my $i ( sort keys %c ) { if($c{$i}>$c{$k}) { $k=$i; } }
        if($k eq "") { last A; }
        my %d;
        map {$d{$_}=1} split(/\s+/,$a{$k});
        push @list, "$k";
        delete $c{$k};
        foreach my $l ( sort keys %d ) { $c{$l}--; }
    }

#    print Dumper ( @list );
#    exit;

    my $test;
	my $n;
	
	$n = 0;
	
    foreach $entry ( @entries ) {

# print only if it does not belong to lis

        $test = "yes";

        foreach $list ( @list ) {
            if ( $entry->{'id'} eq $list ) {
                $test = "no";
            }
        }

        if ( $test eq "yes" ) {
            printf "%-33s", $entry->{'id'};
            print "$entry->{'seq'}\n";
            $test = "yes";
			$n++;
        }	
    }
	print STDERR "$n\n";
    
    #return @entries;
}

sub read_txt
{
    my ( $fname ) = @_;

    my ( $line, $id, $type, $seq, @entries );

    if ( not open FILE, "< $fname" ) {
	die "file not found!";
    }
    
    while ( $line = <FILE> )
    {
        chomp $line;
	( $id, $seq ) = split ( /\s+/, $line );

	push @entries,
	{
	    "id" => $id,
	    "seq" => $seq,
	};
    }
    close FILE;

    return @entries;
}

sub write_txt
{
    # Ebbe Sloth Andersen, December 2004.

    my ( @entries ) = @_;

    my ( $entry,
         $space,
         );

    $space = " ";

    foreach $entry ( @entries ) {	
		printf "%-33s", $entry->{'id'};
        print "$entry->{'seq'}\n";
    }
}

