#! /usr/bin/perl -w

# Zizhen Yao

# CVS $Id: CombMotif.pl,v 3.2 2008/03/21 22:45:06 yzizhen Exp $

use Class::Struct;
# Directory named by the CMfinder environment variable.  io.pl is loaded from
# it here, and merge_motif.pl and cmfinder are run from it further down.
$path= $ENV{CMfinder};
die "The CMfinder environment variable must name the CMfinder directory\n"
    unless (defined $path && $path ne "");

# `do FILE` does not raise an error when the file is missing or fails to
# compile, so verify afterwards that the helper this script depends on exists.
# read_stockholm is not defined in this file, so it is expected to come from
# io.pl.
do "$path/io.pl";
unless (defined &read_stockholm) {
    my $reason = $@ || $! || "the file does not define it";
    die "Cannot load read_stockholm from $path/io.pl: $reason\n";
}

# A candidate merge whose average gap between the two motifs exceeds this
# value is never carried out.
$MAX_GAP = 100;

# One candidate merge of two motif files:
#   motif1, motif2 - file names; motif1 is the motif placed first
#   num_seq        - summed pair weights of the sequences supporting that order
#   score          - summed weighted scores of the supporting motif instances
#   gap, overlap   - weighted gap / overlap totals divided by num_seq
#   weight         - score - (gap total)/2 - (overlap total); ranks candidates
#   proc           - set to 1 once the candidate has been examined
struct 'MergeMotif' => {motif1=> '$', motif2=>'$', num_seq=>'$', score=>'$', gap=>'$', 
			overlap=>'$', weight => '$', proc => '$'};

# Split two dot-separated names into their shared leading components and the
# parts that differ.
#
# Arguments: two strings, e.g. "x.motif.h1" and "x.motif.h2".
# Returns:   a three-element list (prefix, suffix1, suffix2) where prefix is
#            the leading "."-separated components common to both names and
#            suffixN is whatever remains of name N (possibly the empty string).
#
# The prefix is rebuilt with join so that empty leading components are kept:
# "./x.motif.h1" yields the prefix "./x.motif", not "/x.motif".
sub my_strcmp
{
    my ($s1, $s2) = @_;
    my @t1 = split /\./, $s1;
    my @t2 = split /\./, $s2;

    # Count the leading components shared by both names.
    my $common = 0;
    while ($common < scalar @t1 && $common < scalar @t2
	   && $t1[$common] eq $t2[$common]) {
	$common++;
    }

    my $prefix  = join ".", @t1[0 .. $common - 1];
    my $suffix1 = join ".", @t1[$common .. $#t1];
    my $suffix2 = join ".", @t2[$common .. $#t2];
    return ($prefix, $suffix1, $suffix2);
}

# Return the names of all existing files that start with the given prefix.
#
# Argument: a file name prefix; "*" is appended and the result is expanded
#           with glob.
# Returns:  the list of matching file names (empty when nothing matches).
#
# Only lexical variables are used, so neither the caller's $f nor $_ is
# modified.
sub match_file{
    my ($prefix) = @_;
    my @files = glob("$prefix*");
    return @files;
}


# Command line: the sequence file followed by one or more motif file prefixes.
$seq_file = shift @ARGV;
die "Usage: $0 <sequence file> <motif file prefix> ...\n"
    unless (defined $seq_file);
@files = @ARGV;

# Expand every prefix into the motif files that actually exist.
@align_files = ();
foreach my $pattern (@files) {
    push @align_files, match_file($pattern);
}

# Parenthesised so the newline is printed after the list instead of being
# joined into it (which left a trailing space).
print join(" ", @align_files), "\n";

# Names of the input files; a merge whose output name equals one of these is
# skipped later on.
@all_files = @align_files;

# %alignments:   file name => alignment object returned by read_stockholm.
# %merged_files: file name => 0 while the file is still available for merging.
%alignments = ();
%merged_files = ();
foreach my $file (@align_files) {
    $merged_files{$file} = 0;
    $alignments{$file} = read_stockholm($file);
}

# Candidate merges found so far, keyed by the name the merged motif file would
# get (common prefix of the two file names followed by both differing
# suffixes).  Entries are never removed, so each pair is evaluated only once.
%merge_motif=();

# Run an external program directly (no shell, so file names containing spaces
# or shell metacharacters are passed through unchanged) and discard its
# standard output.  Returns 1 when the program exits with status 0; otherwise
# prints a warning and returns 0.
sub run_quietly
{
    my @cmd = @_;
    my $pid = open(my $out, '-|', @cmd);
    if (!$pid) {
        warn "Cannot run $cmd[0]: $!\n";
        return 0;
    }
    while (defined(my $discarded = <$out>)) {
        # output is intentionally ignored
    }
    close($out);
    if ($? != 0) {
        warn "$cmd[0] exited with status " . ($? >> 8) . "\n";
        return 0;
    }
    return 1;
}

# Each pass (1) evaluates every not yet evaluated pair of available motif
# files and (2) carries out the best candidate merges.  The loop ends with the
# first pass that produces no new merged file.
while(1) {
    # ---- Phase 1: evaluate all pairs of available files -------------------
    while(scalar @align_files > 0) {
        my $f1 = shift @align_files;
        foreach my $f2 (@align_files) {
            my $index = join ".", my_strcmp($f1, $f2);
            next if (exists $merge_motif{$index});

            # Weighted number of shared sequences in which the two motifs
            # coincide (one contains the other, or they are nearly identical).
            my $num_overlap = 0;
            # Statistics for the two possible orders of the motifs.
            my %m1_first = (count => 0, score => 0, gap => 0, overlap => 0);
            my %m2_first = (count => 0, score => 0, gap => 0, overlap => 0);

            # detect overlap
            # NOTE(review): seqs is assumed to return a hash ref keyed by
            # sequence id whose values offer start/end/weight/score accessors
            # (provided by io.pl) - confirm there.
            my $seqs1 = $alignments{$f1}->seqs;
            my $seqs2 = $alignments{$f2}->seqs;
            foreach my $seq_id (keys %$seqs1) {
                next if (!exists $seqs2->{$seq_id});
                my $motif1 = $seqs1->{$seq_id};
                my $motif2 = $seqs2->{$seq_id};
                my $len1 = abs($motif1->end - $motif1->start);
                my $len2 = abs($motif2->end - $motif2->start);
                my $pair_weight = $motif1->weight * $motif2->weight;

                # $stats stays undefined when one motif contains the other;
                # otherwise it selects the order this sequence supports and
                # $overlap is the shared length (negative means a gap).
                my ($overlap, $stats);
                if ($motif1->start > $motif2->start) {
                    if ($motif1->end < $motif2->end) {
                        # motif1 lies inside motif2
                    }
                    else {
                        $overlap = $motif2->end - $motif1->start;
                        $stats = \%m2_first;
                    }
                }
                else {
                    if ($motif1->end < $motif2->end) {
                        $overlap = $motif1->end - $motif2->start;
                        $stats = \%m1_first;
                    }
                    else {
                        # motif2 lies inside motif1
                    }
                }

                # Containment, or nearly the same region: fewer than 25
                # positions of one motif lie outside the shared part and the
                # shared part exceeds 85% of one motif's length.
                if (!defined $stats
                    || (($len1 - $overlap < 25 || $len2 - $overlap < 25)
                        && ($overlap > 0.85 * $len1 || $overlap > 0.85 * $len2))) {
                    $num_overlap += $pair_weight;
                    next;
                }

                $stats->{count} += $pair_weight;
                $stats->{score} += $motif1->score * $motif1->weight + $motif2->score * $motif2->weight;
                if ($overlap > 0) {
                    $stats->{overlap} += $overlap * $motif1->weight * $motif2->weight;
                }
                else {
                    $stats->{gap} += - $overlap * $motif1->weight * $motif2->weight;
                }
            }

            if ($num_overlap > $m1_first{count} && $num_overlap > $m2_first{count}) {
                print "$f1 $f2 Overlap\n";
                next;
            }

            # Choose the order with the higher total score; it needs a
            # weighted support of at least 3 to become a candidate.
            my ($first, $second, $best);
            if ($m1_first{score} > $m2_first{score}) {
                # motif1 is before motif2
                ($first, $second, $best) = ($f1, $f2, \%m1_first);
            }
            else {
                # motif2 is before motif1
                ($first, $second, $best) = ($f2, $f1, \%m2_first);
            }
            next if $best->{count} < 3;

            $merge_motif{$index} =
                MergeMotif->new(motif1  => $first,
                                motif2  => $second,
                                num_seq => $best->{count},
                                score   => $best->{score},
                                gap     => $best->{gap} / $best->{count},
                                overlap => $best->{overlap} / $best->{count},
                                weight  => $best->{score} - $best->{gap} / 2 - $best->{overlap},
                                proc    => 0);
        }
    }

    # ---- Phase 2: carry out candidate merges, highest weight first --------
    my @new_files = ();
    foreach my $id (reverse sort {$merge_motif{$a}->weight <=> $merge_motif{$b}->weight } keys %merge_motif) {
        my $candidate = $merge_motif{$id};
        next if ($candidate->proc);
        my $f1 = $candidate->motif1;
        my $f2 = $candidate->motif2;

        $candidate->proc(1);
        next unless ($candidate->weight > 0);

        next if ($merged_files{$f1} || $merged_files{$f2});
        # NOTE(review): both files are marked as used before the gap and
        # name-clash checks below, so a rejected candidate still removes them
        # from later merges.  Kept as in the original - confirm it is intended.
        $merged_files{$f1} = 1;
        $merged_files{$f2} = 1;
        next if ($candidate->gap > $MAX_GAP);

        # The candidate's key doubles as the name of the merged motif file.
        my $f = $id;
        next if (grep { $_ eq $f } @all_files);

        print  "Merge $f1 $f2 > $f\n";
        run_quietly("$path/merge_motif.pl", $seq_file, $f1, $f2, "$f.temp");
        my $cm_file = $f;
        $cm_file =~ s/\.motif/\.cm/;
        run_quietly("$path/cmfinder", "--g", "0.7", "-o", $f, "-a", "$f.temp",
                    $seq_file, $cm_file);
        unlink "$f.temp";

        unless (-e $f) {
            warn "Merging $f1 and $f2 did not produce $f\n";
            next;
        }

        # Keep the merged motif only if it scores at least as well as both of
        # its inputs.
        my $merge_align = read_stockholm($f);
        if ( $merge_align->score < $alignments{$f1}->score ||
             $merge_align->score < $alignments{$f2}->score){
            unlink $f;
        }
        else{
            push @new_files, $f;
            $merged_files{$f} = 0;
            # Reuse the alignment parsed above instead of reading it again.
            $alignments{$f} = $merge_align;
        }
    }
    last if (scalar @new_files == 0);

    # The next pass works on every file not yet used in a merge, including
    # the files created in this pass.
    @align_files = grep { $merged_files{$_} == 0 } keys %merged_files;
}
    


