#!/usr/bin/perl -sw

# Tunable parameters.  The script is run with perl's -s switch (see the #!
# line), so "-l=VALUE", "-m=VALUE" and "-d=VALUE" on the command line arrive
# here as the package variables $l, $m and $d.  A switch that was not given
# leaves its variable undefined and the built-in default is used instead.
# All three values are handed unchanged to findStems() for every sequence.

# -l : value passed to findStems() as its $len argument (default 4).
my $length = defined $l ? $l : 4;
$l = "";    # blank the switch variable once its value has been copied

# -m : value passed to findStems() as its $max_len argument (default 30).
my $max_len = defined $m ? $m : 30;
$m = "";

# -d : value passed to findStems() as its $min_dist argument (default 3).
my $min_dist = defined $d ? $d : 3;
$d = "";

use strict;

# Running totals over the whole input:
#   $count  - number of constraint lines printed so far
#   $scount - number of input lines processed
my $count = 0;
my $scount = 0;

# Every input line is expected to hold four whitespace-separated fields:
#   name1 seq1 name2 seq2
# For each line the candidate stems of both sequences are located and every
# combination of one stem from each sequence is printed as a line of the form
# "b1 e1 b2 e2", preceded by a "#" header naming the pair and the parameters.
while (<>) {

	# The limit of 5 leaves anything after the fourth field (including the
	# trailing newline) in an ignored fifth element, so $seq2 stays clean.
	my ($name1, $seq1, $name2, $seq2) = split(/\s+/, $_, 5);
	
	# Stems of the first sequence: begin position => list of end positions.
	my %pos1;
	my $stems1 = findStems(uc $seq1, \%pos1, $length, $max_len, $min_dist);
	
	# Stems of the second sequence, same layout.
	my %pos2;
	my $stems2 = findStems(uc $seq2, \%pos2, $length, $max_len, $min_dist);

	# One constraint line is printed per (stem in seq1, stem in seq2) pair,
	# so this line contributes $stems1 * $stems2 to the total.  The product
	# must be ADDED to the running total: multiplying the total itself would
	# also scale the counts of earlier input lines and would reset it to
	# zero as soon as one second sequence has no stem at all.
	$count += $stems1 * $stems2;

	print "# $name1 $name2 $length $max_len $min_dist\n";
	
	# Begin positions are visited in ascending numeric order; the end
	# positions keep the order in which findStems() stored them.
	foreach my $b1 (sort num keys %pos1) {
		foreach my $b2 (sort num keys %pos2) {
			foreach my $e1 (@{$pos1{$b1}}) {
				foreach my $e2 (@{$pos2{$b2}}) {
					print "$b1 $e1 $b2 $e2\n";
				}
			}
		}
	}
	
	
#	compareStemAnno($name, $anno, \%pos);
	$scount++;
}
#printf "$length\t%1.2f\t$count\n", $count/$scount;

# The summary goes to STDERR so it does not mix with the constraint output.
warn "Number of constraints: $count\n";

# findStems($seq, $pos, $len, $max_len, $min_dist)
#
# Scans $seq for pairs of segments of length $len in which the downstream
# segment matches the reverse complement of the upstream one.  A-U and C-G
# pairs are accepted, and G may additionally pair with U (and U with G).
#
# Arguments:
#   $seq      - sequence string; letters are looked up exactly as given, so
#               the caller is expected to upper-case it first
#   $pos      - hash reference that receives the result: the key is the
#               1-based position of the last base of the upstream segment,
#               the value an array reference holding the 1-based start
#               position of every matching downstream segment
#   $len      - length of each of the two segments
#   $max_len  - largest number of bases allowed between the two segments
#   $min_dist - smallest number of bases allowed between the two segments
#
# Returns the total number of positions stored in $pos by this call.
sub findStems {

	# Pattern that each upstream base requires at the pairing position.
	# 'Q' is presumably a placeholder that does not occur in real sequences,
	# so that N and X never pair with anything - TODO confirm.
	my %reg = (
		'A' => 'U',
		'C' => 'G',
		'G' => '[CU]',
		'U' => '[AG]',
		'N' => 'Q',
		'X' => 'Q',
	);

	# Pattern used for any character that has no entry in %reg (for example
	# 'T' or a gap symbol).  "(?!)" can never match, so a segment containing
	# such a character simply yields no stem.  Without this fallback the
	# lookup returns undef, which triggers a warning and silently shortens
	# the pattern, producing matches at wrong positions.
	my $no_pair = '(?!)';

	my ($seq, $pos, $len, $max_len, $min_dist) = @_;
	
	my $hcount = 0;
	my $seq_len = length($seq);

	# $i is the zero-based start of the upstream segment.  The scan runs
	# from the right-most possible start towards the beginning of $seq.
	for(my $i = $seq_len - $len - $min_dist; $i >= 0; $i--) {
	
		# Build the regular expression.  Each new piece is put in FRONT of
		# the pattern, so the result describes the reverse complement.
		my $reg = "";
		for(my $p = 0; $p < $len; $p++) {
			my $base = substr($seq, $i+$p, 1);
			$reg = (exists $reg{$base} ? $reg{$base} : $no_pair) . $reg;
		}
		
		# Extract the subsequence in which there can be a match: it starts
		# $min_dist bases after the upstream segment and is long enough for
		# a downstream segment that starts up to $max_len bases after it.
		my $start = $i + $len + $min_dist;
		my $sublen = $max_len + $len - $min_dist;
		my $subseq = substr($seq, $start, $sublen);
		
		my @end;
		
		my $begin = $i + $len; # -1 to make the lengths fit, +1 to change to start
		                       # at position one (not zero).
		while ($subseq =~ /($reg)/g) {
			# pos() is the offset just past the match; stepping back $len
			# gives the match start.  +1 to go from base zero to base one.
			my $val = $start + pos($subseq) - $len + 1;
			
			# NOTE(review): $$pos{$p} holds an array reference (see the
			# assignment below the while loop), so this numeric comparison
			# is made against the reference's address rather than against
			# the stored positions.  In practice it looks like the test can
			# never be true, i.e. no hit is ever skipped.  It is kept as is
			# so that the output stays unchanged - confirm the intended
			# filter before altering it.
			my $skip = 0;
			for(my $p = $begin+$len; $p < $val - $len -$min_dist -1; $p++) {
				if (defined $$pos{$p} and $$pos{$p} < $val - $len) {
					$skip = 1;
					last; # one reason to skip is enough
				}
			}
			if ($skip) {next;}
			
			push(@end, $val);
			$hcount++;
		}
		
		# Only upstream positions with at least one partner are recorded.
		if (@end) {$$pos{$begin} = \@end;}
		
	}
	
	return $hcount;
}

# compareStemAnno($name, $anno, $pos)
#
# Walks through every run of characters other than '-' in the annotation
# string $anno.  A run counts as a hit when at least one offset inside it is
# a key of %$pos whose list contains a value smaller than the offset just
# past the run; otherwise it counts as a miss.
#
# Arguments:
#   $name - label printed in the first output column
#   $anno - annotation string in which '-' separates the runs
#   $pos  - hash reference mapping a position to an array reference of
#           positions (the layout filled in by findStems)
#
# Prints "<name>\t<hits>\t<misses>" with the name left-justified in a field
# of 30 characters.  No meaningful value is returned.
sub compareStemAnno {

	my ($name, $anno, $pos) = @_;

	my ($hit, $miss) = (0, 0);

	while ($anno =~ /([^-]+)/g) {

		# Offsets of the run: $start is its first character, $end is one
		# past its last character.
		my $end = pos($anno);
		my $start = $end - length($1);

		my $found = 0;
		OFFSET:
		foreach my $i ($start .. $end - 1) {
			next OFFSET unless defined $$pos{$i};
			if (grep { $_ < $end } @{$$pos{$i}}) {
				$found = 1;
				last OFFSET;
			}
		}

		if ($found) {
			$hit++;
		}
		else {
			$miss++;
		}
	}

	printf "%-30s\t$hit\t$miss\n", $name;
}

# Comparison routine for sort(): orders the two values sort() supplies in
# $a and $b numerically, smallest first.
sub num {
	return $a <=> $b;
}

