#!/usr/bin/env perl
#
# extracts a sequence-constraint-pair with specified flanking region sizes
# from a full length sequence-constraint-pair
#
#    Copyright (C) 2015  Nikolai Hecker
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
# 
############################################################################
use strict;
use warnings;
use Carp;

my $usage = "Usage:\n\t$0 [CONSTRAINT-FILE] [LEFT] [RIGHT]\n";

# Command line:
#   CONSTRAINT-FILE  file whose first three lines are header, sequence and
#                    constraint string
#   LEFT             number of positions to keep before the first constraint
#                    character
#   RIGHT            number of positions to keep after the last constraint
#                    character
#
# Prints the header followed by the extracted sequence and constraint
# substrings to STDOUT; croaks if the requested region does not fit.
if (@ARGV < 3) {
    print $usage;
    croak "Too few arguments.\n";
}

my ($cf, $left, $right) = @ARGV[0 .. 2];

# The flanks are used in index arithmetic below, so anything that is not a
# plain (optionally signed) integer would be silently numified by Perl.
for my $flank ($left, $right) {
    croak "Flank sizes must be integers, got '$flank'.\n"
        unless $flank =~ /\A[+-]?\d+\z/;
}

## read file
# Three-argument open: the file name is never interpreted as a mode or pipe.
open(my $fh, '<', $cf) or croak "Can't open '$cf': $!\n";
my @record = map { scalar readline($fh) } 1 .. 3;
close($fh);

if (grep { !defined } @record) {
    croak "File '$cf' must contain at least three lines "
        . "(header, sequence, constraint).\n";
}

# Strip line terminators, including the CR of CRLF files, so that they never
# count towards the line length or end up in the extracted region.
s/[\r\n]+\z// for @record;
my ($head, $seq, $cons) = @record;

# find first and last base pair
# Characters of the constraint string that mark a constrained position.
my $constraint_char = qr/[x()|<>]/;

croak "No constraint characters found in constraint line of '$cf'.\n"
    unless $cons =~ $constraint_char;
my $firstbp = $-[0];    # offset of the leftmost constraint character

# The greedy '.*' backtracks to the rightmost constraint character; $-[1] is
# the offset at which capture group 1 starts.
$cons =~ /.*($constraint_char)/s;
my $lastbp = $-[1];

my $s   = $firstbp - $left;
my $e   = $lastbp + $right;
my $len = $e - $s + 1;

# A negative length must be rejected as well: substr() would interpret it as
# "leave that many characters off the end" and return the wrong region.
if ($s < 0 || $e > length($cons) - 1 || $len <= 0) {
    croak "Error! Something is wrong is those flanks: start=$s, end=$e, length=$len\n";
}

# The same window is cut out of the sequence line, so it has to reach that far.
if ($e > length($seq) - 1) {
    croak "Sequence line is too short for the requested region: "
        . "end=$e, sequence length=" . length($seq) . "\n";
}

my $nseq  = substr($seq,  $s, $len);
my $ncons = substr($cons, $s, $len);

print "$head\n";
print "$nseq\n";
print "$ncons\n";
