package EST2ncRNA::SequenceInterface;

use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);

# Items to export into callers namespace by default. Note: do not export
# names by default without a very good reason. Use EXPORT_OK instead.
# Do not simply export all your public functions/methods/constants.

our @EXPORT_OK = qw(get_subject_subsequences_of_one_chromosome
		    get_subsequence
		    );

our @EXPORT = qw();

our $VERSION = '0.01';


# Extract the subsequences addressed by a list of queries from a compressed
# FASTA file that may hold several sequence entries. The file is read once.
# Coordinates start counting with '1' (blastn format) and are inclusive.
#
# Parameters:
#   $fastafile       - path of the compressed FASTA file; it is read through
#                      the external 'zcat' program.
#   $queries_ary_ref - reference to an array of queries. Each query is an
#                      array reference: elements 0 and 1 form the result key,
#                      element 2 is the name of the FASTA entry (the first
#                      word after '>'), elements 3 and 4 are the two
#                      coordinates of the hit.
#
# Side effect on the queries (kept for backward compatibility): if element 3
# is greater than element 4 the two are swapped, and "-" (swapped) or "+" is
# pushed onto every query. Element 5 is then read back as the strand.
# NOTE(review): this presumes each query has exactly five elements on entry;
# confirm against the callers.
#
# Returns a reference to a hash that maps "<element 0>|<element 1>" to the
# extracted sequence. Queries sharing a key accumulate in the same entry.
# Hits on the "-" strand are reverse-complemented as soon as their last base
# has been read, including when that base is the last one of an entry or of
# the file. A hit that reaches beyond the end of its entry is returned
# truncated and is not reverse-complemented.
#
# Dies if zcat cannot be started or reports a failure.
sub get_subject_subsequences_of_one_chromosome {
    my ($fastafile, $queries_ary_ref) = @_;

    # Normalise the coordinates so that start <= end and record the strand.
    foreach my $query (@$queries_ary_ref) {
        if ($$query[3] > $$query[4]) {
            @$query[3, 4] = @$query[4, 3];
            push @$query, "-";
        }
        else {
            push @$query, "+";
        }
    }

    # Group the hits by FASTA entry name, ordered by start index, so the
    # per-line scan below can stop at the first hit that starts further right.
    my %targets_of;
    foreach my $query (sort { $$a[3] <=> $$b[3] } @$queries_ary_ref) {
        push @{ $targets_of{ $$query[2] } }, {
            key    => "$$query[0]|$$query[1]",
            start  => $$query[3],
            end    => $$query[4],
            strand => $$query[5],
        };
    }

    # List-form pipe open: no shell is involved, so the file name is passed to
    # zcat verbatim. 'or' (not '||') makes the check apply to open() itself.
    open(my $in, '-|', 'zcat', $fastafile)
        or die("Can not open the file '$fastafile': $!\n");

    my %hits;
    my $targets = [];    # hits located on the entry currently being read
    my $offset  = 0;     # number of bases of the current entry already read

    while (my $line = <$in>) {
        chomp $line;

        if ($line =~ /^>(\S+)/) {
            $targets = $targets_of{$1} || [];
            $offset  = 0;
            next;
        }

        my $line_length = length $line;
        next if $line_length == 0;

        # This line holds the bases $offset+1 .. $line_end of the entry.
        my $line_end = $offset + $line_length;

        foreach my $target (@$targets) {
            # Hits are sorted by start: nothing further can begin on this line.
            last if $line_end < $target->{start};
            # The last base of this hit was already read on an earlier line.
            next if $offset >= $target->{end};

            my $from = $target->{start} > $offset ? $target->{start} : $offset + 1;
            my $to   = $target->{end} < $line_end ? $target->{end}   : $line_end;
            $hits{ $target->{key} } .= substr($line, $from - $offset - 1, $to - $from + 1);

            # The hit is complete once its end lies on this line; finishing it
            # here (instead of waiting for a following base) also covers hits
            # that end on the last base of an entry.
            if ($target->{end} <= $line_end && $target->{strand} eq "-") {
                my $revcomp = scalar reverse $hits{ $target->{key} };
                $revcomp =~ tr/ATCGatcg/TAGCtagc/;
                $hits{ $target->{key} } = $revcomp;
            }
        }

        $offset = $line_end;
    }

    # close() on a pipe reports the exit status of zcat (e.g. missing file).
    close($in)
        or die("Reading '$fastafile' through zcat failed: "
               . ($! ? $! : "exit status " . ($? >> 8)) . "\n");

    return \%hits;
}


# Return the part of a sequence string between two positions.
#
# Parameters:
#   $seq   - the sequence string.
#   $start - first position to return, counting from 1.
#   $end   - last position to return (inclusive).
#
# Positions outside the sequence are clamped to it: a $start below 1 begins
# at the first character, and an $end beyond the sequence stops at the last.
# Returns the empty string if the range is empty ($start > $end), lies
# completely outside the sequence, or if $seq is undefined.
sub get_subsequence {
    my ($seq, $start, $end) = @_;

    return "" unless defined $seq;
    my $seq_length = length $seq;

    # Round non-integer bounds inwards, so that exactly the positions $n with
    # $start <= $n <= $end are selected.
    my $first = int($start);
    $first++ if $first < $start;
    my $last = int($end);
    $last-- if $last > $end;

    $first = 1           if $first < 1;
    $last  = $seq_length if $last > $seq_length;

    return "" if $first > $last;

    # substr() counts from 0, the interface counts from 1.
    return substr($seq, $first - 1, $last - $first + 1);
}


1;
__END__
# Below is stub documentation for your module. You'd better edit it!

=head1 NAME

EST2ncRNA::SequenceInterface - extract subsequences from compressed FASTA files and from sequence strings

=head1 SYNOPSIS

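  use EST2ncRNA::SequenceInterface qw(get_subject_subsequences_of_one_chromosome
                                      get_subsequence);

  # each query: [ key part 1, key part 2, FASTA entry name, start, end ]
  my @queries = ( [ 'est1', 'hit1', 'chr1', 1200, 1350 ] );
  my $hits    = get_subject_subsequences_of_one_chromosome('chr1.fa.gz', \@queries);
  print $hits->{'est1|hit1'}, "\n";

  # positions 3 to 5 of a sequence string
  my $subseq = get_subsequence('ACGTACGT', 3, 5);    # 'GTA'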

=head1 DESCRIPTION

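This module provides two functions for cutting subsequences out of
nucleotide sequences. All coordinates start counting with 1 (blastn format)
and are inclusive.

C<get_subject_subsequences_of_one_chromosome($fastafile, \@queries)> reads a
compressed FASTA file once through C<zcat> and collects the sequence of every
query. Each query is an array reference holding two key fields, the name of
the FASTA entry, and the two coordinates of the hit. If the first coordinate
is greater than the second, the hit is treated as lying on the minus strand:
the coordinates are swapped and the sequence is reverse-complemented. The
function appends the strand (C<+> or C<->) to each query array and returns a
reference to a hash that maps C<"field0|field1"> to the extracted sequence.

C<get_subsequence($seq, $start, $end)> returns the characters of C<$seq> from
position C<$start> to position C<$end>.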

=head2 EXPORT

None by default. C<get_subject_subsequences_of_one_chromosome> and
C<get_subsequence> can be exported on request.



=head1 SEE ALSO

zcat(1), which this module runs to read the compressed FASTA file.

=head1 AUTHOR

Stefan Seemann, E<lt>seemann@bioinf.uni-leipzig.deE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006 by Stefan Seemann

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.


=cut
