#!/usr/bin/gawk -f



#########################################################################
#  program al2seqbp / al2seqbp.awk
#
#  001019 Jan Gorodkin (gorodkin@bioinf.au.dk)
#
#  Copyright (C) 2000 Jan Gorodkin
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#  02111-1307, USA.
#
#########################################################################



# Start-up: set the default state and column layout, then answer a help
# request before any input is read.
BEGIN{
 headout=1;        # 1 while the file header (before the "==========" line) is being read
 type="RNA";       # only entries whose TYPE equals this get the extra seq_bp column
 # Default column numbers; "; COL <n> <name>" lines in the input override them.
 col_label=1;
 col_residue=2;
 col_seqpos=4;
 col_alignpos=3;
 col_align_bp=5;
 col=0;            # highest column number seen so far on a "; COL" line

 # ARGV is populated for indices 0..ARGC-1 only.  Stopping at ARGC-1 avoids
 # reading ARGV[ARGC], which would silently create an empty element.
 for(a=1;a<ARGC;a++)
 {
   if(ARGV[a]=="-help" || ARGV[a]=="-") { print "Usage:   al2seqbp <file>";  exit; }
 }

}


# File header: while headout is 1, lines whose first field is ";" are echoed
# unchanged.
headout==1 && $1==";" {
  print;
}
# A line whose second field begins with ten "=" characters closes the header.
# This rule comes after the echo rule, so a ";"-prefixed ruler line is still
# printed before headout is cleared.
$2 ~ /^==========/ {
  headout=0;
}

# "; TYPE <name>" line: switch on echoing of the entry header (entout) and
# remember the upper-cased type name for the later ENTRY check.
headout==0 && toupper($2)=="TYPE" {
  entout=1;
  dtype=toupper($3);
}
# "; COL <n> <name>" line: keep track of the highest column number seen and,
# for the column names this script uses, record which field holds them.
# Names are matched case-insensitively; unknown names only affect the maximum.
headout==0 && toupper($2)=="COL" {
    if(col<$3) col=$3;

    colname=toupper($4);
    if(colname=="LABEL")         col_label=$3;
    else if(colname=="RESIDUE")  col_residue=$3;
    else if(colname=="SEQPOS")   col_seqpos=$3;
    else if(colname=="ALIGNPOS") col_alignpos=$3;
    else if(colname=="ALIGN_BP") col_align_bp=$3;
}
# "; ENTRY <name>" line: decide how the data rows of this entry are handled.
# parsedat is 0 when the entry's type matches `type` (rows are buffered and get
# a seq_bp column) and 1 otherwise (rows are copied through untouched).
# For a matching entry the new column is announced right here, numbered one
# past the highest column seen on a "; COL" line so far.
headout==0 && toupper($2)=="ENTRY" {
  parsedat=(dtype!=type);
  if(!parsedat) print "; COL " (col+1) "             seq_bp";
}


# Entries that are not of the target type (parsedat is 1): once the entry
# header is over (entout is 0), copy every line through unchanged.
# This rule precedes the "----------" rule, so it still sees entout as 1 on
# the separator line and does not print that line a second time.
parsedat==1 && entout==0 && headout==0 {
  print;
}


# A line whose second field begins with ten "-" characters ends the entry
# header: echo it, stop header echoing, and restart the row counter so the
# rows that follow are buffered from index 1.
headout==0 && $2 ~ /^----------/ {
  print;
  k=1;
  entout=0;
}


# Echo the lines of an entry header (from the TYPE line onwards) while entout
# is 1.  Placed after the "----------" rule, which clears entout and prints
# the separator itself, so the separator is not emitted twice.
entout==1 && headout==0 {
  print;
}


# Buffer one data row of a target-type entry (parsedat is 0, header finished).
# Comment lines are recognised by a ";" in the first field.  Testing only the
# label column would let "; ----------" / "; **********" lines be buffered as
# data whenever a "; COL" line places the label somewhere other than field 1.
# The label-column test is retained so previously skipped lines stay skipped.
headout==0&&entout==0&&parsedat==0&&$1!=";"&&$col_label!=";"{
  line[k]=$0;                      # full original row, re-emitted later
  label[k]=$col_residue;           # note: holds the residue field despite its name
  seqpos[k]=$col_seqpos;
  alignpos[k]=$col_alignpos;
  align_bp[k]=$col_align_bp;
  # Sequence position of this row, keyed by (row index, partner alignment position).
  bp[k,align_bp[k]]=$col_seqpos;
  k++;
}


# End of a target-type entry ("**********" line): print every buffered row
# followed by the sequence position of its base-pair partner, then echo the
# terminator line itself.
headout==0&&entout==0&&parsedat==0&&substr($2,1,10)=="**********"{
  # Build a fresh map from alignment position to buffered row for this entry.
  # Resolving partners through alignpos[] (instead of assuming that the row
  # counter equals the alignment position) also keeps values from earlier
  # entries out of the lookup.  split("", arr) empties the array.
  split("", row_of);
  for(l=1;l<k;l++) row_of[alignpos[l]]=l;

  for(l=1;l<k;l++)
  {
      # "." marks an unpaired position; it is also used when the partner's
      # alignment position does not occur in this entry, so the new column is
      # never left empty.  A plain "." is used because "\." is not a valid
      # string escape and makes gawk print a warning.
      seq_bp=".";
      if(align_bp[l]!="." && (align_bp[l] in row_of)) seq_bp=seqpos[row_of[align_bp[l]]];
      print line[l]"  "seq_bp;
  }

  print $0;
}


########### end of file ##############
