#! /bin/tcsh -f
#
# Use this script to draw Pfold dot plots (PostScript) for the groups
# of RNA sequences listed in a group file, e.g. groups of different
# secondary structure obtained by clustering:
#
#   - First argument is the fasta file
#   - Second argument is the group file to plot
#   - Optional third argument is the position range
#

# Both the fasta file and the group file are mandatory.  Without a group
# file name the awk call below would be started with no file operand.
if ($#argv < 2) then
  echo "Usage: $0 fasta_file group_file [position_range]"
  exit 1
endif

# Stop early when the group file cannot be read; otherwise the group
# count below would come out empty.
if (! -r "$2") then
  echo "Cannot read group file: $2"
  exit 1
endif

# Directories holding the helper programs inside the SARSE installation
set PCLUSTER = "${SARSE_HOME}/programs/pcluster/bin"
set PFOLD = "${SARSE_HOME}/programs/pfold/bin"

# File that receives the log output of the plotting commands
set LOGFILE = "/dev/null"

# Prefix of all output files: the group file name without its extension
set BASE = "$2:r"

# Number of groups (the group file holds one group per line)
set numgroup = `awk 'END {print NR}' "$2"`

# Intermediate files written by runscfg_pp.tcsh.  The process id of the
# shell ($$) is part of the name so that concurrent runs in the same
# directory do not overwrite each other.
set plot_big = plot1.$$.pp
set plot_other = plot2.$$.pp

if ($numgroup == 1) then
  # Only one group: draw a single dot plot for it.
  set group1 = `awk '{print $1}' "$2"`
  echo "Calculating dotplot for group"
  # The optional position range ($3) is deliberately left unquoted so
  # that it vanishes from the command line when it was not given.
  $PCLUSTER/runscfg_pp.tcsh "$1" "$group1" $3 > $plot_big
  # The subshell sends the standard output of drawdot to the PostScript
  # file, while its remaining output (stderr) goes to the log file.
  ($PFOLD/drawdot $plot_big > "$BASE"_plot.ps) >& $LOGFILE
  echo "output to file: ${BASE}_plot.ps"
  rm -f $plot_big
else
  # Find biggest group.  Every line is reduced to its commas, so a group
  # with more comma-separated members gives a longer comma string.  The
  # sort runs in the C locale (plain byte order) so that the longest
  # string reliably ends up last, whatever the locale of the caller.
  # The final awk prints the zero-padded line number and the group.
  set biggest = (`awk '{s = $1; gsub("[^,]", ""); print "," $1 , NR, s}' "$2" | env LC_ALL=C sort | tail -1 | awk '{printf "%-2.2d %s", $2, $3}'`)

  if ($#biggest < 2) then
    echo "Could not determine the largest group in $2"
    exit 1
  endif

  set big_group_no = "$biggest[1]"
  set big_group = "$biggest[2]"

  echo "The largest group is number $big_group_no"

  echo "Calculating dotplot for largest group (orange)"
  $PCLUSTER/runscfg_pp.tcsh "$1" "$big_group" $3 > $plot_big

  # Zero-padded running number of the group being visited
  set group_no = 0

  foreach group (`awk '{print $1}' "$2"`)
    set group_no = `echo $group_no | awk '{printf "%-2.2d", $1+1}'`

    # Every group other than the largest one is plotted against it.
    if ("$group" != "$big_group") then

      echo "Calculating dotplot for group $group_no (blue)"
      $PCLUSTER/runscfg_pp.tcsh "$1" "$group" $3 > $plot_other
      ($PFOLD/drawdot $plot_big $plot_other > "$BASE"_plot_"$big_group_no"_vs_"$group_no".ps) >& $LOGFILE
      echo "Output to file: ${BASE}_plot_${big_group_no}_vs_${group_no}.ps"
      echo
    endif
  end

  # Remove the intermediate files; -f because the second file is never
  # created when no group differs from the largest one.
  rm -f $plot_big $plot_other
endif

