#!/usr/bin/gawk -f # Computes the K-category correlation coefficient. # Copyright: Jan Gorodkin, gorodkin@bioinf.kvl.dk # This software is distributed with a # GNU GENERAL PUBLIC LICENSE, see http://www.gnu.org/licenses/gpl.txt # Copyright (C) 2004 Jan Gorodkin # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # For publication of results, please cite: # Comparing two K-category assignments by a K-category correlation coefficient. # J. Gorodkin, Computational Biology and Chemistry, 28:367-374, 2004. # get data. Matrix should be listed as one line like: # field1 ... fieldM C_11 C_12 ... C_1K C_21 C_22 ... C_2K ... C_K1 C_K2 ... C_KK # C_11 start at field startpos!!! # This script works with gawk 3.1.1. BEGIN{ for(i=1;i"; print ""; print "Description:"; print "------------"; print " Computes the discrete version of the R_K correlation coefficient"; print " from the confusion matrix C."; print " The KxK confusion matrix C should be listed as one line like:"; print " field1 ... fieldM C_11 C_12 ... C_1K C_21 C_22 ... C_2K ... C_K1 C_K2 ... C_KK"; print " C_11 start at field (position) startpos"; print " The output is"; print " field1 ... fieldM R_K K b b' COV_XY COV_XX COV_YY Q_K C_11 .... C_KK"; print " The parameter w_k is assumed constant to 1/K for all k = 1,...,K."; print ""; print "Examples:"; print " echo 12 34 56 78 | rkorrC"; print " echo data 12 34 56 78 | rkorrC startpos=2"; print " echo \"102 2 3 4 50 6 7 8 92\\n3 4 5 6 7 8 0 4 5\" | rkorrC"; print "" print "Options:"; print "--------"; print " -h|--help Show this message."; print ""; print "Note:"; print " This script works with gawk 3.1.1. Higher version like"; print " gawk 3.1.3 might give problems due to foolish standards and"; print " no backward compatibility in string concatenation!"; print ""; print "Reference:"; print "----------"; print " Comparing two K-category assignments by a K-category correlation coefficient"; print " J. Gorodkin, Computational Biology and Chemistry,28:367-374, 2004."; print ""; print "Author:"; print "-------"; print "Jan Gorodkin, gorodkin@bioinf.kvl.dk."; print ""; exit; } } } NR==1{ if(startpos=="") startpos=1; } NF>0{ for(i=1;i0) { RK=COV_XY/denominator; b=COV_XY/COV_XX; bprime=COV_XY/COV_YY; for(i=1;i "/dev/stderr"; close("/dev/stderr"); } } } function Tr(tC,tK) { val=0; for(k=1;k