#include <iostream>
#include <fstream>
#include <string.h>
#include <stdio.h>
#include <map>
#include <vector>
#include <list>
#include <set>
#include <stack>

#include <Utils/FileTools.h>
#include <Phyl/Newick.h>
#include <Phyl/Tree.h>
#include <Phyl/TreeTemplate.h>
#include <Phyl/TreeTemplateTools.h>
#include <Utils/Clonable.h>

 
#include "StockholmReader.h"  
#include "TestSubstitutionModel.h"
#include "SubstitutionModel.h"
#include "HKY85GapSubstitutionModel.h"
#include "HKY85SubstitutionModel.h"
#include "EukSSUrRNASubstitutionModel.h"
#include "PairedHKY85SubstitutionModel.h"
#include "PairedHKY85GapSubstitutionModel.h"
#include "PairedEukSSUrRNASubstitutionModel.h"
#include "PairedEukSSUrRNAGapSubstitutionModel.h"
#include "PairedEukSSUrRNASubstitutionModel.h"
#include "PairedProSSUrRNASubstitutionModel.h"
#include "ProSSUrRNASubstitutionModel.h"


#include "Log.h"
#include "opt.h"  
 
using namespace std;
using namespace ptr;

// Run-wide settings shared by all per-column calculations.
// A single instance is created in main() and referenced from calc_tree_parms::global.
struct global_parms {
	string log_dir_name;   // not referenced in the part of the file visible here -- TODO confirm usage
	string aln_file_name;  // alignment file name as given with -a/--aln (set in main())
	float upperbound_mu;   // upper bound for the mutation rate mu (-m/--maxmu); calcCol() starts its bracket with it and clamps the result to it
};

// Parameter bundle describing one column calculation; main() fills it and passes it to
// calcCol(), which forwards it to mnbrak() and golden() (and from there to calcTreeByMu()).
struct calc_tree_parms {
	bpp::Node *root;                // root node of the (pruned) tree, taken from tree->getRootNode() in main()
	double mu;                      // not set by main() in the visible code; presumably the rate currently being evaluated -- TODO confirm
	bool paired;                    // true if the column is evaluated with the paired model
	SubstitutionModel *model;       // model used for this column (paired or unpaired)
	int col;                        // alignment column to evaluate (0-based; for pairs main() passes the left side)
	list<string> *AcInTree;         // list filled by parseTree(); judging by the name, the accessions present in the tree -- TODO confirm
	map<int,int> *basepairs;        // column -> pairing partner column; main() treats -1 as "unpaired"
	map<string, string> *ac2seq;    // accession -> aligned sequence, as read from the Stockholm file
	global_parms *global;	          // run-wide settings (e.g. upperbound_mu)
};



// ---- Forward declarations of the helpers used by main() -------------------------------------
// Only calcCol(), golden(), shift2(), shift3() and the start of mnbrak() are visible in this part
// of the file; notes on the other helpers describe how main() uses them.

// Tree handling. main() calls parseTree() with the tree file name and receives the tree; the call
// also fills missing_species and AcInTree (presumably species of the tree that are missing in the
// alignment, and accessions found in the tree -- TODO confirm against the definition).
bpp::TreeTemplate<bpp::Node>* parseTree(const char *treefilename, bpp::Node **nodeWithMaxDistToFather, list<string> *missing_species, map<string,string> *ac2seq, list<string> * AcInTree);
void printTree(bpp::Node*, int);
void deleteSubTree(bpp::Node*);
bool fileExists(const char *fileName); 
bool fileExists(const string& fileName);
// main() calls findPairs() once per bracket type ('<' '>' and 'A'/'a' ... 'Z'/'z') on the SS_cons
// line to collect base pairs into the basepairs map.
void findPairs(const char *ss, const int len, map<int, int> *basepairs, char open, char close);
// Fills LH (node id -> per-state likelihood array) for column n; main() reads the root entry.
void walkTree(bpp::Node *curr_node, double mu, bool paired,  SubstitutionModel *model, int n, map<int,double*> & LH, map<int, int> *basepairs, map<string, string> *ac2seq);
// Objective function evaluated by golden(), which keeps the point with the smaller value.
double calcTreeByMu(double mu, calc_tree_parms *tp);
// Estimates mu for one column (mnbrak() + golden()) and stores it in max_mus[col].
void calcCol(int col, calc_tree_parms *tp, double* max_mus);
void mnbrak(double &ax, double &bx, double &cx, calc_tree_parms *tp);
double golden(const double ax, const double bx, const double cx, double & max_mu,  calc_tree_parms *tp);
inline void shift3(double & a, double & b, double & c, const double d);
inline void shift2(double & a, double & b, const double c);
// Column predicates used by main() to skip or short-cut columns.
bool hasOnlyGapOrN(int col, map<string, string> &ac2seq, list<string> &AcInTree);
bool hasGap(int col, map<string, string> &ac2seq);
bool hasNoMutation(int col, bool forceunpaired, map<string, string> & ac2seq, list<string> & AcInTree, map<int,int> & basepairs);
bool isAmbiguous(char & c);
bool isAmbiguous(string & s);
// Model factories; main() deletes the returned models at the end of the run.
SubstitutionModel* getUnpairedModel(char * unpairedModelName, float kappa);
SubstitutionModel* getPairedModel(char * pairedModelName, float kappa);
 
// Options parsed with:
// http://www.decompile.com/not_invented_here/opt/index.htm   Opt 3.19


int main(int argc, char *argv[]){
 

	// some default options
	char *treefilename = (char*)"tree.ph";
	char *alnfilename = (char*)"aln.stk";
	char *loglevel = (char*)"info";
	
	int verbose  = 0;	
	int alnlength;
	int offset_start = 0;
	int offset_end = -1;  // defaults to aln_length -1 later
	
	char *unpairedModelName = (char*)"hky85gap";
	char *pairedModelName = (char*)"EukSSUrRNA";
	char *targetNameCStr = NULL;
	string targetName = "";
	
	float kappa = 1.0;
	float upperbound_mu = 5.0;

	float *unpairedFrequencies;
	float *pairedFrequencies;

	string SS_cons;
  	map<string, string> ac2seq; // sequence
  	map<string, string> ac2ss;  // SS string
  	map<string, char*> ac2cstr;
  	
  	list<string> missing_species;
  	list<string> AcInTree;
	map<int, int> basepairs;
	
  	bpp::Node *nodeWithMaxDistToFather;
  	
  	float sumOfBranchLengthsToTarget = 0.0;
  	
  	global_parms *global = new global_parms;
	
	int parentOfTargetInt = 0;
	bool parentOfTarget = false;
	
	int pseudoCount = 2;
  	
	OptRegister(&treefilename, 't', (char*)"tree", (char*)"tree file name (.ph)");
	OptRegister(&alnfilename, 'a', (char*)"aln", (char*)"alignment file name (.stk)");
  	OptRegister(&loglevel, 'l', (char*)"loglevel", (char*)"loglevel: error, warn (default), info (=verbose), debug, trace, trace2, trace3");
  	OptRegister(&verbose ,OPT_BOOL, 'v', (char*)"verbose", (char*)"verbose mode");
  	OptRegister(&offset_start, 'x', (char*)"start", (char*)"start column (0 is first column)");
  	OptRegister(&offset_end, 'y', (char*)"end", (char*)"end column");
  	OptRegister(&pairedModelName, 'p', (char*)"paired", (char*)"paired model name");
  	OptRegister(&unpairedModelName, 'u', (char*)"unpaired", (char*)"unpaired model name");
	OptRegister(&kappa,OPT_FLOAT, 'k', (char*)"kappa", (char*)"HKY85 kappa (transversion to transition ratio)");
	OptRegister(&upperbound_mu, 'm', (char*)"maxmu", (char*)"Upper bound for \\mu (defaults to 20.0)");
	OptRegister(&targetNameCStr, 'n', (char*)"targetname", (char*)"Name of target species in tree and alignment file");
	OptRegister(&parentOfTargetInt, OPT_BOOL, 'q', (char*)"parent", (char*)"calculate parent of target");
	OptRegister(&pseudoCount, 'c', (char*)"pseudocount", (char*)"pseudoCounts (defaults to 2)");

	
	// Parse options  	 
	opt(&argc,&argv);
	 
	// set Log Levels
	if(verbose == 1 && (strcmp(loglevel,(char*)"error") == 0 ||strcmp(loglevel,(char*)"warn") == 0)) {
		loglevel = (char*)"info";  // verbose == lINFO
	}
	Log::setLogLevel(loglevel);
	LOG(lINFO) << "Loglevel: " <<loglevel;
	
	
	if(kappa == -1.0 && (strcmp(unpairedModelName,(char*)"hky85") == 0 || strcmp(unpairedModelName,(char*)"hky85gap") == 0 ||  strcmp(pairedModelName,(char*)"hky85") == 0 || strcmp(pairedModelName,(char*)"hky85gap") == 0)) {
		LOG(lINFO) << "Setting \\kappa to 1.0";
		kappa = 1.0;
	}
		
	global->upperbound_mu = upperbound_mu; 	
		
	global->aln_file_name = string(alnfilename);
	
	LOG(lINFO) << "Alignment file name = " <<alnfilename;
	LOG(lINFO) << "Paired model pseudocounts = " << pseudoCount;
	LOG(lINFO) << "Paired model name = " <<pairedModelName;
	LOG(lINFO) << "Unpaired model name = " <<unpairedModelName;
	LOG(lINFO) << "Kappa (HKY85) = " <<kappa;
	LOG(lINFO) << "Upper bound \\mu = " <<upperbound_mu;
	if(targetNameCStr == NULL || strlen(targetNameCStr)==0) {
		LOG(lINFO) << "No target name specified";
	}
	else {
		LOG(lINFO) << "Target name = " << targetNameCStr;	
		targetName = targetNameCStr;
	}
	
	if(parentOfTargetInt == 1) { parentOfTarget = true; }
	
	
	// ************* read alignment **************************************************************************************
  	{  	
  	StockholmReader sr;
  		sr.read(alnfilename,ac2seq,ac2ss,SS_cons);
  	}
  	
  	LOG(lINFO) << ac2seq.size() << " sequences are in the alignment.";
  	
  	alnlength = SS_cons.length();
   	
  	if(offset_end == -1 || offset_end > alnlength || offset_end < offset_start) {
  		LOG(lDEBUG) << "End column set to " << alnlength << endl;
  		offset_end = alnlength - 1;
  	}
  	if(offset_start > offset_end) {
  		cerr << "ERROR: Start offset can not be greater then end offset." << endl;
  		exit(1);
  	}
  	LOG(lDEBUG) << "Alignment length: " << alnlength;
  	
  	LOG(lDEBUG) << SS_cons ;
  	
  	findPairs(SS_cons.c_str(), alnlength, &basepairs, '<', '>');
  	// find pseudoknotted pairs, using rfam annotation, uppercase char is 5' end of bp..
  	for(char c='a'; c<='z'; c++) {
  		char upper = toupper(c);
  		findPairs(SS_cons.c_str(), alnlength, &basepairs, upper, c);
  	}
  	  		
	// ************* parse tree ********************************************************************************************
	bpp::TreeTemplate<bpp::Node> * tree = parseTree(treefilename, &nodeWithMaxDistToFather, &missing_species, &ac2seq, &AcInTree);
	
	LOG(lINFO) << tree->getNumberOfLeaves () << " species are in the tree.";
			
	if(missing_species.empty()) {
		cerr << "ERROR: All the species in the tree are contained in the alignment." << endl;
		exit(1);	
	}
		
	if(missing_species.size() > 1 && targetName == "") {
		LOG(lDEBUG) << "Missing species:";
		for(list<string>::iterator it = missing_species.begin() ; it != missing_species.end(); it++ ) {
			LOG(lDEBUG) <<  *it ; 
		}
		
		cerr << "ERROR: More than one species is missing in the alignment and no target species was specified." << endl;
		exit(1);
	}
	
	if(missing_species.size() == 1 && targetName == "") {
		targetName = missing_species.front().c_str();	
	}
	
	
	// check if target is in tree 
	try {
		tree->getNode(targetName);	
	}	
	catch(exception e) {
		cerr << "ERROR: The specified target name " << targetName << " is not contained in the tree." << endl;
		exit(1);
	}
	
	LOG(lINFO) << "Target species = " << targetName;
	
	
    printTree(tree->getNode(tree->getRootId()),0);
 
 
 	// check if there are multifurcations in the tree, eg. produced by fasttree with identical sequences: (Leaf1:0.0,Leaf2:0.0,Leaf3:0.0)ParentOfIdenticalLeaves:0.1
 	// general multifurcations would not work with Felsenstein's alg. per se, but we can at least kick out multifurcations with these identical leafs.
 	// 
 	// remove all but two leafs from a multifurcation if they have zero branch length
 	{
 		vector<int> leavesIds = tree->getLeavesId();
 		set<int> fathersWithMoreThanTwoLeavesIds;
		for(unsigned int i=0; i < leavesIds.size(); i++) {
 			bpp::Node *n = tree->getNode(leavesIds[i]);
 			bpp::Node *father = n->getFather();
 			if(father->getNumberOfSons() > 2 && father->getId() != tree->getRootId()) {	
 				//check if father has >2 leafs as sons 
 				vector<bpp::Node*> sons = father->getSons();
 				bool allLeafs = true;
 				for(vector<bpp::Node*>::iterator it = sons.begin(); it != sons.end(); ++it) {
 					allLeafs &= (*it)->isLeaf();	
 				}
 				if(allLeafs) {
 					LOG(lDEBUG) << "Node " << father->getId() << " has more than 2 sons and all sons are leafs.";	
 					fathersWithMoreThanTwoLeavesIds.insert(father->getId());
 				}
 				else {
 					// this is a "inner" multifurcation, i.e. not all sons are leaf nodes
 					// this cannot be solved immediatly within the Felsenstein computation, so we rather exit here
 					cerr << "ERROR: The tree contains an internal multifurcation."<< "(Node #" << father->getId() << ")" << endl;
 					exit(1);	
 				}
 			}	
		}
		
		// now removing leafs from multifurcations which only have leaf nodes
		// try to remove leafs which are not in alignment first
		// if there is still a multifurcation, then only kick out leafs with branch length zero
		// otherwise exit, if the multifurcation cannot be resolved
		if(!fathersWithMoreThanTwoLeavesIds.empty()) {
			for(set<int>::iterator it = fathersWithMoreThanTwoLeavesIds.begin(); it != fathersWithMoreThanTwoLeavesIds.end(); ++it) {	
				bpp::Node *father = tree->getNode(*it);
				LOG(lDEBUG) << "Removing all but two child nodes from node " << father->getId();
				vector<bpp::Node*> sons = father->getSons();
				
				// try to remove leafs which are not in alignment first:
				for(vector<bpp::Node*>::iterator it = sons.begin(); it != sons.end(); ++it) {
 					if((*it)->getName() != targetName && father->getNumberOfSons() > 2 && ac2seq.count((*it)->getName())==0) {
 						LOG(lDEBUG) << "Removing leaf node " << (*it)->getId() << " " << (*it)->getName();
 						father->removeSon(*it);
 						missing_species.remove((*it)->getName());
 					}		
 				}
 				
 				// remove leafs with 0.0 branch length
				for(vector<bpp::Node*>::iterator it = sons.begin(); it != sons.end(); ++it) {
 					if((*it)->getName() != targetName && father->getNumberOfSons() > 2 && (*it)->getDistanceToFather() == 0.0) {
 						LOG(lDEBUG) << "Removing leaf node (with branch length 0.0) " << (*it)->getId() << " " << (*it)->getName();
 						father->removeSon(*it);
 					}		
 				}
 				
 				// if there is still al multifurcation left, it has only species which are in the alignment and branch length >0.0
 				// then we cannot handle this and exit
 				if(father->getNumberOfSons() > 2) {
 					cerr << "ERROR: The tree contains a node with more than 2 leafs, all of them in the alignment. (Node #" << father->getId() << ")" << endl;
 					exit(1);
 				}
 				
			}
			printTree(tree->getNode(tree->getRootId()),0);
		}
 	}
		
		
			
	// set target species as root 
	LOG(lDEBUG) << "Rooting tree at target species.";
	{
		bpp::Node *n = tree->getNode(targetName);
		tree->rootAt(n->getId());	
	}
    printTree(tree->getNode(tree->getRootId()),0);
 
	 
	// remove all missing species iteratively from the tree, except target species
	LOG(lDEBUG) << "Removing all tree leaves that are not in the alignment (except target species).";
	{ 
		list<string>::iterator it;
		for(it = missing_species.begin() ; it != missing_species.end(); it++ ) {
			if(*it != targetName) {
				LOG(lDEBUG) << "Removing " << *it << " from tree."; 
				bpp::Node *n = tree->getNode(*it);
				bpp::Node *father = n->getFather();
				father->removeSon(n);
			}	
		}
	}
    printTree(tree->getNode(tree->getRootId()),0);
 
	
	// remove all subtrees that have nodes with no names... this happens after removing some missing species
	// more precise: iterate over all leaves with no name and delete them until nothing is left to delete
	// so the empty subtrees become shorter and shorter in each iteration
	LOG(lDEBUG) << "Removing all \"empty\" subtrees";
	{
		bool deletedSmth = true;
		while(deletedSmth) {
			deletedSmth = false;
			vector<int> leavesIds = tree->getLeavesId();
			for (unsigned int i=0; i < leavesIds.size(); i++) {
    			bpp::Node *n = tree->getNode(leavesIds[i]);
				if(!tree->hasNodeName(leavesIds[i])) {  // achtung inner node names von input tree?  die werden erstmal ignoriert beim parsen des ph files
					bpp::Node *father = n->getFather();
					father->removeSon(n);
					deletedSmth = true;
					LOG(lDEBUG) << "Deleting former inner node with id " << leavesIds[i];
				}
			}
		}
	}
	printTree(tree->getNode(tree->getRootId()),0);
   
	
	//now we can have this case, some nodes between target and last ancestor with remaining other species
	// x----*----*----*---- target
    // \------X
    // we walk up to this node and add the branch lengths and set it as temporary root in newtree, it will also be the target later if we choose -q
    // notice that target is the current root of the tree
     {
     	bpp::Node * last;
     	bpp::Node * t = tree->getNode(targetName);
	    last = t;
	    while(t->getNumberOfSons() == 1) {
	    	last = t;
	    	t = t->getSon(0);
	    	LOG(lTRACE) << "Node: " << t->getId() << " Father: " << t->getFather()->getId() << " Dist: " << t->getDistanceToFather() ;
	    	sumOfBranchLengthsToTarget += t->getDistanceToFather();	
	    }
	  	LOG(lDEBUG) << "t = " << t->getId() << " Sum of distance = " << sumOfBranchLengthsToTarget;
	    tree->rootAt(t->getId());
	    LOG(lTRACE) << "Tree rooted at node " <<  t->getId();
	    LOG(lTRACE) << "Path to target species starts at node " << last->getId();
	
    	printTree(tree->getNode(tree->getRootId()),0);
 
	 	LOG(lDEBUG) << "Delete subtree containing target species, starting at node " << last->getId();
	 	deleteSubTree(last);
	 	tree->getNode(tree->getRootId())->removeSon(last);
		delete last;	
		printTree(tree->getNode(tree->getRootId()),0);
    }
 
 	//******************** make Substitution Models and count Frequencies *******************************************************************
 	
	LOG(lTRACE3) << "Creating unpaired model";
	SubstitutionModel *unpairedModel = getUnpairedModel(unpairedModelName, kappa);
	unpairedFrequencies = new float[unpairedModel->getMatrixSize()];
	// init with 0
	for(int i = 0; i < unpairedModel->getMatrixSize(); i++) { 
		unpairedFrequencies[i] = 0.; 
	}
			
	LOG(lTRACE3) << "Creating paired model";
	SubstitutionModel *pairedModel = getPairedModel(pairedModelName, kappa);
	// init with 0
	pairedFrequencies = new float[pairedModel->getMatrixSize()];
	for(int i = 0; i < pairedModel->getMatrixSize(); i++) {
		pairedFrequencies[i] = 0. ;
	}	
	
	
	// calculate residue frequencies
	{
	LOG(lDEBUG) << "Calculating frequencies ";
	unsigned int sumpaired = 0;
	unsigned int sumunpaired = 0;
  	for(map<string,string>::iterator it = ac2seq.begin(); it != ac2seq.end(); ++it) {
  		for(int pos = 0; pos < alnlength;pos++) {
  			const bool paired = (basepairs[pos] > pos);  // leftside
  			const bool unpaired = (basepairs[pos]==-1); 
  			string bp = string(1,it->second[pos]);  // base at position n
			LOG(lTRACE3) << "Column " << pos << " paired: " <<paired << "pairpos="<<basepairs[pos] ;
			if(paired) {
				bp += string(1,it->second[basepairs[pos]]);  // append right side
				LOG(lTRACE3) << "BP=" << bp << " row=" << pairedModel->row[bp];
				if(!isAmbiguous(bp)) {
  					pairedFrequencies[ pairedModel->row[bp] ] += 1.0;
  					sumpaired++;
  				}
			}
			if(unpaired) { // unpaired
				LOG(lTRACE3) << "BP=" << bp << " row=" << unpairedModel->row[bp];
  				if(!isAmbiguous(bp)) { 
  					if(bp=="-") {
  						if(pos==0) { // first gap counts
  							unpairedFrequencies[unpairedModel->row[bp]] += 1.0;
  							sumunpaired++;
  						}
  						else {
  							string previous_bp = string(1,it->second[pos-1]);  // base at position n-1
  							if(previous_bp != "-") { // only count the first occurance of each gap
  								unpairedFrequencies[unpairedModel->row[bp] ] += 1.0;
  								sumunpaired++;
  							}
  							// else: previous bp was already a gap, so we don't count this one
  						}
  					}
  					else {
  						unpairedFrequencies[unpairedModel->row[bp] ] += 1.0;
  						sumunpaired++;
  					}
  				}
  			}
  			
  		}	
  	 }
   	LOG(lDEBUG) << "Sum of residues: paired=" << sumpaired << "  " << "unpaired=" << sumunpaired;
  	
  	LOG(lDEBUG) << "Frequencies unpaired:";
	// include pseudocounts
	double t = 0.0;
   	for(int i = 0; i < unpairedModel->getMatrixSize(); i++)
   		t += unpairedFrequencies[i] + pseudoCount;
   	for(int i = 0; i < unpairedModel->getMatrixSize(); i++) {
		unpairedFrequencies[i] = (unpairedFrequencies[i] + pseudoCount) / t;
		LOG(lDEBUG) << unpairedModel->rrow[i]  << " "  << unpairedFrequencies[i];
   	}
		
	LOG(lDEBUG) << "Frequencies paired:";
  	// include pseudocounts
	t = 0;
   	for(int i = 0; i < pairedModel->getMatrixSize(); i++)
   		t += pairedFrequencies[i] + pseudoCount;
   	for(int i = 0; i < pairedModel->getMatrixSize(); i++) {
		pairedFrequencies[i] = (pairedFrequencies[i] + pseudoCount) / t;
		LOG(lDEBUG) << pairedModel->rrow[i]  << " "  << pairedFrequencies[i];
   	}
		
	} // end calculate frequencies
		 
	LOG(lDEBUG) << "Init Paired Model ";
	pairedModel->setFreq(pairedFrequencies);
	pairedModel->init();
	LOG(lDEBUG) << "Init Unpaired Model ";
	unpairedModel->setFreq(unpairedFrequencies);
	unpairedModel->init();
	
	//********************** calculate mutation rates for each column **********************************************************
	
	double *max_mus = new double[alnlength];
	for(int col = 0; col < alnlength; col++) {
		max_mus[col] = -1;
	}
				
	for(int for_col = offset_start; for_col <= offset_end; for_col++) {
		int col = for_col;
		bool unpaired = (basepairs[col]==-1);
		bool paired = !unpaired;
		bool leftside = paired && basepairs[col] > col; 
		bool rightside = paired && !leftside; 
		bool paired_to_unpaired = false; // converted from paired to unpaired, because paired model cannot handle gaps, but still it is a basepair
		
		if(hasOnlyGapOrN(col, ac2seq, AcInTree)) {
			LOG(lINFO) << "Column " << col << " has only gaps or ambigoues (N) nucleotides.";	
			max_mus[col] = 0;
			continue;	
		}
			
		bool hasGaps = hasGap(col, ac2seq);
		if(hasGaps || paired && hasGap(basepairs[col], ac2seq) ) {
			LOG(lDEBUG) << col << " has gaps";
			if(unpaired && !unpairedModel->canHandleGaps()) {
				LOG(lINFO) << "Unpaired model cannot handle gaps, skipping column " << col;
				continue;
			}
			else if(paired) {
				if(unpairedModel->canHandleGaps() && !pairedModel->canHandleGaps()) {
					LOG(lINFO) << "Paired model cannot handle gaps, using unpaired model for both columns " << col << " and " << basepairs[col];
					paired=false; // set to unpaired, so that unpaired model will be used to calculate both columns independently 
					unpaired=true;
					rightside=false;
					leftside=false;
					paired_to_unpaired = true;
				}
				else {
					LOG(lINFO) << "Paired and unpaired model cannot handle gaps, skipping column " << col;
					continue;
				}
			}
		}
		else
			LOG(lDEBUG) << col << " has no gaps";
				
		if(rightside) { 
			LOG(lDEBUG) << col << " is right side of a base pair.";
			 
			// if left side was already calculated, then copy stuff from left side
			if(max_mus[basepairs[col]] >= 0.0) {
				max_mus[col] = max_mus[basepairs[col]];
				continue;
			}
			else {
				// left side is not yet calculated, this is due to param -x > basepairs[col] (window doesnt overlap with left side)
				// then we switch to left side and copy max_mus to right side later
				// walkTree() could only calculate with col being the left side of the bp, that's why we switch	
				LOG(lDEBUG) << col << " needs to be calculated, since left side was not calculated";
				col = basepairs[col];
				LOG(lDEBUG) << "Switch to column " << col <<", which is the left side of column " << basepairs[col];
			
			}
			
		}
		else if(leftside)
			LOG(lDEBUG) << col << " is left side of a base pair.";
		else
			LOG(lDEBUG) << col << " is unpaired.";

		
		SubstitutionModel * model = (paired) ? pairedModel : unpairedModel;
					
		calc_tree_parms *tm = new calc_tree_parms;
			
		tm->col=col;
		tm->model = model;
		tm->paired = paired;
		tm->root = tree->getRootNode();
		tm->basepairs = &basepairs;
		tm->ac2seq = &ac2seq;
		tm->AcInTree = &AcInTree;
		tm->global = global;
		
		
		// check if cols have mutations, if not, they get mu=0, so that the conserved residue gets 100% probabilty
		// if we set mu to 1.0, the conserved residue would get less than 100% according to the branch length
				
		if(paired_to_unpaired) {
			if(hasNoMutation(col, true, ac2seq, AcInTree, basepairs))
				max_mus[col] = 0.0;  
			else
				calcCol(col, tm, max_mus);
		}
		else { // normal case
			if(hasNoMutation(col, false, ac2seq, AcInTree, basepairs))
				max_mus[col] = 0.0;  
			else
				calcCol(col, tm, max_mus);
		}
		
		if(rightside) {
			// col was switched to left side before, now copy result to right side
			max_mus[basepairs[col]] = max_mus[col];
		}
		
		// in the paired_to_unpaired case we need to check if the pairing partner is out of the -x and -y bounds and calculate it too
		// this is not necessary for "normal" base pairs, since both columns are calculated at the same time using big substitution matrix
		if(paired_to_unpaired) {
			if(basepairs[col] < offset_start || basepairs[col] > offset_end) {
				if(hasNoMutation(basepairs[col], true, ac2seq, AcInTree, basepairs))
					max_mus[basepairs[col]] = 0.0;
				else {			
					tm->col = basepairs[col];
					calcCol(basepairs[col], tm, max_mus);
				}
			}
		}
	
	} // end for loop over all columns
	
	LOG(lDEBUG) << "Finished calculating all columns";
	
	// *************** reroot the tree to the target species****************************************************************************
	if(!parentOfTarget){
		LOG(lDEBUG) << "Adding target back to root of tree using branch length " << sumOfBranchLengthsToTarget;
		int newTargetId = tree->getNextId(); 
		bpp::Node * target = new bpp::Node(newTargetId,targetName);
		bpp::Node * root = tree->getNode(tree->getRootId());
		root->addSon(target);
		target->setDistanceToFather(sumOfBranchLengthsToTarget);
	
		printTree(tree->getNode(tree->getRootId()),0);
	
		LOG(lDEBUG) << "Rerooting tree to target";
		tree->rootAt(newTargetId);
		printTree(tree->getNode(tree->getRootId()),0);
	}
	else {
		LOG(lDEBUG) << "Parent of target species kept as root";
	}
	
	//*********************calculate residue probabilities from the estimated mutation rates ********************************************
	LOG(lDEBUG) << "Calculating basepair probabilities for each column now";
	
	string ausgabedatei = alnfilename;
	ausgabedatei += ".out";
	LOG(lINFO) << "Output file = " << ausgabedatei;
	ofstream out_str;
	out_str.open(ausgabedatei.c_str(), ios::out);
		
	for(int for_col = offset_start; for_col <= offset_end; for_col++) {
		int col = for_col;
		bool unpaired = (basepairs[col]==-1);
		bool paired = !unpaired;
		const bool leftside = paired && basepairs[col] > col; 
		const bool rightside = paired && !leftside; 
		bool paired_to_unpaired = false; // converted from paired to unpaired, because paired model cannot handle gaps, but still it is a basepair
		int output_col = col;  // output_col will be used for printing, while col might change if it is a right side of a bp, then will change to left side
			
		if(unpaired) 
			LOG(lDEBUG) << col << " is unpaired"; 
		else
			LOG(lDEBUG) << col << ((leftside) ? " is left side of a base pair" : " is right side of a base pair") << ", pair is " <<basepairs[col];
		
		
		
		bool hasGaps = hasGap(col, ac2seq);
		if(hasGaps || paired && hasGap(basepairs[col], ac2seq) ) {
			LOG(lDEBUG) << col << " has gaps";
			if(unpaired && !unpairedModel->canHandleGaps()) {
				LOG(lINFO) << "Unpaired model cannot handle gaps, skipping column " << col;
				out_str << col << "\tpair=-1\tmax_mu=n/a\tIC=n/a\tcolumn was not computed due to gaps" << endl;
				continue;
			}
			else if(paired) {
				if(unpairedModel->canHandleGaps() && !pairedModel->canHandleGaps()) {
					LOG(lINFO) << "Paired model cannot handle gaps, using unpaired model for both columns " << col << " and " << basepairs[col];
					paired=false; // set to unpaired, so that unpaired model will be used to calculate both columns independently 
					unpaired=true;
					paired_to_unpaired = true;
				}
				else {
					LOG(lINFO) << "Paired and unpaired model cannot handle gaps, skipping column " << col;
					out_str << col << "\tpair=" << basepairs[col] << "\tmax_mu=n/a\tIC=n/a\tcolumn was not computed due to gaps" << endl;
					continue;
				}
			}
		}
		else
			LOG(lDEBUG) << col << " has no gaps";
			
		// for basepairs calculated with pairedmodel, switch to left side again (for walkTree())
		if(!paired_to_unpaired && rightside)
			col = basepairs[col];
		
		SubstitutionModel * model = (paired) ? pairedModel : unpairedModel;
			
		
		map<int,double*> LH; // nodeID -> Likelihoods for each state
		
		// compute tree again with the estimated max mu
		walkTree(tree->getRootNode(), max_mus[col], paired, model, col, LH, &basepairs, &ac2seq);			
								
		double *pL = LH[tree->getRootNode()->getId()];  // Likelihoods at root node
		double sum = 0.0; //sum of likelihoods for all states
			
		for(int i=0; i < model->getMatrixSize(); i++) {
			LOG(lTRACE) << "Likelihood for state "	<< i << " " << model->rrow[i] <<": " << pL[i];
			LOG(lTRACE) << "Frequencies for state " << i << " " << model->rrow[i] <<":" << ((unpaired) ? unpairedFrequencies[i] : pairedFrequencies[i]);
			pL[i] = pL[i] * ((unpaired) ? unpairedFrequencies[i] : pairedFrequencies[i]) ; // hier noch die equillibrium frequency von state i multiplizieren
			pL[i] = abs(pL[i]); // get rid of small negative numbers, like -8.232E-17
			sum += pL[i] ;
		}

		// the probability for each state would now be pL[i] / sum
			
		// calculate information content for that row: 
		double ic = 0.0;
		for(int i=0; i < model->getMatrixSize(); i++) {
			double prob_i = pL[i]/sum;
			if(prob_i < 0.0005) { // get rid of small numbers
				prob_i = 0.0;
			}
			double ic_part = (prob_i == 0.0) ? 0.0 : prob_i * log(prob_i) / log(2.);
			LOG(lDEBUG) << "Prob=" << prob_i << " IC_part=" << ic_part;
			if(fabs(prob_i - 1.0) < 1E-15) { 
				LOG(lDEBUG) << "Prob ist 1, IC ist max!";
				ic = 0;
				break;
			} // max ic erreicht bei 100% konservierung, i.e. ein residue kommt so in allen seq vor und hat Prob. 1.0
			ic += ic_part;
		}
		double max = log(model->getMatrixSize())/log(2.);  // = 2 for unpaired (without gaps) or 4 for paired (16x16 matrix) without gaps
    	ic = ic + max;
    		   	
		
		out_str << output_col << "\t" << "pair=" << basepairs[output_col] << "\t"; 
		out_str << "max_mu=" << max_mus[col] << "\t";
		out_str << "IC=" << ic << "\t";
		LOG(lDEBUG)  << "col="<< output_col << " " << "pair=" << basepairs[output_col] << " " << "max_mu=" << max_mus[col] << " " <<  "IC=" << ic;
		for(int i=0; i < model->getMatrixSize(); i++) {
			double prob_i = pL[i]/sum;
			if(prob_i < 0.0005) { // get rid of small numbers
				prob_i = 0.0;
			}
			LOG(lDEBUG) << model->rrow[i] << "=" << prob_i;
			out_str << model->rrow[i] << "=" << prob_i << "\t";
		}
		
		if(paired) {
			unsigned int index_end = leftside ? 0 : 1;
			for(int u = 0; u < unpairedModel->getMatrixSize(); u++) {	
				double u_sum = 0.0;
				for(int p = 0; p < pairedModel->getMatrixSize(); p++) {	
					//take the first or second char of that basepair and compare to unpaired base and add up all Probs for that base
					if((pairedModel->rrow[p])[index_end] == (unpairedModel->rrow[u])[0])
						u_sum += pL[p]/sum;
				}
				LOG(lDEBUG) << unpairedModel->rrow[u] << "=" << u_sum;
				out_str << unpairedModel->rrow[u] << "=" << u_sum << "\t";
			}			
			
			// sum probs for basepairs AT TA GC CG GU UG == paired prob
			double sum_canonical_bp = pL[pairedModel->row["AT"]]/sum + pL[pairedModel->row["TA"]]/sum + pL[pairedModel->row["GC"]]/sum
										+ pL[pairedModel->row["CG"]]/sum + pL[pairedModel->row["GT"]]/sum + pL[pairedModel->row["TG"]]/sum;
			out_str << "prob_paired=" << sum_canonical_bp << "\t";	 
		
		}
		else if(paired_to_unpaired) {	
			LOG(lDEBUG) << "Calculating likelihoods for column " << basepairs[col];
			// in any case, we need to calculate the other side of the basepair, to calculate the probs of the canonical basepairs
			map<int,double*> LH_pair;
			walkTree(tree->getRootNode(), max_mus[basepairs[col]], paired, model, basepairs[col], LH_pair, &basepairs, &ac2seq);
			double *pL_pair = LH_pair[tree->getRootNode()->getId()];  // Likelihoods at root node
			double sum_pair = 0.0;
			for(int i=0; i < unpairedModel->getMatrixSize(); i++) {
				LOG(lTRACE) << "Likelihood for state "	<< i << " " << unpairedModel->rrow[i] <<": " << pL_pair[i];
				LOG(lTRACE) << "Frequencies for state " << i << " " << unpairedModel->rrow[i] <<":" << unpairedFrequencies[i];
				pL_pair[i] = pL_pair[i] * unpairedFrequencies[i]; // hier noch die equillibrium frequency von state i multiplizieren
				pL_pair[i] = abs(pL_pair[i]); // get rid of small negative numbers, like -8.232E-17
				sum_pair += pL_pair[i];
			}
			
			// berechne probs für alle basepairs
			double bpsum = 0.0;
			double sum_canonical_bp = 0.0;
			for(int p = 0; p < pairedModel->getMatrixSize(); p++) {
				//get first and second base
				unsigned int index_left = leftside ? 0 : 1;
				unsigned int index_right = leftside ? 1 : 0;
				char leftbase  = (pairedModel->rrow[p])[index_left];
				char rightbase = (pairedModel->rrow[p])[index_right];
				double leftprob = pL[unpairedModel->row[string(1,leftbase)]] / sum;
				double rightprob = pL_pair[unpairedModel->row[string(1,rightbase)]] / sum_pair;
				LOG(lDEBUG) << "leftprob " << leftbase  << " " << leftprob;
				LOG(lDEBUG) << "rightprob " << rightbase  << " " << rightprob;
				LOG(lDEBUG) << pairedModel->rrow[p] << "=" << leftprob*rightprob ;
				out_str 	<< pairedModel->rrow[p] << "=" << leftprob*rightprob << "\t";
				if(pairedModel->rrow[p]=="AT" || pairedModel->rrow[p]=="TA"|| pairedModel->rrow[p]=="GC"
					|| pairedModel->rrow[p]=="CG" || pairedModel->rrow[p]=="GT" || pairedModel->rrow[p]=="TG")
					sum_canonical_bp += leftprob*rightprob;
			}
			out_str << "prob_paired=" << sum_canonical_bp << "\tunpairedModel=1";
			LOG(lDEBUG) << "sum=" << bpsum;
		
			
		
		}
		out_str << endl;
					
	}  // end for loop
		
	out_str.close();
	
	if(unpairedModel != NULL) {
		delete unpairedModel;
	}
	if(pairedModel != NULL) {
		delete pairedModel;
	}
	if(tree != NULL) {
		delete tree;
	}
  
	return 0;
	
}


/*
 * Finds the rate mu for one alignment column and stores it in max_mus[col].
 *
 * The search minimises calcTreeByMu() (which returns the negated tree
 * likelihood): mnbrak() brackets a minimum starting from the interval
 * [0, upperbound_mu], golden() then refines it. The result is capped at
 * tp->global->upperbound_mu.
 *
 * col      index of the slot in max_mus that receives the result
 * tp       parameters handed through to mnbrak()/golden()/calcTreeByMu()
 * max_mus  output array, must have room for index col
 */
void calcCol(int col, calc_tree_parms *tp, double* max_mus) {
	LOG(lDEBUG) << "Start calculating column " << col << " now.";

	const double upperLimit = tp->global->upperbound_mu;

	// initial bracket; mnbrak() receives all three values by reference and rewrites them
	double lower = 0.0;
	double middle = upperLimit;
	double upper = 0.0;
	mnbrak(lower, middle, upper, tp);

	// golden() returns the function value at the optimum; only the position is needed here
	double best_mu = 0.0;
	golden(lower, middle, upper, best_mu, tp);

	if(best_mu > upperLimit) {
		best_mu = upperLimit;
	}

	// at this point the optimal mu of this column is known
	max_mus[col] = best_mu;
	LOG(lDEBUG) << "Max mu for col " << col << " is " << best_mu;

	LOG(lDEBUG) << "End calculating column " << col;
}

	

/*
 * Golden section search for a minimum of calcTreeByMu().
 *
 * ax, bx, cx  bracketing triple (as produced by mnbrak()); bx lies between ax and cx
 * max_mu      receives the abscissa of the located minimum
 * tp          parameters forwarded to calcTreeByMu()
 *
 * Returns the function value at max_mu.
 */
double golden(const double ax, const double bx, const double cx, double & max_mu,  calc_tree_parms *tp){

	const double tol = 0.001; // fractional precision
	const double R = 0.61803399;
	const double C = 1.0-R;

	// outerLo/outerHi are the ends of the bracket, innerLo/innerHi the two interior probes
	double outerLo = ax;
	double outerHi = cx;
	double innerLo;
	double innerHi;

	// put the new probe into the larger of the two segments
	if(fabs(cx - bx) > fabs(bx - ax)){
		innerLo = bx;
		innerHi = bx + C * (cx - bx);
	}else{
		innerHi = bx;
		innerLo = bx - C * (bx - ax);
	}

	double fLo = calcTreeByMu(innerLo, tp);
	double fHi = calcTreeByMu(innerHi, tp);

	while(fabs(outerHi - outerLo) > tol * (fabs(innerLo) + fabs(innerHi))){
		if(fHi < fLo){
			// minimum is in [innerLo, outerHi]: drop outerLo and probe further right
			const double probe = R*innerHi + C*outerHi;
			outerLo = innerLo;
			innerLo = innerHi;
			innerHi = probe;
			fLo = fHi;
			fHi = calcTreeByMu(innerHi, tp);
		}
		else{
			// minimum is in [outerLo, innerHi]: drop outerHi and probe further left
			const double probe = R*innerLo + C*outerLo;
			outerHi = innerHi;
			innerHi = innerLo;
			innerLo = probe;
			fHi = fLo;
			fLo = calcTreeByMu(innerLo, tp);
		}
	}

	// report the better of the two remaining interior points
	if(fLo < fHi){
		max_mu = innerLo; //mu
		return fLo; //f(mu)
	}
	max_mu = innerHi;
	return fHi;
}

/*
 * Shifts three values one slot to the left and feeds d in at the end:
 * a takes the old b, b takes the old c, c takes d.
 */
inline void shift3(double & a, double & b, double & c, const double d) {
	// order matters: every slot is overwritten only after its old value was handed on
	a = b;
	b = c;
	c = d;
}

/*
 * Shifts two values one slot to the left and feeds c in at the end:
 * a takes the old b, b takes c.
 */
inline void shift2(double & a, double & b, const double c) {
	a = b;
	b = c;
}


/*
 * Brackets a minimum of calcTreeByMu().
 *
 * Starting from the two abscissas ax and bx, the routine steps downhill until
 * the middle function value is no longer larger than the value at cx. The
 * layout follows the classic Numerical Recipes "mnbrak" routine (golden ratio
 * steps combined with parabolic extrapolation).
 *
 * ax, bx  in: two initial points; out: first and middle point of the bracket
 * cx      out: third point of the bracket (its input value is ignored)
 * tp      parameters forwarded to calcTreeByMu()
 *
 * The function values fa/fb/fc are local only; they are not handed back to
 * the caller.
 */
void mnbrak(double &ax, double &bx, double &cx, calc_tree_parms *tp){
	const double GOLD = 1.618034;   // default magnification of successive intervals
	const double TINY = 1.0e-20;    // guards the division below against a zero denominator
	double GLIMIT = 100.0;          // maximum magnification allowed for a parabolic step
	
	double fa, fb, fc, fu;
	

	fa = calcTreeByMu(ax, tp);
	fb = calcTreeByMu(bx, tp);
		
	// make sure we walk downhill from a to b
	if(fb > fa){
		swap(ax, bx);
		swap(fb, fa);
	}
	
	// first guess for c: one golden step beyond b
	cx = bx + GOLD * (bx-ax);
	fc = calcTreeByMu(cx, tp);
		
	// keep going until f(b) <= f(c), i.e. until the triple brackets a minimum
	while(fb > fc){ 
		
		
		double r = (bx - ax) * (fb - fc);

		double q = (bx - cx) * (fb - fa);
		
		// The block below spells out this one-liner from the textbook routine:
		//double u = bx -((bx-cx)*q-(bx-ax)*r)/(2.0*SIGN(max(abs(q-r),TINY),q-r)):
		double h = max(fabs(q-r),TINY);
		double nenner;              // "Nenner" = denominator
		if( q - r < 0){				// equals 2*SIGN(h1,q-r)
			nenner = -2 * fabs(h); 
		}else{
			nenner = 2*fabs(h);
		}
		
		// u: parabolic extrapolation from a, b, c; ulim: furthest point we are willing to try
		double u = bx -((bx-cx)*q-(bx-ax)*r) / nenner;	
		double ulim = bx + GLIMIT * (cx - bx);
		
		// Test various possibilities:
				
		if((bx-u)*(u-cx) > 0.0){   // u lies between b and c
			fu = calcTreeByMu(u, tp);
			if(fu < fc){  // Got minimum between b and c
				ax = bx;
				bx = u;
				fa = fb;
				fb = fu;
				return;  				
			} else if(fu > fb){  // Got minimum, between a and u
				cx = u;
				fc = fu;
				return;
			}
			// parabolic fit was of no use, fall back to the default magnification
			u = cx + GOLD * (cx - bx);
			fu = calcTreeByMu(u, tp);
					
		} else if((cx - u) * (u - ulim) > 0.0){   // u lies between c and its allowed limit
			fu = calcTreeByMu(u, tp);

			if(fu < fc){
				// still going downhill: move b, c, u one step further
				shift3(bx, cx, u, cx + GOLD*(cx-bx));
				shift3(fb,fc,fu,calcTreeByMu(u, tp));
			}
			
		}else if ((u-ulim)*(ulim-cx) >= 0.0){   // u is beyond the limit: clamp it to ulim
			u = ulim;
			fu = calcTreeByMu(u, tp);
			
		}else{   // reject the parabolic u, use the default magnification
			u = cx + GOLD*(cx-bx);
			fu = calcTreeByMu(u, tp);
		}
		
		
		// eliminate oldest point and continue
		shift3(ax, bx, cx, u);
		shift3(fa,fb,fc,fu);
		
	      		
	} 	
	
	
}

/*
 * Objective function for the mu search.
 *
 * Returns the NEGATED likelihood of the tree for rate mu (sum over all states
 * of root likelihood * state frequency, multiplied by -1), so that minimising
 * this function maximises the likelihood.
 * For mu outside [0, tp->global->upperbound_mu] the constant 1 is returned as
 * a penalty without evaluating the tree.
 */
double calcTreeByMu(double mu, calc_tree_parms *tp) {
	// penalty for mu outside the allowed interval
	const bool outOfRange = (mu < 0.0) || (mu > tp->global->upperbound_mu);
	if(outOfRange) {
		return 1;
	}

	LOG(lTRACE) << "Calculating tree for \\mu==" << mu;

	SubstitutionModel *model = tp->model;
	bpp::Node *root = tp->root;

	// walkTree() fills one heap-allocated likelihood array per node, keyed by node id
	map<int,double*> partials;
	walkTree(root, mu, tp->paired, model, tp->col, partials, tp->basepairs, tp->ac2seq);

	// weight the root likelihoods with the model's state frequencies
	double *rootLikelihoods = partials[root->getId()];
	double total = 0.0;
	for(int state = 0; state < model->getMatrixSize(); state++) {
		total += rootLikelihoods[state] * (model->getFreq())[state];
	}

	// the arrays were created with new[] in walkTree(); release them here
	map<int,double*>::iterator node = partials.begin();
	while(node != partials.end()) {
		delete[] node->second;
		node->second = NULL;
		++node;
	}
	partials.clear();

	const double negated = -1 * total;
	LOG(lTRACE) << "\\LH==" << negated;

	return negated;
}


/*
 * Computes the conditional likelihoods of the subtree rooted at curr_node for
 * alignment column n (pruning recursion, see Felsenstein's book) and stores
 * them in LH[curr_node->getId()] as an array of model->getMatrixSize() doubles
 * allocated with new[]. The caller owns these arrays and must delete[] them.
 *
 * curr_node  node to evaluate; sons that have no entry in LH yet are evaluated recursively
 * mu         rate handed to model->P()
 * paired     if true, the state of a leaf is the base at column n followed by
 *            the base at the partner column (*basepairs)[n]
 * model      substitution model providing getMatrixSize(), row and P()
 * n          alignment column
 * LH         node id -> likelihood array (in/out)
 * basepairs  column -> partner column, only read when paired is true
 * ac2seq     leaf name -> aligned sequence
 *
 * Leaves: a state containing an ambiguity code gets likelihood 1.0 for every
 * state, otherwise only the observed state gets 1.0.
 * Inner nodes: for every state k the likelihood is the product over ALL sons
 * of sum_i P(k,i,t_son,mu) * LH[son][i]. For one or two sons this is exactly
 * the former behaviour; nodes with more sons are no longer truncated to their
 * first two sons. A node without any son that is not a leaf ends up with 1.0
 * for every state (empty product).
 *
 * Terminates the program with exit(1) if the sequence of a leaf is unknown or
 * if column n (or its pairing partner) lies outside that sequence.
 */
void walkTree(bpp::Node *curr_node, double mu, bool paired, SubstitutionModel *model, int n, map<int,double*> & LH, map<int, int> *basepairs, map<string, string> *ac2seq) {

	const int size = model->getMatrixSize();

	LOG(lTRACE2) << "Being at node #" << curr_node->getId() << " " << ((curr_node->hasName()) ? curr_node->getName() : "");
	LOG(lTRACE2) << "nr of child nodes: " << curr_node->getNumberOfSons();

	double *pL = new double[size];

	// leaf node: figure out which base (pair) is in the sequence and write that to pL
	if(curr_node->hasFather() && curr_node->isLeaf()) {
		LOG(lTRACE3) << "which is a leaf node";
		const string name = curr_node->getName();

		// find() neither copies the sequence nor inserts an empty entry for unknown names
		map<string, string>::const_iterator entry = ac2seq->find(name);
		if(entry == ac2seq->end() || entry->second.empty()) {
			cerr << "Cannot get the sequence for species " <<name <<". That should not have happened, I must kill myself now." << endl;
			exit(1);
		}
		const string & seq = entry->second;
		LOG(lTRACE3) << "and has seq " << seq;

		if(n < 0 || static_cast<size_t>(n) >= seq.size()) {
			cerr << "ERROR: Column " << n << " lies outside of the sequence of species " << name << "." << endl;
			exit(1);
		}

		string bp(1, seq[n]);  // base at position n
		if(paired) {
			// the partner must be a real column; unpaired columns are stored as -1
			map<int,int>::const_iterator partner = basepairs->find(n);
			if(partner == basepairs->end() || partner->second < 0 || static_cast<size_t>(partner->second) >= seq.size()) {
				cerr << "ERROR: Column " << n << " has no valid pairing partner in the sequence of species " << name << "." << endl;
				exit(1);
			}
			bp += seq[partner->second];  // append right side
		}
		LOG(lTRACE3) << "Base pair is " << bp;

		if(isAmbiguous(bp)) { // ambiguous state: every state is equally compatible (Felsenstein book p.255)
			LOG(lTRACE2) << "Ambiguous nucleotide ("<<bp<<") at position " << n << " in species " << name;
			for(int i = 0; i < size; i++) { pL[i] = 1.0; }
		}
		else {
			for(int i = 0; i < size; i++) { pL[i] = 0.0; }
			pL[model->row[bp]] = 1.0;  // likelihood 1.0 for the state that actually occurs
		}
	}
	else {	// inner node (or root): combine the likelihoods of all sons
		const unsigned int sonCount = curr_node->getNumberOfSons();
		LOG(lTRACE3) << "which is an inner node with " << sonCount << " son(s)";

		for(int k = 0; k < size; k++) { pL[k] = 1.0; }

		for(unsigned int s = 0; s < sonCount; s++) {
			bpp::Node *son = curr_node->getSon(s);
			const int sonId = son->getId();
			LOG(lTRACE3) << "son id is: " << sonId;

			// evaluate the son first if it has no likelihoods yet
			map<int,double*>::iterator known = LH.find(sonId);
			if(known == LH.end() || known->second == NULL) {
				walkTree(son, mu, paired, model, n, LH, basepairs, ac2seq);
			}

			const double *sonL = LH[sonId];
			const double t = son->getDistanceToFather();

			for(int k = 0; k < size; k++) {
				double sum = 0.0;
				for(int i = 0; i < size; i++) {
					sum += model->P(k,i,t,mu) * sonL[i];
				}
				pL[k] *= sum;
			}
		}
	}

	LH[curr_node->getId()] = pL;
	LOG(lTRACE3) << "Ending node " << curr_node->getId();
}




/*
 * Reads a Newick tree from treefilename and classifies its leaves.
 *
 * treefilename             path of the Newick file
 * nodeWithMaxDistToFather  out: set to the node with the largest (positive)
 *                          distance to its father among all nodes whose name
 *                          is not in missing_species; left untouched if no
 *                          such node exists
 * missing_species          out: names of leaves that are no key of ac2seq are appended
 * ac2seq                   name -> aligned sequence, only queried with count()
 * AcInTree                 out: names of leaves that are a key of ac2seq are appended
 *
 * Returns the parsed tree; the caller owns it.
 * Terminates the program with exit(1) if the file cannot be opened, the tree
 * cannot be parsed or a leaf has no name.
 */
bpp::TreeTemplate<bpp::Node>* parseTree(const char *treefilename, bpp::Node **nodeWithMaxDistToFather, list<string> *missing_species, map<string,string> *ac2seq, list<string> * AcInTree) {
	LOG(lTRACE3) << "Entering parseTree()";
	LOG(lINFO) << "Tree file name: " <<treefilename;
	if(!fileExists(treefilename)) {
		cerr << "ERROR: Cannot open file " <<treefilename << endl;
		exit(1);
	}

	// the reader lives on the stack so that it is released on every path
	bpp::Newick newickReader(false,false);
	newickReader.enableExtendedBootstrapProperty("label");

	bpp::TreeTemplate<bpp::Node> *tree = NULL;
	try {
		tree = newickReader.read(treefilename);
	}
	catch(const exception & e) {  // by reference: catching by value would slice away the parser's message
		cerr << "ERROR: Error while parsing the tree.";
		cerr << e.what();
		exit(1);
	}

	LOG(lDEBUG) << "Tree has " << tree->getNumberOfNodes() << " nodes.";
	LOG(lDEBUG) << "Tree has " << tree->getNumberOfLeaves() << " leaves.";
	LOG(lTRACE) << "Root ID is " << tree->getRootId();
	// names stored in the "label" branch property of inner nodes are deliberately ignored
	if(tree->hasNodeName(tree->getRootId())) {
		string rootname = tree->getNodeName(tree->getRootId());
		LOG(lDEBUG) << "Root node name is " << rootname;
	}
	else {
		LOG(lDEBUG) << "Root node has no name";
	}

	double maxBranchLen = 0.0;
	bool foundLongestBranch = false;  // true once *nodeWithMaxDistToFather was written here

	vector<int> nodes = tree->getNodesId();
	for(vector<int>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
		const int currid = *it;
		bpp::Node* n = tree->getNode(currid);

		LOG(lTRACE2) << "Node #" << currid << " has degree " << n->degree();

		if(n->hasName()) {
			LOG(lTRACE2) << "Node #" << currid << " has name " << n->getName() ;
			string currname = n->getName();
			// leaves that are not part of the alignment go to the list of missing species
			if(n->isLeaf()) {
				if(ac2seq->count(currname) == 0) {
					missing_species->push_back(currname);
				}
				else {
					// add node to list of acs that are in the alignment
					AcInTree->push_back(currname);
				}
			}
		}
		else {
			// inner node names are ignored, which keeps pruning the tree to the alignment simple
			LOG(lTRACE2) << "Node #" << currid << " has no name" ;
		}

		if(n->isLeaf()) {
			LOG(lTRACE2) << "Node #" << currid << " is a leaf";
			if(!n->hasName()) {  // leaves need names, otherwise they cannot be matched to sequences
				cerr << "UNEXPECTED ERROR: No name on leaf node " << currid << endl;
				exit(1);
			}
		}
		else {
			LOG(lTRACE2) << "Node #" << currid << " is an inner node";
		}

		LOG(lTRACE3) <<  "Node # " << currid << " has address " << n;

		// track the longest branch, skipping nodes that are listed as missing species
		if(n->hasDistanceToFather()) {
			bool isMissing = false;
			if(n->hasName()) {
				const string nodeName = n->getName();
				for(list<string>::iterator s = missing_species->begin(); s != missing_species->end(); ++s) {
					if(nodeName == *s) {
						isMissing = true;
						break;
					}
				}
			}
			if(!isMissing) {
				double distToFather = n->getDistanceToFather();
				LOG(lTRACE2) << "Node #" << currid << " has distance to father node of " << distToFather;
				if(distToFather>maxBranchLen) {
					LOG(lTRACE3) << "Node #" << currid << " this is longer than " << maxBranchLen;
					maxBranchLen = distToFather;
					*nodeWithMaxDistToFather = n;
					foundLongestBranch = true;
					LOG(lTRACE3) << "nodeWithMaxDistToFather " << " has address " << nodeWithMaxDistToFather;
				}
			}
		}
	}

	// only dereference the out-parameter if this function actually set it
	if(foundLongestBranch) {
		LOG(lTRACE3) << "nodeWithMaxDistToFather points to " << *nodeWithMaxDistToFather;
		LOG(lDEBUG) << "Node with max distance to father #" << (*nodeWithMaxDistToFather)->getId() << " " << (((*nodeWithMaxDistToFather)->hasName()) ? (*nodeWithMaxDistToFather)->getName() : "") << ", father id = " <<(*nodeWithMaxDistToFather)->getFatherId();
	}
	else {
		LOG(lDEBUG) << "No node with a positive distance to its father was found";
	}

	if(Log::globalLevel >= lDEBUG) {
		list<string>::iterator s;
		for(s = missing_species->begin(); s != missing_species->end(); ++s) {
			LOG(lDEBUG) << "Not in alignment: " << *(s) ;
		}
		for(s = AcInTree->begin(); s != AcInTree->end(); ++s) {
			LOG(lDEBUG) << "In alignment: " << *(s) ;
		}
	}

	LOG(lTRACE3) << "Leaving parseTree()";
	return tree;
}



/*
 * Extracts the base pairs of one bracket type from a secondary structure string.
 *
 * ss         structure annotation, at least len characters
 * len        number of characters of ss to scan
 * basepairs  out: for every matching open/close pair both directions
 *            (left -> right and right -> left, 0-based columns) are inserted;
 *            columns annotated with '.' or '-' are inserted with partner -1.
 *            Existing entries are never overwritten (map::insert), all other
 *            characters are skipped.
 * open       character that opens a pair, e.g. '('
 * close      character that closes a pair, e.g. ')'
 *
 * Terminates the program with exit(1) if the brackets are not balanced.
 * Positions in messages are 1-based.
 */
void findPairs(const char *ss, const int len, map<int,int> *basepairs, char open, char close) {

	const char single = '.';
	const char gap = '-';
	stack<int> lager;  // "Lager" = store: columns of the currently unmatched opening brackets

	for(int i = 0; i < len; i++) {

		// This is the left side of a base pair
		if(ss[i] == open) {
			lager.push(i);
		}
		else if(ss[i] == close) { // right side of a bp

			if(lager.empty()) {
				cerr << "ERROR: Secondary structure annotation is not well-formed. (Underflow at position " << i+1 <<")"<< endl;
				exit(1);
			}
			const int start = lager.top(); // innermost unmatched opening bracket
			lager.pop();

			basepairs->insert(make_pair(i, start));
			basepairs->insert(make_pair(start,i));
			// both positions are reported 1-based, like in all other messages of this function
			LOG(lTRACE3) << "Base at position " << start+1 << " is paired with base at position " << i+1;
		}
		else if(ss[i] == single) {
			basepairs->insert(make_pair(i,-1));
			LOG(lTRACE3) << "Base at position " << i+1 << " is not paired.";
		}
		else if(ss[i] == gap) {
			basepairs->insert(make_pair(i,-1));
			LOG(lTRACE3) << "Base at position " << i+1 << " is a gap.";
		}
		// any other character (e.g. a different bracket type) is silently skipped
	}

	if(!lager.empty()) {
		cerr << "ERROR: Secondary structure annotation is not well-formed. (Overflow)" << endl;
		exit(1);
	}

}


/*
 * Creates the substitution model for unpaired columns by name.
 *
 * unpairedModelName  one of "hky85", "hky85gap", "EukSSUrRNA", "ProSSUrRNA" (case sensitive)
 * kappa              passed to the HKY85 based models, ignored by the others
 *
 * Returns a model allocated with new; the caller has to delete it.
 * For an unknown name a message is printed and the program ends with exit(1).
 */
SubstitutionModel* getUnpairedModel(char * unpairedModelName, float kappa) {
	if(strcmp(unpairedModelName, "hky85") == 0) {
		return new HKY85SubstitutionModel(kappa);
	}
	if(strcmp(unpairedModelName, "hky85gap") == 0) {
		return new HKY85GapSubstitutionModel(kappa);
	}
	if(strcmp(unpairedModelName, "EukSSUrRNA") == 0) {
		return new EukSSUrRNASubstitutionModel();
	}
	if(strcmp(unpairedModelName, "ProSSUrRNA") == 0) {
		return new ProSSUrRNASubstitutionModel();
	}

	cout << "Couldnt find unpaired model " << unpairedModelName << endl;
	exit(1);
	return NULL;  // never reached, keeps every path returning a value
}

/*
 * Creates the substitution model for paired columns by name.
 *
 * pairedModelName  one of "hky85gap", "hky85", "EukSSUrRNA", "EukSSUrRNAGap",
 *                  "ProSSUrRNA" (case sensitive)
 * kappa            passed to the HKY85 based models; for "EukSSUrRNAGap" it
 *                  parameterises the HKY85 gap model attached to the paired model
 *
 * Returns a model allocated with new; the caller has to delete it.
 * For an unknown name a message is printed and the program ends with exit(1).
 */
SubstitutionModel* getPairedModel(char * pairedModelName, float kappa) {
	if(strcmp(pairedModelName, "hky85gap") == 0) {
		return new PairedHKY85GapSubstitutionModel(kappa);
	}
	if(strcmp(pairedModelName, "hky85") == 0) {
		return new PairedHKY85SubstitutionModel(kappa);
	}
	if(strcmp(pairedModelName, "EukSSUrRNA") == 0) {
		return new PairedEukSSUrRNASubstitutionModel();
	}
	if(strcmp(pairedModelName, "EukSSUrRNAGap") == 0) {
		// this model additionally needs an HKY85 gap model
		PairedEukSSUrRNAGapSubstitutionModel * gapAware = new PairedEukSSUrRNAGapSubstitutionModel();
		HKY85GapSubstitutionModel * hky = new HKY85GapSubstitutionModel(kappa);
		gapAware->setHKY85GapSubstitutionModel(hky);
		return gapAware;
	}
	if(strcmp(pairedModelName, "ProSSUrRNA") == 0) {
		return new PairedProSSUrRNASubstitutionModel();
	}

	cout << "Couldnt find paired model " << pairedModelName << endl;
	exit(1);
	return NULL;  // never reached, keeps every path returning a value
}

/*
 * Helper of hasNoMutation(): true if all non-ambiguous characters that the
 * sequences listed in AcInTree have in column col are identical.
 */
static bool columnIsInvariant(int col, map<string, string> & ac2seq, list<string> & AcInTree) {
	// 1. find the first sequence which has a non-ambiguous char at that position
	char reference_base = 0;
	list<string>::iterator s;
	for(s = AcInTree.begin(); s != AcInTree.end(); ++s) {
		reference_base = (ac2seq[*s])[col];
		if(!isAmbiguous(reference_base)) {
			break;
		}
	}
	// 2. look for a non-ambiguous base which differs from the reference
	for(s = AcInTree.begin(); s != AcInTree.end(); ++s) {
		char c = (ac2seq[*s])[col];
		if(c != reference_base && !isAmbiguous(c)) {
			return false;
		}
	}
	return true;
}

/*
 * Tells whether column col (and, unless forceunpaired is set, also its pairing
 * partner) shows no substitution among the sequences listed in AcInTree.
 * Ambiguous characters (see isAmbiguous()) are ignored in the comparison.
 *
 * col            alignment column (0-based)
 * forceunpaired  if true the pairing partner is not inspected
 * ac2seq         name -> aligned sequence
 * AcInTree       names of the sequences to inspect
 * basepairs      column -> partner column, -1 for unpaired columns. Column 0
 *                is a valid partner; columns without an entry are treated as
 *                unpaired and are not inserted into the map.
 */
bool hasNoMutation(int col, bool forceunpaired, map<string, string> & ac2seq, list<string> & AcInTree, map<int,int> & basepairs) {
	if(!columnIsInvariant(col, ac2seq, AcInTree)) {
		return false;
	}

	if(!forceunpaired) {
		map<int,int>::const_iterator partner = basepairs.find(col);
		if(partner != basepairs.end() && partner->second >= 0) {
			if(!columnIsInvariant(partner->second, ac2seq, AcInTree)) {
				return false;
			}
		}
	}
	LOG(lDEBUG) << "colum " << col << " has no mutations";
	return true;
}

bool hasOnlyGapOrN(int col, map<string, string> &ac2seq, list<string> &AcInTree) {
	list<string>::iterator s;
	for(s = AcInTree.begin(); s != AcInTree.end(); s++) {
		char c = (ac2seq[*s])[col];  
		if(!isAmbiguous(c) && c != '-')
			return false; 
	}
	return true;
}

/*
 * True if at least one sequence in ac2seq has a gap ('-') in column col.
 * col must be a valid index for every sequence in the map.
 */
bool hasGap(int col, map<string, string> &ac2seq) {
	map<string,string>::iterator entry = ac2seq.begin();
	while(entry != ac2seq.end()) {
		if(entry->second[col] == '-') {
			return true;  // one gap is enough
		}
		++entry;
	}
	return false;
}

/*
 * True if s contains at least one ambiguity code, i.e. '?' or any upper-case
 * letter other than A, C, G, T and U. Lower-case letters, gaps and all other
 * characters do not count as ambiguous.
 */
bool isAmbiguous(string & s) {
	const char * const ambiguityCodes = "BDEFHIJKLMNOPQRSVWXYZ?";
	return s.find_first_of(ambiguityCodes) != string::npos;
}

/*
 * Single character variant: wraps c into a one-character string and
 * delegates to isAmbiguous(string &).
 */
bool isAmbiguous(char & c) {
	string asString(1, c);
	return isAmbiguous(asString);
}

/* std::string convenience overload, forwards to fileExists(const char *). */
bool fileExists(const std::string& fileName) {
  const char *path = fileName.c_str();
  return fileExists(path);
}

/*
 * True if fileName can be opened for reading.
 * The file is only probed; it is closed again before returning.
 */
bool fileExists(const char *fileName) {
  std::fstream probe(fileName, std::ios::in);
  const bool opened = probe.is_open();
  probe.close();
  return opened;
}


/*
 * Prints the subtree below root to stdout, one node per line (id, followed by
 * the name in parentheses if the node has one). Every level is indented by
 * three more spaces; the node passed in is indented by depth+1 levels.
 * Does nothing unless the global log level is at least lTRACE.
 */
void printTree(bpp::Node * root, int depth=0) {
	if(Log::globalLevel < lTRACE) { return; }

	// depth+1 indentation units
	for(int level = depth; level >= 0; --level) {
		cout << "   " ;
	}

	cout << root->getId();
	if(root->hasName()) {
		cout << " (" <<root->getName()<<")";
	}
	cout << endl;

	// recurse into the sons, one level deeper
	unsigned int sonIndex = 0;
	while(sonIndex < root->getNumberOfSons()) {
		printTree(root->getSon(sonIndex), depth+1);
		sonIndex++;
	}
}

/*
 * Detaches and deletes all descendants of root, bottom-up.
 * The node passed in persists; afterwards it has no sons.
 *
 * The sons are removed from the back. Removing son i while counting i upwards
 * (as an earlier version did) shifts the remaining sons down and therefore
 * skips every other son, which leaked half of the subtree and left it
 * attached to root.
 */
void deleteSubTree(bpp::Node* root) {
	LOG(lTRACE) << "Deleting node " << root->getId();
	for(unsigned int remaining = root->getNumberOfSons(); remaining > 0; remaining--) {
		const unsigned int last = remaining - 1;
		bpp::Node * s = root->getSon(last);
		root->removeSon(last);
		deleteSubTree(s);  // free the grandchildren before the son itself
		delete s;
	}
}



