#include <iostream>
#include <fstream>
#include <string>
#include <stdio.h>
#include <map>
#include <vector>
#include <algorithm> // for transform()
#include <cctype> // for toupper()

#include <Utils/StringTokenizer.h>

#include "StockholmReader.h"
#include "Log.h"

using namespace std;
	
void ptr::StockholmReader::read(string alnfilename, map<string, string> & ac2seq, map<string, string> & ac2ss, string & SS_cons) { // call by ref SS_cons
		 
  	 // read stockholm file
  	 // input stream
  	 ifstream alnfile;
  	 
  	 string line;
  	 string first_seq_name = "";
	 SS_cons = "";

  	 
  	 alnfile.open(alnfilename.c_str());
  	 if(!alnfile.is_open()) {
  		 cerr << "Could not open file " << alnfilename << endl;
  		 exit(1);  		  
  	 }
  	 
  	 while(!alnfile.eof()) {
  		 getline(alnfile,line);
  	     line =  trim(line," \t\n");
  	       		  	       		 	
  		 if(blankline(line) or '/' == line[0]) { // empty lines or // end line
  			 // do nothing
  		 }
  		 else if('#' == line[0]) { // line starts with a #
  			size_t found;
  			
  			found = line.find("#=GC SS_cons"); // look for SS concsensus line and save
  			if(found != string::npos) {
  				
  				// split line
  				//erstes token sollte #=GC, zweites token SS_cons und drittes (und folgende) der (..) string
  				
  				bpp::StringTokenizer st(line);
				st.nextToken();
				st.nextToken();
				while(st.hasMoreToken()) {
					SS_cons += st.nextToken();				
				}
				  				
  			}
  			
  			found = line.find("#=GR");  // SS structure per sequence
  			if(found != string::npos) {
  				
  				// split line into ac and structure string
  				// zweites token is AC, viertes und folgende token ist der ss string
  				bpp::StringTokenizer st(line);
				st.nextToken();
				string ac = st.nextToken();
				st.nextToken();
				string ss = st.nextToken();	
				while(st.hasMoreToken()) {
					ss = st.nextToken();				
				}
  				
  				
  				if(ac2ss.count(ac) == 0) {  				
  					ac2ss.insert(make_pair(ac,ss));
  				}
  				else {
  					ac2ss[ac] += ss;
  				}
  				
  			}
  		} 
  		else { // non-# lines
  			
  			// sollten in der form    NAME  SEQUENZ sein
  			bpp::StringTokenizer st(line);
  			if(st.numberOfRemainingTokens() >= 2) {
  				// tokenisieren
  				// erstes token ist die AC, zweites und weitere token ist die sequenz
  				
  				string ac = st.nextToken();
  				if(first_seq_name.size() == 0) {
  					first_seq_name = ac;
  				}
  				
  				string seq = st.nextToken();
  				 				
  				while(st.hasMoreToken()) {
  					seq += st.nextToken();
  				}
  				  				  				
	 			//string species; 			
	 			//ac auseinandernehmen und species rauskramen, bzw konvention: letzte xxx_xxx substr ist die species 			
	 			/*if(ac.length() > 6) {
	 				species = ac.substr(ac.length()-7);
	 			}
	 			else {
	 				species = ac;	
	 			}*/
	 			// lowercase that thing
	 			//transform(species.begin(),species.end(),species.begin(),static_cast < int(*)(int) >(tolower));
	 			//ac2spec->insert(make_pair(ac,species));
	  			
  				StringToUpper(seq);
  				// gaps should be denoted as "-"
  				replace(seq.begin(), seq.end(), '.', '-');
  				
  				if(ac2seq.count(ac) == 0) {  				
  					ac2seq.insert(make_pair(ac,seq));
  				}
  				else {
  					ac2seq[ac] += seq;
  				}
	 		}
	 		else { // not enough tokens
	 			cerr << "ERROR: Alignment file: Could not split this line into name and sequence: " << line << endl;
  		 		exit(1); 
	 		}
  			
  		}
  		 
  	 }
 	 
  	 alnfile.close();

  	 // check that for each seq there is a structure in the map
  	 // also all sequences and structures have to be of the same lengths
  	/* if(!(ac2seq->size() == ac2ss->size())) {
  		 cerr << "ERROR: Number of sequences (" << ac2seq->size() << ") does not match number of structure strings (" << ac2ss->size() << ")" <<endl;
  		 exit(1);
  	 }
  	*/ 
  	//size_t found;
  	/* found = SS_cons.find_first_not_of(".()<>");
  	if(found != string::npos) {
    	cerr << "Found character " << SS_cons[found] << " at position "<< int(found) << " in secondary structure SS_cons line, while only the characters ., (, ), <, ann > are allowed" <<endl;
  		exit(1);
  	}*/	 
  	
  	//Replace all () with <>
  	replace(SS_cons.begin(), SS_cons.end(), '(', '<');
  	replace(SS_cons.begin(), SS_cons.end(), ')', '>');
  	  	 
  	for(map<string,string>::iterator it = ac2seq.begin(); it != ac2seq.end(); ++it) {
  		 //cout << it->first << " = " << it->second << endl;
  		 //cout << it->first << " = " << ac2ss[it->first] << endl;
  	/*	if(ac2ss->count(it->first) != 1) {
  			cerr << "ERROR: No structure string was found for accession number " << it->first << endl;
  			exit(1);
  		}
  	*/	
  	//	cout << it->first << " " <<SS_cons->size()<<" "<<(it->second).size() <<" "<<((*ac2ss)[it->first]).size()<<endl;
  		//if((it->second).size() != SS_cons->size() || (*ac2ss)[it->first].size() != SS_cons->size()) {
  		if((it->second).size() != ac2seq[first_seq_name].size()) {
  			cerr << "ERROR: The length of sequence with name \"" << it->first << "\" does not match the length of the first sequence in the alignment." << endl;
  			exit(1);
  		}
  		
  		//size_t found = (it->second).find_first_not_of("ACGTUN-");
  		size_t found = (it->second).find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZ-");
  		if(found != string::npos) {
    		cerr << "ERROR: Found character " << (it->second)[found] << " at position "<< int(found) << "  in sequence with name " << it->first <<", while only the characters in {A C G T U N . -} are allowed" <<endl;
  			exit(1);
  		}	    
	 
  	 }
  	 
  	if(SS_cons.size() > 0) {
  		if(SS_cons.size() != ac2seq[first_seq_name].size()) {
  			cerr << "ERROR: The length of the SS_cons line does not match the length of the first sequence in the alignment." << endl;
  			exit(1);
  		}
  	}
  	else {
  		SS_cons = ".";
  		for(unsigned int i = 0; i < ac2seq[first_seq_name].size() -1; i++) { 
  			SS_cons += ".";
  		}
  	}	
  		 
 
}

/**
 * Reports whether a line carries no content.
 *
 * @param s line to inspect
 * @return true when s is empty or consists solely of space and TAB characters,
 *         false as soon as any other character is present
 */
bool ptr::StockholmReader::blankline(const std::string& s)
{
    for(std::string::size_type pos = 0; pos < s.size(); ++pos) {
        const char current = s[pos];
        if(current != ' ' && current != '\t') {
            return false; // found real content
        }
    }
    return true;
}


/**
 * Converts every character of strToConvert to upper case, in place.
 *
 * @param strToConvert string to modify
 */
void ptr::StockholmReader::StringToUpper(string & strToConvert)
{
   for(string::size_type i = 0; i < strToConvert.length(); ++i)
   {
      // toupper() is only defined for values representable as unsigned char
      // (or EOF); a plain char may be negative, so convert it first.
      strToConvert[i] = static_cast<char>(toupper(static_cast<unsigned char>(strToConvert[i])));
   }
}

/**
 * Converts every character of strToConvert to lower case, in place.
 *
 * @param strToConvert string to modify
 */
void ptr::StockholmReader::StringToLower(string & strToConvert)
{
   for(string::size_type i = 0; i < strToConvert.length(); ++i)
   {
      // tolower() is only defined for values representable as unsigned char
      // (or EOF); a plain char may be negative, so convert it first.
      strToConvert[i] = static_cast<char>(tolower(static_cast<unsigned char>(strToConvert[i])));
   }
}

/**
 * Strips leading and trailing characters contained in drop.
 *
 * Side effect: the argument s itself is modified - its trailing drop
 * characters are removed in place. Leading characters are removed only from
 * the returned copy.
 *
 * @param s    string to trim; right-trimmed in place
 * @param drop set of characters to strip
 * @return copy of s with drop characters removed from both ends
 */
std::string ptr::StockholmReader::trim(std::string& s,const std::string& drop = " ")
{
 // Position of the last character to keep. When every character is in drop
 // this is npos, and npos + 1 wraps to 0, so the whole string is erased.
 const std::string::size_type lastKept = s.find_last_not_of(drop);
 s.erase(lastKept + 1);

 // Work on a copy for the left side so the caller's string keeps its prefix.
 std::string result(s);
 const std::string::size_type firstKept = result.find_first_not_of(drop);
 result.erase(0, firstKept);
 return result;
}

