#include <ctype.h>

#include "../cmfinder/global.h"

/*
 * Table of species identifier strings.
 * NOTE(review): these look like genome assembly names (hg18, mm8, ...);
 * confirm against whatever consumes this table. Nothing in the code
 * visible in this file references it.
 */
char* all_species[] = {"hg18", "panTrol", "rheMac2", "rn4", "mm8", "oryCun1", 
		       "bosTau2", "canFam2","dasNov1","loxAfr1",
		       "echTel1", "monDom4", "galGal2", "xenTrol", 
		       "tetNig1", "fr1", "danRer3"};


/*
 * Output symbols produced by encode_pair():
 *   encode_alphabet[0 .. npair-1]  one symbol per entry of pairs[] (same index)
 *   encode_alphabet[npair]         both positions are gaps
 *   encode_alphabet[npair+1]       exactly one position is a gap
 *   encode_alphabet[npair+2]       anything else
 */
char* encode_alphabet = "ABCDEFGHI";
/* Two-character base-pair strings; pairs[i] is encoded as encode_alphabet[i]. */
char* pairs[]={"AU", "UA", "GC", "CG", "GU", "UG"};
/* Number of entries in pairs[]. */
int   npair = 6;
/*
 * encode_pair - map the residues at two paired alignment positions to a
 * single symbol of encode_alphabet.
 *
 *   l, r : the characters found at the left and right position.
 *
 * Returns:
 *   encode_alphabet[i]        if (l, r) is a base pair equal to pairs[i]
 *   encode_alphabet[npair]    if both l and r are gaps
 *   encode_alphabet[npair+1]  if exactly one of l, r is a gap
 *   encode_alphabet[npair+2]  otherwise
 *
 * The function always returns a value. Previously, when IsBasePair()
 * accepted a pair that was not spelled exactly like an entry of pairs[]
 * (for instance lowercase residues), control reached the end of the
 * function without a return statement.
 */
char encode_pair(char l, char r)
{
  int i;

  if (IsBasePair(l,r)){
    char pair[3];

    /* Normalise to the spelling used in pairs[]: upper case, with T
     * written as U.  This is a no-op for residues that already matched. */
    pair[0] = (char)toupper((unsigned char)l);
    pair[1] = (char)toupper((unsigned char)r);
    pair[2] = '\0';
    if (pair[0] == 'T') pair[0] = 'U';
    if (pair[1] == 'T') pair[1] = 'U';

    for(i=0; i < npair; i++){
      if (strcmp(pair, pairs[i])==0){
        return encode_alphabet[i];
      }
    }
    /* IsBasePair() accepted something that is not in pairs[]: fall back to
     * the generic "other" symbol instead of returning garbage. */
    return encode_alphabet[npair+2];
  }

  if (isgap(l) && isgap(r)){
    return encode_alphabet[npair];
  }
  if (isgap(l) || isgap(r)){
    return encode_alphabet[npair+1];
  }
  return encode_alphabet[npair+2];
}

/*
 * extract_column - encode the column pair (l, r) of an alignment.
 *
 *   msa : alignment whose rows msa->aseq[0 .. msa->nseq-1] are examined
 *   l,r : the two column indices that form the pair
 *   col : output buffer with room for at least msa->nseq characters
 *
 * The pair is accepted only when at least half of the sequences have a
 * base pair (per IsBasePair) at (l, r).  On acceptance col[s] receives
 * encode_pair() of sequence s and 1 is returned; otherwise col is left
 * untouched and 0 is returned.
 */
int extract_column(MSA* msa, int l, int r, char* col)
{
  int seq;
  int n_paired = 0;

  /* First pass: how many sequences actually pair at these columns? */
  for (seq = 0; seq < msa->nseq; seq++) {
    if (IsBasePair(msa->aseq[seq][l], msa->aseq[seq][r])) {
      n_paired += 1;
    }
  }

  if (n_paired >= msa->nseq * 0.5) {
    /* Second pass: emit one encoded symbol per sequence. */
    seq = 0;
    while (seq < msa->nseq) {
      char left  = msa->aseq[seq][l];
      char right = msa->aseq[seq][r];
      col[seq] = encode_pair(left, right);
      seq++;
    }
    return 1;
  }

  return 0;
}



/*
 * main - encode the paired columns of every alignment in a Stockholm file.
 *
 * Usage: <program> <alignment_file>
 *
 * Every alignment read from argv[1] must contain the same number of
 * sequences as the first one.  For each alignment, the pair table built
 * from msa->ss_cons is scanned; for every position i whose partner lies to
 * its right (pair_table[i] > i), extract_column() decides whether the pair
 * is kept and, if so, supplies one encoded symbol per sequence.  The kept
 * columns of all alignments are concatenated per sequence.
 *
 * Output (stdout), once per sequence, using the names of the first
 * alignment:
 *   ><sequence name>
 *   <encoded string>
 *
 * Returns 0 on completion; fatal conditions are reported through Die().
 */
int main(int argc, char* argv[])
{
  char*  ali_file;
  int    format =  MSAFILE_STOCKHOLM;
  MSAFILE     *afp = NULL;        /* file handle of initial alignment */
  MSA*   msa;
  char** encoded_alignment = NULL;
  char*  col = NULL;              /* scratch: one encoded symbol per sequence */
  int    alloc_block = 100;       /* growth step of the per-sequence buffers */
  int    alloc_len = 0;           /* current capacity of each buffer */
  int    alen=0;                  /* number of encoded columns stored so far */
  char** sqname = NULL;
  int    i,k;
  int    nseq=0;

  if (argc < 2) {
    Die("Usage: %s <stockholm_alignment_file>", argc > 0 ? argv[0] : "prog");
    return 1;   /* in case Die() returns */
  }
  ali_file = argv[1];

  if ((afp = MSAFileOpen(ali_file, format, NULL)) == NULL)
    Die("Alignment file %s could not be opened for reading", ali_file);

  while ((msa = MSAFileRead(afp)) != NULL){
    if (encoded_alignment == NULL){
      /* First alignment: it fixes the sequence count and the names. */
      encoded_alignment = (char**)MallocOrDie(sizeof(char*) * msa->nseq);
      sqname = (char**)MallocOrDie(sizeof(char*) * msa->nseq);
      for(i=0; i < msa->nseq; i++){
        encoded_alignment[i] = (char*)MallocOrDie(sizeof(char) * alloc_block);
        sqname[i] = (char*)MallocOrDie(sizeof(char) * (strlen(msa->sqname[i]) + 1));
        strcpy(sqname[i], msa->sqname[i]);
      }
      nseq = msa->nseq;
      alloc_len = alloc_block;
      alen = 0;
      col = (char*) MallocOrDie(sizeof(char) * msa->nseq);
    }
    if (msa->nseq != nseq)
      Die("%s file with %d seq (other %d seq)\n", msa->name, msa->nseq, nseq);

    /* NOTE(review): ownership of the table returned by GetPairtable() is
     * not visible here; if it is heap-allocated it should be released
     * after the loop below. */
    int* pair_table = GetPairtable(msa->ss_cons);
    for(i=0; i < msa->alen; i++){
      /* Visit each pair once, from its left member. */
      if (pair_table[i] > i){
        if (extract_column(msa, i, pair_table[i], col)){
          /* Keep one spare slot for the terminating '\0' written below. */
          if (alen + 1 >= alloc_len ){
            alloc_len += alloc_block;
            for(k=0; k < nseq; k++){
              char* grown = realloc(encoded_alignment[k], sizeof(char) * alloc_len);
              if (grown == NULL)
                Die("Out of memory growing encoded alignment to %d columns", alloc_len);
              encoded_alignment[k] = grown;
            }
          }
          for(k = 0; k < nseq; k++){
            encoded_alignment[k][alen] = col[k];
          }
          alen++;
        }
      }
    }
    MSAFree(msa);
  }

  /* When no alignment was read nseq is 0 and the pointers below are NULL,
   * so the loop is skipped and the free() calls are no-ops. */
  for(k = 0; k < nseq; k++){
    encoded_alignment[k][alen] = '\0';
    printf(">%s\n", sqname[k]);
    printf("%s\n", encoded_alignment[k]);
    free(encoded_alignment[k]);
    free(sqname[k]);
  }
  free(col);
  free(sqname);
  free(encoded_alignment);
  MSAFileClose(afp);
  return 0;
}
