import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import options.Options;
import options.Options.Multiplicity;
import options.Options.Separator;
import src.GlobalParameters;
import src.MakeMultiple;
import src.PairAlignment;
import src.RNAfold;
import src.Wpgma;

/**
 * Command-line driver: reads a FASTA file, prepares the hidden working
 * directories in the current directory, checks the sequence lengths and then
 * runs {@code RNAfold}, the WPGMA guide tree and {@code MakeMultiple} on the
 * sequences.
 *
 * <p>Usage: {@code java FoldalignM_McCaskill [options] <input> <output>}.
 */
public class FoldalignM_McCaskill {

	/** Sequences longer than this are rejected. */
	private static final int MAX_SEQUENCE_LENGTH = 150;

	/** A length spread of this many residues (or more) between sequences is rejected. */
	private static final int MAX_LENGTH_SPREAD = 50;

	/**
	 * Program entry point.
	 *
	 * @param args command-line options followed by the FASTA input file and the output name
	 * @throws Exception if the input cannot be read or one of the alignment steps fails
	 */
	@SuppressWarnings("unchecked") // retained from the original declaration
	public static void main(String[] args) throws Exception {
		final Options opt = new Options(args,2);
		registerOptions(opt);

		if (!opt.check()) {
			printUsage();
			System.exit(1);
		}

		applyOptions(opt);
		// Must be read AFTER the options were applied; reading it earlier made
		// the -nolog flag invisible to RNAfold.
		final boolean nolog = GlobalParameters.nolog;

		final String fastaFile = opt.getSet().getData().get(0);
		final String outName = opt.getSet().getData().get(1);

		final HashMap<String,String> fasta = readFasta(fastaFile);
		if (fasta.isEmpty()) {
			System.err.println("No sequences found in " + fastaFile);
			System.exit(1);
		}

		// Make the relevant directories if they do not exist
		ensureDirectory(".fold_matrix");
		ensureDirectory(".fold_cons");
		ensureDirectory(".fold_rnafold");
		ensureDirectory(".fold_out");
		ensureDirectory(".fold_matrix/"+outName);

		/*
		 * Validate the lengths. All sequences have to fit into one length
		 * cluster; anything else aborts the run.
		 */
		final Set<String> ids = fasta.keySet();
		final int total = ids.size();
		GlobalParameters.allSeqs = total;

		final int[] lengths = new int[total];
		// Fresh map filled in iteration order, exactly as the per-cluster map used to be built.
		final HashMap<String,String> cluster = new HashMap<String,String>();
		int idx = 0;
		for (String id : ids) {
			final String sequence = fasta.get(id);
			if (sequence.length() > MAX_SEQUENCE_LENGTH) {
				System.err.println("Some/one of the sequences are too long");
				System.exit(1);
			}
			lengths[idx++] = sequence.length();
			cluster.put(id, sequence);
		}
		checkLengthSpread(lengths);

		// Remove output left over from a previous run with the same output name
		deleteIfPresent(".fold_out/"+outName+".original.out");
		deleteIfPresent(".fold_out/"+outName+".refined.out");

		final String matrixDir = ".fold_matrix/"+outName+"/";
		final RNAfold rnafold = new RNAfold(cluster,nolog,matrixDir);
		final int NN = rnafold.getNN();
		if (NN > 1) {
			final double[][] SM = rnafold.getSM();
			final String[] names = rnafold.getNames();
			final String[] allNames = rnafold.getAllNames();
			// Make a WPGMA guide tree to guide the multiple alignment
			final Wpgma guide = new Wpgma(NN, SM, matrixDir);
			final ArrayList<PairAlignment> pas = guide.getPairAlignments();
			// The last pairwise alignment is the multiple alignment
			final PairAlignment last = pas.get(pas.size()-1);
			new MakeMultiple(last,allNames,names, pas,NN, matrixDir,outName);
		} else {
			// There is only ever cluster 0 (see checkLengthSpread)
			System.out.println("Only one sequence in cluster "+0);
		}
	}

	/** Declares every command-line option the program accepts. */
	private static void registerOptions(Options opt) {
		// "global" is accepted on the command line but not read in this class
		opt.getSet().addOption("global",Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("fast",Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("no_pruning",Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("col",Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("nolog", Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("plfold",Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("delta", Separator.BLANK, Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("gap", Separator.BLANK, Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("seqw", Separator.BLANK, Multiplicity.ZERO_OR_ONE);
		opt.getSet().addOption("consensus", Separator.BLANK, Multiplicity.ZERO_OR_ONE);
	}

	/** Prints the usage hints to standard error. */
	private static void printUsage() {
		System.err.println("\n\nTo run this program you need to provide it with a fasta file and a output name!");
		System.err.println("Usage: java FoldalignM_McCaskill [-fast] [-delta value] [-gap value] [-seqw value] [-consensus value] [-nolog] [-no_pruning] [-col] <input> <output>");
		System.err.println("The default values are:");
		System.err.println("\t-fast: Fast is global alignment that uses more memory but is faster, default is not to use this.");
		System.err.println("\t-delta: The maximum allowed length difference between any given pair, default is max(10,length difference)");
		System.err.println("\t-gap: The gap cost, default is -300");
		System.err.println("\t-seqw: The score for a sequence match, default is 5");
		System.err.println("\t-consensus: The consensus prob. matrix cutoff, default is 4 (i.e. #sequences/4)");
		System.err.println("\t-nolog: Set this if you don't want to use log-odds score, default is to use them");
		System.err.println("\t-no_pruning: Default is to prune away low scoring cells");
		System.err.println("\t-col: Also output the alignment in column format");
		System.err.println();
	}

	/** Copies every option that was given on the command line into {@code GlobalParameters}. */
	private static void applyOptions(Options opt) {
		if (opt.getSet().isSet("fast")) {
			GlobalParameters.mode = 2;
		}
		if (opt.getSet().isSet("no_pruning")) {
			GlobalParameters.noprune = true;
		}
		if (opt.getSet().isSet("col")) {
			GlobalParameters.col = true;
		}
		if (opt.getSet().isSet("delta")) {
			GlobalParameters.delta = shortOption(opt, "delta");
		}
		if (opt.getSet().isSet("gap")) {
			GlobalParameters.gap = shortOption(opt, "gap");
		}
		if (opt.getSet().isSet("seqw")) {
			GlobalParameters.seqw = shortOption(opt, "seqw");
		}
		if (opt.getSet().isSet("consensus")) {
			GlobalParameters.consensus = shortOption(opt, "consensus");
		}
		if (opt.getSet().isSet("nolog")) {
			GlobalParameters.nolog = true;
		}
		if (opt.getSet().isSet("plfold")) {
			GlobalParameters.plfold = true;
		}
	}

	/**
	 * Returns the first value of option {@code key} parsed as a {@code short}.
	 *
	 * @throws NumberFormatException if the value is not a valid {@code short}
	 */
	private static short shortOption(Options opt, String key) {
		return Short.parseShort(opt.getSet().getOption(key).getResultValue(0));
	}

	/**
	 * Reads a FASTA file into an ID to sequence map.
	 *
	 * <p>A line starting with {@code >} begins a new record whose ID is the rest
	 * of that line; every following non-blank line is appended to that record's
	 * sequence. Blank lines are skipped so they cannot create empty records.
	 * Sequence lines that appear before the first header are stored under the
	 * empty ID.
	 *
	 * @param fastaFile path of the FASTA file
	 * @return the sequences keyed by ID; empty if the file holds no sequence lines
	 * @throws java.io.IOException if the file cannot be opened or read
	 */
	private static HashMap<String,String> readFasta(String fastaFile) throws java.io.IOException {
		final HashMap<String,String> fasta = new HashMap<String,String>();
		final BufferedReader reader =
				new BufferedReader(new InputStreamReader(new FileInputStream(fastaFile)));
		try {
			String name = "";
			String line;
			while ((line = reader.readLine()) != null) {
				if (line.startsWith(">")) {
					name = line.substring(1);
				} else if (line.trim().length() > 0) {
					final String soFar = fasta.get(name);
					fasta.put(name, (soFar == null) ? line : soFar + line);
				}
			}
		} finally {
			// Always release the file handle, also when reading fails
			reader.close();
		}
		return fasta;
	}

	/**
	 * Terminates the program when the sequence lengths are too far apart.
	 *
	 * <p>Each length is compared with the shortest and longest length seen so
	 * far; a difference of {@value #MAX_LENGTH_SPREAD} or more to either one is
	 * an error. Starting a second length cluster is not supported.
	 *
	 * @param lengths the sequence lengths, at least one element
	 */
	private static void checkLengthSpread(int[] lengths) {
		int shortest = lengths[0];
		int longest = lengths[0];
		for (int i = 1; i < lengths.length; i++) {
			final int length = lengths[i];
			final boolean fits = Math.abs(length - shortest) < MAX_LENGTH_SPREAD
					&& Math.abs(length - longest) < MAX_LENGTH_SPREAD;
			if (!fits) {
				System.err.println("The length difference between some of the sequences is too large");
				System.exit(1);
			}
			if (length < shortest) {
				shortest = length;
			}
			if (length > longest) {
				longest = length;
			}
		}
	}

	/**
	 * Creates the directory {@code path} (relative to the working directory)
	 * unless it already exists. A failure is reported on standard error and the
	 * program continues.
	 */
	private static void ensureDirectory(String path) {
		final File dir = new File(path);
		if (!dir.isDirectory() && !dir.mkdirs()) {
			System.err.println("Warning: could not create directory " + path);
		}
	}

	/**
	 * Deletes the file {@code path} (relative to the working directory) if it
	 * exists. A failure is reported on standard error and the program continues.
	 */
	private static void deleteIfPresent(String path) {
		final File file = new File(path);
		if (file.exists() && !file.delete()) {
			System.err.println("Warning: could not delete " + path);
		}
	}
}

