/*
 *  SARSE, Semi-Automated RNA Sequence Editor. Copyright (C) 2004 Allan
 *  Lind-Thomsen This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version. This program is distributed in the hope that it
 *  will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 *  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 *  Public License for more details. You should have received a copy of the GNU
 *  General Public License along with this program; if not, write to the Free
 *  Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 *  02111-1307, USA.
 */
package dk.kvl.alignmenttools.io;

import java.io.*;
import dk.kvl.alignmenttools.*;
import dk.kvl.sequencetools.*;
import java.util.regex.*;

/**
 *  Reads an alignment file in the col format and builds an
 *  {@link Alignment} from it.
 *  <p>
 *  The file is parsed completely while the reader is being constructed: first
 *  the file header (every line up to the first line starting with
 *  "; =========="), then a series of entries. Each entry consists of header
 *  lines starting with ';' ("; TYPE", "; COL", "; ENTRY" and free
 *  information lines), followed by column lines, and is terminated by another
 *  line starting with ';'. The underlying file is closed as soon as parsing
 *  has finished; the result is available through {@link #getAlignment()}.
 *  <p>
 *  Parse problems are reported on the standard error stream and never thrown
 *  to the caller; whatever was parsed successfully up to that point stays in
 *  the alignment.
 *
 * @author     allan
 * @created    February 19, 2004
 */
public class ColReader extends BufferedReader implements AlignmentReader
{
    /** Prefix of the line that separates the file header from the entries. */
    private static final String HEADER_END = "; ==========";
    /** Prefix of the rule line inside an entry header; carries no information. */
    private static final String ENTRY_RULE = "; ----------";

    // Compiled once; the expressions are the ones the parser has always used.
    private static final Pattern TYPE_PATTERN = Pattern.compile("^; *TYPE");
    private static final Pattern PAIRINGMASK_PATTERN = Pattern.compile("[Pp][Aa][Ii][Rr][Ii][Nn][Gg][Mm][Aa][Ss][Kk]");
    private static final Pattern COL_PATTERN = Pattern.compile("^; *[Cc][Oo][Ll]");
    private static final Pattern ENTRY_PATTERN = Pattern.compile("^; *[Ee][Nn][Tt][Rr][Yy]");
    private static final Pattern WORD_PATTERN = Pattern.compile("\\S+");
    private static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+");
    private static final Pattern ARBITRARY_PATTERN = Pattern.compile("[Aa][Rr][Bb][Ii][Tt][Rr][Aa][Rr][Yy]");
    private static final Pattern TREE_PATTERN = Pattern.compile("[Tt][Rr][Ee][Ee]");

    private Alignment alignment;

    /**
     *  Opens the given file, parses it completely and closes it again.
     *
     * @param  fileName                   the col file to read
     * @exception  FileNotFoundException  if the file cannot be opened for
     *      reading
     */
    public ColReader(File fileName) throws FileNotFoundException
    {
        super(new FileReader(fileName));
        alignment = new Alignment();
        try
        {
            readHeader();
        }
        finally
        {
            // Everything has been consumed at this point; do not leak the
            // file handle when the caller never calls close() itself.
            try
            {
                close();
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }


    /**
     *  Opens the file with the given path, parses it completely and closes it
     *  again.
     *
     * @param  fileName                   path of the col file to read
     * @exception  FileNotFoundException  if the file cannot be opened for
     *      reading
     */
    public ColReader(String fileName) throws FileNotFoundException
    {
        this(new File(fileName));
    }


    /**
     *  Reads the file header and then hands over to
     *  {@link #readSequences(String)}.
     *  <p>
     *  Every line before the first line starting with "; ==========" is
     *  appended to the header with its first two characters removed. If the
     *  file ends before that separator is found, no header is set and no
     *  entries are read. Exceptions are printed to standard error.
     */
    public void readHeader()
    {
        StringBuffer fileHeader = new StringBuffer();
        try
        {
            String line = readLine();
            while (line != null && !line.startsWith(HEADER_END))
            {
                //Header lines start with "; " that must be removed
                if (line.length() > 2)
                {
                    fileHeader.append(line.substring(2)).append("\n");
                }
                else
                {
                    fileHeader.append("\n");
                }
                line = readLine();
            }
            if (line == null)
            {
                // Empty file, or the header separator is missing.
                if (fileHeader.length() > 0)
                {
                    warn("no \"" + HEADER_END + "\" line found; nothing was read");
                }
                return;
            }
            alignment.setHeader(fileHeader);
            readSequences(line);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }


    /**
     *  Reads all entries, starting at the given line, and adds them to the
     *  alignment.
     *  <p>
     *  Pairing masks are added with <code>addPairingMask</code>. Every other
     *  entry is numbered consecutively from 1 and added with
     *  <code>addSequence</code>, unless the alignment already returns a
     *  sequence for the same label. Empty lines between or in front of entry
     *  header lines are skipped. An entry that is still being read when the
     *  file ends is completed with the columns read so far. Exceptions are
     *  printed to standard error and end the parse.
     *
     * @param  line  the first line to process (normally the header separator);
     *      <code>null</code> means there is nothing to read
     */
    public void readSequences(String line)
    {
        int number = 1;
        try
        {
            while (line != null)
            {
                Sequence seq = null;
                // Entry header: all leading lines starting with ';'.
                while (line != null && (line.length() == 0 || line.charAt(0) == ';'))
                {
                    if (line.length() > 0)
                    {
                        seq = readEntryHeaderLine(line, seq);
                    }
                    line = readLine();
                }
                if (line == null)
                {
                    // End of file before any column data.
                    if (seq != null)
                    {
                        warn("file ended inside an entry header; the entry was dropped");
                    }
                    break;
                }
                if (seq == null)
                {
                    throw new IOException("Column data found before a \"; TYPE\" line: " + line);
                }
                seq.prepareForColumns();
                // Column lines run up to the next line starting with ';'
                // or to the end of the file.
                do
                {
                    seq.addColumns(line);
                    line = readLine();
                } while (line != null && !line.startsWith(";"));
                seq.finalProcessing();
                if (seq instanceof PairingMask)
                {
                    alignment.addPairingMask((PairingMask) seq);
                }
                else
                {
                    seq.setNumber(number);
                    number++;
                    // An entry whose label is already present is not added again.
                    if (alignment.getSequence(seq.getLabel()) == null)
                    {
                        alignment.addSequence(seq);
                    }
                }
                if (line != null)
                {
                    // Step over the line that terminated the column data.
                    line = readLine();
                }
            }
            // NOTE(review): the element is discarded; the call is kept because
            // it has always been made after a complete parse - confirm whether
            // getSequences() has a side effect that is needed here.
            try
            {
                alignment.getSequences().nextElement();
            }
            catch (java.util.NoSuchElementException e)
            {
                warn("the file contains no sequences");
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }


    /**
     *  Gets the alignment built from the file.
     *
     * @return    the alignment; it holds whatever could be parsed
     */
    public Alignment getAlignment()
    {
        return alignment;
    }


    /**
     *  Processes one non-empty entry header line (a line starting with ';').
     *
     * @param  line  the header line
     * @param  seq   the sequence of the current entry, or <code>null</code>
     *      if no "; TYPE" line has been seen yet
     * @return       the sequence of the current entry after this line
     */
    private Sequence readEntryHeaderLine(String line, Sequence seq)
    {
        if (TYPE_PATTERN.matcher(line).find())
        {
            //TYPE must be first line according to col-format documentation
            return createSequence(line);
        }
        if (COL_PATTERN.matcher(line).find())
        {
            if (seq == null)
            {
                warn("COL line before TYPE line ignored: " + line);
                return seq;
            }
            // "; COL <number> <label>": the label is the first word after the number.
            Matcher numberMatcher = NUMBER_PATTERN.matcher(line);
            if (!numberMatcher.find())
            {
                warn("COL line without column number ignored: " + line);
                return seq;
            }
            Matcher labelMatcher = WORD_PATTERN.matcher(line);
            if (!labelMatcher.find(numberMatcher.end()))
            {
                warn("COL line without column label ignored: " + line);
                return seq;
            }
            try
            {
                int colNumber = Integer.parseInt(numberMatcher.group());
                seq.addColumnLabel(labelMatcher.group(), colNumber);
            }
            catch (NumberFormatException e)
            {
                warn("COL line with unusable column number ignored: " + line);
            }
            return seq;
        }
        Matcher entryMatcher = ENTRY_PATTERN.matcher(line);
        if (entryMatcher.find())
        {
            if (seq == null)
            {
                warn("ENTRY line before TYPE line ignored: " + line);
                return seq;
            }
            // The label is the first word after the ENTRY keyword.
            Matcher labelMatcher = WORD_PATTERN.matcher(line);
            if (labelMatcher.find(entryMatcher.end()))
            {
                seq.setLabel(labelMatcher.group());
            }
            else
            {
                warn("ENTRY line without label ignored: " + line);
            }
            return seq;
        }
        //other informations
        if (seq != null && !line.startsWith(ENTRY_RULE))
        {
            seq.addInformation(line.length() > 2 ? line.substring(2) : "");
        }
        return seq;
    }


    /**
     *  Creates the sequence object for a "; TYPE" line.
     *
     * @param  typeLine  the complete TYPE line
     * @return           a pairing mask if the line mentions "pairingmask", a
     *      sequence with the arbitrary alphabet if it mentions "tree" or
     *      "arbitrary", and a sequence with the RNA alphabet otherwise
     */
    private Sequence createSequence(String typeLine)
    {
        if (PAIRINGMASK_PATTERN.matcher(typeLine).find())
        {
            return new PairingMask();
        }
        if (TREE_PATTERN.matcher(typeLine).find() || ARBITRARY_PATTERN.matcher(typeLine).find())
        {
            return new Sequence(SequenceAlphabet.ARBITRARY);
        }
        int alphabet = SequenceAlphabet.RNAALPHABET;
        return new Sequence(alphabet);
    }


    /**
     *  Reports a recoverable parse problem on standard error.
     *
     * @param  message  description of the problem
     */
    private static void warn(String message)
    {
        System.err.println("ColReader: " + message);
    }
}

