#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

/*
This program takes a score file and finds the best group number.

The program takes the mean of the maximum score and the score of the
grouping with individual sequences.

Then it finds the line from the grouping with individual sequences to
the first grouping that exceeds the mean.

The slope of the line is negated, and the resulting line is lowered
onto the plot from above. The first point that it hits is the best
grouping.

This grouping has fewer groups than (or the same number of groups as)
the maximum scoring grouping. This also means that its score is lower
(or the same).

An adjustment factor to the slope of the line can be included as a
second argument. A high value tends to choose fewer groups, while a
lower value tends to choose more (until the maximum scoring grouping
is reached).

For adjustment factor = 0, the maximum scoring group is chosen, since
the slope is 0.
*/

using namespace std;

// Parses one "<group> <score>" record per line from `in`, appending the
// group label to `grouplist` and the score to `scorelist` (anything after
// the score on a line is ignored).
//
// Returns false as soon as a line lacks a label or a numeric score (this
// includes blank lines); returns true once the input is exhausted.
static bool read_scores(std::istream &in,
                        std::vector<std::string> &grouplist,
                        std::vector<double> &scorelist)
{
  std::string line;

  // The stream itself is the loop condition; comparing it against 0 only
  // compiled with pre-C++11 libraries.
  while (std::getline(in, line)) {
    // A stack-allocated stream per line: nothing to free, and no error
    // state carried over from the previous line.
    std::istringstream fields(line);
    std::string group;
    double score = 0;

    if (!(fields >> group >> score))
      return false;

    grouplist.push_back(group);
    scorelist.push_back(score);
  }

  return true;
}

// Returns the index of the chosen grouping in `scorelist`.
//
// The slope from the first score to the first score that exceeds the mean
// of the first and the maximum score is scaled by `adjust`; the chosen
// index is the first one that maximises scorelist[i] + slope * i.
//
// Precondition: `scorelist` is not empty.
static std::size_t choose_grouping(const std::vector<double> &scorelist,
                                   double adjust)
{
  const std::size_t total = scorelist.size();
  const double first = scorelist[0];

  // Maximum score
  double top = first;
  for (std::size_t i = 1; i < total; ++i)
    if (scorelist[i] > top)
      top = scorelist[i];

  // Mean of maximum and first score
  const double mean = (top + first) / 2;

  // First score over the mean. Index 0 can never qualify because the mean
  // is at least the first score, so 0 doubles as the "not found" marker.
  std::size_t n = 0;
  for (std::size_t i = 1; i < total; ++i)
    if (scorelist[i] > mean) {
      n = i;
      break;
    }

  // The slope of the curve from the start to the first score over the
  // mean. When nothing exceeds the mean (e.g. the first score is already
  // the maximum) there is no such line; use a flat slope instead of
  // dividing by an undefined index.
  double slope = 0;
  if (n != 0)
    slope = adjust * (scorelist[n] - first) / static_cast<double>(n);

  // Strict comparison: on ties the earliest index wins.
  double best = first;
  std::size_t best_i = 0;
  for (std::size_t i = 0; i < total; ++i) {
    const double d = scorelist[i] + slope * static_cast<double>(i);
    if (d > best) {
      best = d;
      best_i = i;
    }
  }

  return best_i;
}

// Entry point: findstate <score_file> [<adjustment factor>]
//
// Reads the score file, picks a grouping with choose_grouping() and prints
// its label on standard output. Returns 0 on success and 1 on a usage
// error, an unreadable or malformed score file, or a score file without
// any records.
int main(int argc, char **argv)
{
  double adjust = 1;

  if (argc == 3) {
    // strtod reports where parsing stopped, so a malformed factor is
    // rejected instead of silently becoming 0 as it did with atof.
    char *end = argv[2];
    adjust = std::strtod(argv[2], &end);
    if (end == argv[2] || *end != '\0') {
      std::cerr << "findstate: invalid adjustment factor: " << argv[2] << std::endl;
      return 1;
    }
  }
  else if (argc != 2) {
    std::cerr << "usage: findstate <score_file> [<adjustment factor>]" << std::endl;
    return 1;
  }

  // Read state
  std::ifstream f(argv[1]);
  if (!f) {
    std::cerr << "findstate: error in opening state file: " << argv[1] << std::endl;
    return 1;
  }

  std::vector<std::string> grouplist;
  std::vector<double> scorelist;

  if (!read_scores(f, grouplist, scorelist)) {
    std::cerr << "findstate: error in reading from state file" << std::endl;
    return 1;
  }

  f.close();

  // Without at least one record there is nothing to choose from.
  if (scorelist.empty()) {
    std::cerr << "findstate: no scores found in state file: " << argv[1] << std::endl;
    return 1;
  }

  std::cout << grouplist[choose_grouping(scorelist, adjust)] << std::endl;

  return 0;
}
