//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Mon May 18 11:42:45 PDT 2009
// Filename:      ...sig/examples/all/cherryc.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/cherryc.cpp
// Syntax:        C++; museinfo
//
// Description:   Performance correlation comparison
//

#include "humdrum.h"
#include "MidiFile.h"

#include <math.h>
#include <iomanip>

// function declarations:
void     checkOptions(Options& opts, int argc, char** argv);
void     example(void);
void     usage(const char* command);
void     getSequences(Array<Array<double> >& performances, 
                                 Array<Array<char> >& names, 
                                 HumdrumFile& infile);
int      compareSequences(Array<double>& a, Array<double>& b, 
		                 int ind, int len);
double   pearsonCorrelation(int size, double* x, double* y);
double   pearsonCorrelationHole(int size, double* x, double* y, int ignore);
double   getMean(Array<double>& data);
double   getSampleSD(double mean, Array<double>& data);
void     removeIndex(Array<double>& a, Array<double>& b, int best);
double   model(double x);
double   modelOld(double x);
double   getDifference(Array<double>& data, Array<double>& models, 
                                 int preexclude, int exclude, double parameter);
double   getCorrected(Array<double>& data, Array<double>& models, 
                                 int preexclude, int exclude);
double   getParabolicMinimum(double x1, double y1, double x2, double y2,
                                 double x3, double y3);
void     extractNames(Array<Array<char> >& names, 
                                 HumdrumRecord& record);
double   adjustedCorrelation(Array<double> a, Array<double> b);
double   adjustedCorrelationNew(Array<double> a, Array<double> b, int count);
void     printCorrelations(Array<Array<double> >& corr, 
                                 Array<Array<char> >& names);
void     fixLocations(Array<int>& locations);
double   adjustedCorrelationSigmoid(Array<double> a, Array<double> b);
double   sigmoid(double value);

// User interface variables:
Options   options;
int       verboseQ   = 0;
int       mmaQ       = 1;
int       exclude    = 5;
int       preexclude = 5;
int       printSig   = 0;  // used with -s option
double    sigsens    = 1.0;
int       printAdj   = 0;  // used with -a option
int       printNew   = 0;  // used with -n option
int       newcount   = 20; // used with -n option

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {

   // process the command-line options
   checkOptions(options, argc, argv);

   HumdrumFile infile;
   infile.read(options.getArg(1));

   Array<Array<char> >   names;         // name/year of the performer
   Array<Array<double> > performances;  // data for each performer

   getSequences(performances, names, infile);

   Array<Array<double> > rawcorr;
   Array<Array<double> > adjcorr;

   rawcorr.setSize(performances.getSize());
   adjcorr.setSize(performances.getSize());
   int i, j;
   for (i=0; i<performances.getSize(); i++) {
      for (j=0; j<performances.getSize(); j++) {
         rawcorr[i].setSize(performances.getSize());
         adjcorr[i].setSize(performances.getSize());
         rawcorr[i].setAll(-1.0);
         adjcorr[i].setAll(-1.0);
      }
   }

   double corr;
   if (printSig) {

      // calculate sigmoid-scaled correlations
      for (i=0; i<performances.getSize(); i++) {
         adjcorr[i][i] = 1.0;
         for (j=i+1; j<performances.getSize(); j++) {
            corr = adjustedCorrelationSigmoid(performances[i], performances[j]);
            adjcorr[i][j] = corr;
            adjcorr[j][i] = corr;
         }
      }

      cout << "!! SIGMOID CORRELATIONS:" << endl;
      printCorrelations(adjcorr, names);


   } else if (printAdj) {

      // calculate adjusted correlations
      for (i=0; i<performances.getSize(); i++) {
         adjcorr[i][i] = 1.0;
         for (j=i+1; j<performances.getSize(); j++) {
            cerr << "Calculating " << i << "," << j << "..." << endl;
            corr = adjustedCorrelation(performances[i], performances[j]);
            adjcorr[i][j] = corr;
            adjcorr[j][i] = corr;
         }
      }

      cout << "!! ADJUSTED CORRELATIONS:" << endl;
      printCorrelations(adjcorr, names);

   } else if (printNew) {

      // calculate adjusted correlations, new model
      for (i=0; i<performances.getSize(); i++) {
         adjcorr[i][i] = 1.0;
         for (j=i+1; j<performances.getSize(); j++) {
            cerr << "Calculating " << i << "," << j << "..." << endl;
            corr = adjustedCorrelationNew(performances[i], 
                  performances[j], newcount);
            adjcorr[i][j] = corr;
            adjcorr[j][i] = corr;
         }
      }

      cout << "!! ADJUSTED CORRELATIONS:" << endl;
      printCorrelations(adjcorr, names);

   } else {

      // calculate raw correlations
      for (i=0; i<performances.getSize(); i++) {
         rawcorr[i][i] = 1.0;
         for (j=i+1; j<performances.getSize(); j++) {
            corr = pearsonCorrelation(performances[i].getSize(), 
                  performances[i].getBase(), performances[j].getBase());
            rawcorr[i][j] = corr;
            rawcorr[j][i] = corr;
         }
      }

      cout << "!! RAW CORRELATIONS:" << endl;
      printCorrelations(rawcorr, names);

   }

   return 0;
}



//////////////////////////////
//
// printCorrelations --
//

void printCorrelations(Array<Array<double> >& corr, 
      Array<Array<char> >& names) {

   int i, j;

   cout << "**name";
   for (i=0; i<names.getSize(); i++) {
      cout << "\t**corr";
   }
   cout << endl;

   cout << "!";
   for (i=0; i<names.getSize(); i++) {
      cout << "\t!" << names[i].getBase();
   }
   cout << endl;

   for (i=0; i<corr.getSize(); i++) {
      cout << names[i].getBase();
      for (j=0; j<corr[i].getSize(); j++) {
         cout << "\t" << corr[i][j];
      }
      cout << endl;
   }

   cout << "*-";
   for (i=0; i<names.getSize(); i++) {
      cout << "\t*-";
   }
   cout << endl;


}


//////////////////////////////////////////////////////////////////////////




//////////////////////////////
//
// adjustedCorrelation --
//

double adjustedCorrelation(Array a, Array b) {
   int i, best;
   int len = a.getSize();

   Array<double> data;
   data.setSize(a.getSize());
   data.setSize(0);

   Array<double> models;
   models.setSize(a.getSize());
   models.setAll(0.0);
   models.allowGrowth(0);

   for (i=0; i<models.getSize(); i++) {
      models[i] = model((double)i/models.getSize());
   }

   double corr;
   corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   data.append(corr);

   int iterations = a.getSize()-exclude;
   for (i=0; i<iterations; i++) {
      best = compareSequences(a, b, i+1, len);
      removeIndex(a, b, best);
      corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
      data.append(corr);
      //if (mmaQ) {
      //   if (i < iterations-1) {
      //      cout << ",\n";
      //   }
      //}
   }

   //if (mmaQ) {
   //   cout << "};\n";
   //}

   int count = 1000;
   Array<double> value;
   value.setSize(count);
   value.allowGrowth(0);

   Array<double> parameter;
   parameter.setSize(count);
   parameter.allowGrowth(0);

   for (i=0; i<count; i++) {
      parameter[i] = (double)i/count;
      value[i] = getDifference(data, models, preexclude, exclude, parameter[i]);
   }

//   cout.setf(ios::fixed);
//   cout << "data = {\n";
//   for (i=0; i<value.getSize(); i++) {
//      cout << "{" << parameter[i] << ", " << value[i] << "}";
//      if (i < value.getSize()-1) {
//         cout << ",";
//      }
//      cout << "\n";
//   }
//   cout << "};\n";
//
//   cout << "\nmodel = {\n";
//   for (i=0; i<models.getSize(); i++) {
//      cout << "\t" << models[i];
//      if (i < models.getSize()-1) {
//         cout << ",";
//      }
//      cout << "\n";
//   }
//   cout << "};\n";
//
//   cout << "\nraw = {\n";
//   for (i=0; i<data.getSize(); i++) {
//      cout << "\t" << data[i];
//      if (i < data.getSize()-1) {
//         cout << ",";
//      }
//      cout << "\n";
//   }
//   cout << "};\n";

   double corrected = getCorrected(data, models, preexclude, exclude);
//   cout << "original = " << data[0] << ";\n";
//   cout << "revised = " << corrected << ";\n";

   return corrected;
}



//////////////////////////////
//
// adjustedCorrelationNew -- attenuate bad points
//

double adjustedCorrelationNew(Array a, Array b, int ncount) {
   int i, best;
   int len = a.getSize();

   //Array<double> data;
   //data.setSize(a.getSize());
   //data.setSize(0);

   Array<double> models;
   models.setSize(a.getSize());
   models.setAll(0.0);
   models.allowGrowth(0);

   // first adjust the values so that they are z-scores:
   // dividing by the sd is not necessary but the mean subtraction is.
   double amean, bmean;
   double asd, bsd;
   amean = getMean(a);
   bmean = getMean(b);
   asd   = getSampleSD(amean, a);
   bsd   = getSampleSD(bmean, b);
   Array<double> afinale(a.getSize());
   Array<double> bfinale(b.getSize());
   for (i=0; i<a.getSize(); i++) {
      a[i] = (a[i] - amean) / asd;
      b[i] = (b[i] - bmean) / bsd;
      afinale[i] = a[i];
      bfinale[i] = b[i];
   }

   for (i=0; i<models.getSize(); i++) {
      models[i] = model((double)i/models.getSize());
   }

   double corr;
   //corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   //data.append(corr);

   int iterations = ncount;
   Array<int> locations;
   locations.setSize(iterations);
   locations.setAll(0);

   // int iterations = a.getSize()-exclude;
   for (i=0; i<iterations; i++) {
      best = compareSequences(a, b, i+1, len);
      locations[i] = best;
      removeIndex(a, b, best);
      corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
      //data.append(corr);
      //if (mmaQ) {
      //   if (i < iterations-1) {
      //      cout << ",\n";
      //   }
      //}
   }

   fixLocations(locations);

   Array<double> window;
   window.setSize(locations.getSize());
   window.allowGrowth(0);
   for (i=0; i<window.getSize(); i++) {
      window[i] = pow(sin(M_PI / 2.0 * (i + 0.5) / window.getSize()), 0.5);
      afinale[locations[i]] *= window[i];
      bfinale[locations[i]] *= window[i];
   }
   
   return pearsonCorrelation(afinale.getSize(), 
               afinale.getBase(), bfinale.getBase());
}



//////////////////////////////
//
// adjustedCorrelationSigmoid --
//

double adjustedCorrelationSigmoid(Array a, Array b) {
   // first adjust the values so that they are z-scores:
   // dividing by the sd is not necessary but the mean subtraction is.
   double amean, bmean;
   double asd,   bsd;
   amean = getMean(a);
   bmean = getMean(b);
   asd   = getSampleSD(amean, a);
   bsd   = getSampleSD(bmean, b);
   int i;
   for (i=0; i<a.getSize(); i++) {
      a[i] = (a[i] - amean) / asd;
      b[i] = (b[i] - bmean) / bsd;
   }

   // now apply a sigmoid scaling to the results:
   for (i=0; i<a.getSize(); i++) {
      a[i] = sigmoid(a[i] * sigsens);
      b[i] = sigmoid(b[i]);
   }

   return pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
}



///////////////////////////////
//
// sigmoid -- 
//

double sigmoid(double value) {
   double output = 1.0 / (1.0 + exp(-value));
   return 2 * (output - 0.5);
}



///////////////////////////////
//
// fixLocations --
//

void fixLocations(Array& locations) {
   int i, j;
   for (i=0; i<locations.getSize(); i++) {
      for (j=i+1; j<locations.getSize(); j++) {
         if (locations[j] >= locations[i]) {
            locations[j]++;
         }
      }
   }
}



//////////////////////////////
//
// getCorrected --
//

double getCorrected(Array<double>& data, Array<double>& models, 
      int preexclude, int exclude) {
   double target = data[0];
   double targethi = (1.0 - target)/2 + target;
   double targetlo = target/2;
   
   double value   = getDifference(data, models, preexclude, exclude, target);
   double valuehi = getDifference(data, models, preexclude, exclude, targethi);
   double valuelo = getDifference(data, models, preexclude, exclude, targetlo);

   double minimum = getParabolicMinimum(targetlo, valuelo, target, value, 
         targethi, valuehi);

   return minimum;
}



//////////////////////////////
//
// getParabolicMinimum -- or maximum if inflection is negative...
//

double getParabolicMinimum(double x1, double y1, double x2, double y2,
      double x3, double y3) {
   double a = (x3*(y2-y1)+x2*(y1-y3)+x1*(y3-y2))/((x1-x2)*(x1-x3)*(x2-x3));
   double b = (x3*x3*(y1-y2)+x1*x1*(y2-y3)+x2*x2*(y3-y1))/
                    ((x1-x2)*(x1-x3)*(x2-x3));
   //double c = (x1*x3*(x3-x1)*y2 + x2*x2*(x3*y1-x1*y3)+x2(x1*x1*y3-x3*x3*y1))/
   //                 ((x1-x2)*(x1-x3)*(x2-x3));

   return -b/(2.0*a);
}



//////////////////////////////
//
// getDifference --
//

double getDifference(Array<double>& data, Array<double>& models, 
      int preexclude, int exclude, double parameter) {
   int i;
   double sum = 0.0;
   double value;
   for (i=preexclude; i<models.getSize() - exclude; i++) {
      value = data[i] - (models[i] * (1.0 - parameter) + parameter);
      sum += value * value;
   }
   return sum;
}



//////////////////////////////
//
// removeIndex --
//

void removeIndex(Array& a, Array& b, int best) {
   int i;
   int size = a.getSize();
   for (i=best; i<size-1; i++) {
      a[i] = a[i+1];
      b[i] = b[i+1];
   }
   a.setSize(size-1);
   b.setSize(size-1);
}



//////////////////////////////
//
// getSequences --
//

void getSequences(Array<Array<double> >& performances, 
      Array<Array<char> >& names, HumdrumFile& infile) {
   int i, j;
   double value;
   int pcount = infile.getMaxTracks();
   int rcount = infile.getNumLines();
   int foundnames = 0;
   performances.setSize(pcount);
   names.setSize(pcount);
   performances.allowGrowth(0);
   names.allowGrowth(0);
   for (i=0; i<pcount; i++) {
      performances[i].setSize(rcount);
      performances[i].setSize(0);
      names[i].setSize(1);
      names[0] = '\0';
      names[i].setSize(0);
   }

   for (i=0; i<infile.getNumLines(); i++) {
      if ((!foundnames) && infile[i].isLocalComment()) {
         if (strstr(infile[i].getLine(), "pid") == NULL) {
            extractNames(names, infile[i]);
            foundnames = 1;
         }
         continue;
      }
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }
      for (j=0; j<infile[i].getFieldCount(); j++) {
         value = 0;
         sscanf(infile[i][j], "%lf", &value);
         performances[j].append(value);
      }
   }
}



//////////////////////////////
//
// extractNames --
//

void extractNames(Array >& names, HumdrumRecord& record) {
   int i, len;
   for (i=0; i<record.getFieldCount(); i++) {
      len = strlen(record[i]);
      names[i].setSize(len);                    // no need for +1 here, see next
      strcpy(names[i].getBase(), record[i]+1);  // +1 to skip initial "!"
   }
}



//////////////////////////////
//
// compareSequences --
//

int compareSequences(Array& a, Array& b, int ind, int len) {
   int i;
   double basecorr;
   double corr;

   Array<double> corrlist;
   corrlist.setSize(a.getSize());
   corrlist.setSize(0);
   Array<int> index;
   index.setSize(a.getSize());
   index.setSize(0);

   basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   if (verboseQ) {
      cout << "base" << "\t" << basecorr << "\n";
   }

   for (i=0; i<a.getSize(); i++) {
      corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
      corrlist.append(corr);
      index.append(i);
   }

   double mean = getMean(corrlist);
   double sd   = getSampleSD(mean, corrlist);


   Array<double> zscores;
   zscores.setSize(corrlist.getSize());
   int asize = corrlist.getSize();
   for (i=0; i<asize; i++)  {
      zscores[i] = (corrlist[i] - mean) / sd;
   }

   int maxi = index[0];
   for (i=1; i<asize; i++) {
      if (zscores[i] > zscores[maxi]) {
         maxi = i;
      }
   }

   //if (mmaQ) {
   //   cout << "{" << ind << "/" << len << ", " << corrlist[maxi] << "}";
   //}

   if (verboseQ) {
      cout << "max\t" << maxi << "\t";
      cout << "mean\t" << mean << "\t";
      cout << "sd\t"   << sd   << "\t";
      cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
      for (i=0; i<asize; i++) {
         cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
      }
   }

   return maxi;
}



//////////////////////////////
//
// getSampleSD --
//

double getSampleSD(double mean, Array& data) {
   int size = data.getSize();
   double sum = 0.0;
   double value;
   int i;
   for (i=0; i<size; i++) {
      value = data[i] - mean;
      sum += value * value;
   }

   return sqrt(sum / (size - 1.0));
}



//////////////////////////////
//
// getMean --
//

double getMean(Array& data) {
   int size = data.getSize();
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += data[i];
   }

   return sum / size;
}


//////////////////////////////
//
// ranksort -- sort counts by largest first
//

int ranksort(const void* A, const void* B) {
   int& a = *(*((int**)A));
   int& b = *(*((int**)B));
   if (a < b) {
      return +1;
   } else if (a > b) {
      return -1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// pearsonCorrelationHole --
//

double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;

   double meanx = x[0];
   double meany = y[0];

   int starti = 1;

   if (ignore == 0) {
      meanx = x[1];
      meany = y[1];
      starti = 2;
   }

   double sweep;
   double deltax;
   double deltay;

   int i;

   for (i=starti; i<ignore; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   for (i=ignore+1; i<size; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   double popsdx = sqrt(sumx / (size-1));
   double popsdy = sqrt(sumy / (size-1));
   double covxy  = sumco / (size-1);

   return covxy / (popsdx * popsdy);
}




//////////////////////////////
//
// checkOptions --
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("a|adjusted=b",  "print adjusted correlation values");
   opts.define("n|new-model=i:20", "print adjusted correlation values");
   opts.define("s|sigmoid=d:1.0", "print sigmoid-scaled correlation values");

   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, Jan 2008" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 30 Jan 2008" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   printAdj = opts.getBoolean("adjusted");
   printSig = opts.getBoolean("sigmoid");
   sigsens  = opts.getDouble("sigmoid");
   printNew = opts.getBoolean("new-model");
   newcount = opts.getInteger("new-model");

}



//////////////////////////////
//
// pearsonCorrelation --
//

double pearsonCorrelation(int size, double* x, double* y) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;
   double meanx = x[0];
   double meany = y[0];
   double sweep;
   double deltax;
   double deltay;

   int i;
   for (i=2; i<=size; i++) {
      sweep = (i-1.0) / i;
      deltax = x[i-1] - meanx;
      deltay = y[i-1] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / i;
      meany += deltay / i;
   }

   double popsdx = sqrt(sumx / size);
   double popsdy = sqrt(sumy / size);
   double covxy  = sumco / size;

   return covxy / (popsdx * popsdy);
}



//////////////////////////////
//
// model -- modeled after a whitenoise sequence compared to
//  another whitenoise sequence added to the original whitenoise sequence.
//

double model(double x) {
   double y;
   y =   0.923588 * pow(x,5.0) 
       - 3.385430 * pow(x,4.0) 
       + 5.339920 * pow(x,3.0) 
       - 5.237760 * pow(x,2.0) 
       + 3.359680 * x;
   return y;
}



//////////////////////////////
//
// modelOld -- modeled after the curve generated by two whitenoise
//    sequences.
//

double modelOld(double x) {
   double y;
   y =   0.516818 * pow(x,5.0) 
       - 1.708827 * pow(x,4.0) 
       + 2.771050 * pow(x,3.0) 
       - 3.489990 * pow(x,2.0) 
       + 2.910390 * x;
   return y;
}



//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}



// md5sum: 34e03c86fea5cec9cbd74461161faace cherryc.cpp [20090525]