// // Programmer: Craig Stuart Sapp // Creation Date: Tue Jun 5 00:28:50 PDT 2007 // Last Modified: Sat Jun 23 19:36:30 PDT 2007 (added random noise) // Filename: ...sig/examples/all/scaperank.cpp // Web Address: http://sig.sapp.org/examples/museinfo/scaperank/scaperank.cpp // Syntax: C++; museinfo // // Description: Generate a comparison between multiple sets of // number sequences. // #include "Array.h" #include "Options.h" #include #include #include /* for qsort and bsearch functions */ #include /* for random number seeding */ #include using namespace std; // function declarations: void checkOptions (Options& opts, int argc, char** argv); void example (void); void usage (const char* command); void readData (Array >& data, const char* hfile); void printData (Array >& data); double pearsonCorrelation (int size, double* a, double* b); double getMean (int size, double* a); void calculateRank0 (Array >& data, int reference, Array& rank0values, Array& rank0rank); void calculateRank1 (Array >& data, Array > >& correlationset, int reference, Array& rank0values, Array& rank0rank); void calculateRank2 (Array >& data, Array > >& correlationset, int reference, Array& rank2values, Array& rank2rank); void calculateRank3 (Array >& data, Array > >& correlationset, int reference, Array& rank2values, Array& rank2rank, Array& rank3values, Array& rank3rank); void sortRanks (Array& values, Array& ranks); int numbersort (const void* A, const void* B); void calculateCorrelationSet (Array > >& correlationset, Array >& data, int reference); void calculateCorrelations (Array >& correlations, Array& a, Array& b); int getBestIndex (Array > >& correlations, int i, int j, Array& mask); void findBestArea (Array > >& correlationset, Array& mask, int& bestmatch, double& bestarea); void fillWithDummy (Array > >& correlationset, int index, double value = -1.0); int getArea (Array > >& correlationset, Array& mask, Array& areas); void printRange (int count); void unsmoothSequence (Array& sequence, double gain); void smoothSequence (Array& sequence, double gain); void applySmoothing (Array >& data, double smooth); double getRandomDelta (double value, double fraction); Options options; int rank0q = 0; // used with -0 option int rank1q = 0; // used with -1 option int rank2q = 0; // used with -2 option int lowlimit = 3; int rank3q = 0; // used with -3 option int reference = 0; // used with -r option int datacountQ = 0; // used with -n option double smooth = 0.0; // used with -s option int randomseed = 0; // used with -S option double randomamt = 0.0; // used with -R option int printrandom = 1; // used with --random char pids[50000] = {0}; char labels[50000] = {0}; Array pidindex; Array labelindex; const char null[2] = "."; ////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // process the command-line options checkOptions(options, argc, argv); Array > data; string filename = ""; if (options.getArgCount() <= 0) { filename = ""; } else { filename = options.getArg(1); } readData(data, filename.c_str()); if (datacountQ) { // report the number of data sequences and then exit cout << data.getSize() << endl; exit(0); } if (options.getBoolean("range")) { printRange(data.getSize()); exit(0); } if (reference < 0) { reference = 0; } if (reference >= data.getSize()) { reference = data.getSize()-1; } if (randomseed < 0) { randomseed = time(NULL); // time in seconds } srand48(randomseed); // srand(randomseed); // for non-unix compiling int i; Array randomaddition; randomaddition.setSize(data[0].getSize()); randomaddition.allowGrowth(0); if (randomamt > 0.0) { data.setSize(data.getSize()+1); data[data.getSize()-1].setSize(data[0].getSize()); for (i=0; i rank0values; Array rank0rank; calculateRank0(data, reference, rank0values, rank0rank); Array > > correlationset; calculateCorrelationSet(correlationset, data, reference); Array rank1values; Array rank1rank; calculateRank1(data, correlationset, reference, rank1values, rank1rank); Array rank2values; Array rank2rank; calculateRank2(data, correlationset, reference, rank2values, rank2rank); Array rank3values; Array rank3rank; calculateRank3(data, correlationset, reference, rank2values, rank2rank, rank3values, rank3rank); cout << "**label\t" << "**pid\t" << "**rank0\t" << "**score0\t" << "**rank1\t" << "**score1\t" << "**rank2\t" << "**score2\t" << "**rank3\t" << "**score3\n"; for (i=0; i 0.0) { cout << "!!!srand:\t" << randomseed << endl; cout << "**rand" << endl; for (i=0; i >& data, double smooth) { if (smooth >= 1.0 || smooth <= -1.0) { return; } int i; if (smooth > 0.0) { for (i=0; i= 1000 && i < 1000) { cout << "0"; } //if (count >= 100 && i < 100) { cout << "0"; } //if (count >= 10 && i < 10) { cout << "0"; } cout << i; if (i < count) { cout << " "; } } } ////////////////////////////// // // calculateRank0 -- calculate the Pearson correlation of all sequences // compared to the reference sequence. // void calculateRank0(Array >& data, int reference, Array& rank0values, Array& rank0rank) { if (reference < 0 || reference >= data.getSize()) { cerr << "Error: invalid reference number: " << reference << "\n"; cerr << "Expected value in range from 0 to " << data.getSize()-1; cerr << endl; exit(1); } int i; rank0values.setSize(data.getSize()); rank0values.allowGrowth(0); rank0rank.setSize(data.getSize()); rank0rank.allowGrowth(0); for (i=0; i& values, Array& ranks) { int i; Array presort; Array sortdata; presort.setSize(values.getSize()); sortdata.setSize(values.getSize()); for (i=0; i valueb) { return -1; } else if (valuea < valueb) { return +1; } else { return 0; } } ////////////////////////////// // // getMean -- // double getMean(int size, double* a) { if (size <= 0) { return 0.0; } int i; double sum = 0.0; for (i=0; i >& data) { int i, j; for (i=0; i >& data, const char* filename) { data.setSize(500); data.setSize(0); data.setGrowth(500); data.allowGrowth(1); ifstream infile; #ifndef OLDCPP infile.open(filename, ios::in); #else infile.open(filename, ios::in | ios::nocreate); #endif if (!infile.is_open()) { cerr << "Cannot open file for reading: " << filename << endl; exit(1); } int inputsize = 0; double inputvalues[500]; int ii; char *ptr; double value; const char* blanks = " \t,:"; int foundlabels = 0; char templine[100000]; while (!infile.eof()) { infile.getline(templine, 90000, '\n'); if (!foundlabels) { if (!((strncmp(templine, "pid", 3) == 0) || (strncmp(templine, "!!", 2) == 0) || (strncmp(templine, "*", 1) == 0) || (strncmp(templine, "!pid", 4) == 0))) { foundlabels = 1; strcpy(labels, templine); continue; } } if (infile.eof() && (strcmp(templine, "") == 0)) { break; } else if (isdigit(templine[0]) || templine[0] == '-' || templine[0] == '+') { ptr = strtok(templine, blanks); inputsize = 0; while (ptr != NULL && sscanf(ptr, "%lf", &value) == 1) { inputvalues[inputsize++] = value; if (inputsize >= 100) { break; } ptr = strtok(NULL, blanks); } if (inputsize > 0 && data.getSize() == 0) { data.setSize(inputsize); for (ii=0; ii > >& correlationset, Array >& data, int reference) { correlationset.setSize(data.getSize()); int i; for (i=0; i >& data, Array > >& correlationset, int reference, Array& rank1values, Array& rank1rank) { int setcount = correlationset.getSize(); int mastercount = 0; Array localcount; localcount.allowGrowth(0); localcount.setSize(setcount); localcount.setAll(0); Array mask; mask.setSize(setcount); mask.setAll(1); mask[reference] = 0; int marker = 0; if (correlationset[0].getSize() == 0) { marker = 1; } int length = correlationset[marker].getSize(); int i,j; int datawidth; int bestindex; for (i=0; i > >& correlationset, Array& mask, Array& areas) { areas.setSize(correlationset.getSize()); areas.setAll(0.0); int marker = 0; if (correlationset[0].getSize() == 0) { marker = 1; } int length = correlationset[marker].getSize(); int i,j; int mastercount = 0; int datawidth; int bestindex; for (i=0; i maxx) { maxx = i; } areas[i] = areas[i] / mastercount; } return maxx; // return the index of the largest area } ////////////////////////////// // // calculateRank3 -- // void calculateRank3(Array >& data, Array > >& correlationset, int reference, Array& rank2values, Array& rank2rank, Array& rank3values, Array& rank3rank) { rank3rank.setSize(data.getSize()); rank3rank.setAll(0); rank3rank.allowGrowth(0); rank3values.setSize(data.getSize()); rank3values.setAll(0); rank3values.allowGrowth(0); int noisecut = data.getSize() / 2; Array mask; mask.setSize(data.getSize()); mask.allowGrowth(0); mask.setAll(0); int i; // turn on all of the noise layers: for (i=0; i= noisecut) { mask[i] = 1; } rank3values[i] = rank2values[i]; } // place each non-noise performance into the comparison Array area; for (i=0; i >& data, Array > >& correlationset, int reference, Array& rank2values, Array& rank2rank) { int i; int iterations = data.getSize() - 2; rank2rank.setSize(data.getSize()); rank2rank.allowGrowth(0); rank2rank[reference] = 0; rank2values.setSize(data.getSize()); rank2values.allowGrowth(0); rank2values[reference] = 1.0; Array mask; mask.setSize(data.getSize()); mask.allowGrowth(0); mask.setAll(1); mask[reference] = 0; int iter; int bestmatch = reference; double bestarea = 0.0; int rankcounter = 1; for (iter=0; iter > >& correlationset, int index, double value) { int i; for (i=0; i > >& correlationset, Array& mask, int& bestmatch, double& bestarea) { bestmatch = -1; bestarea = -1; Array counts; counts.setSize(correlationset.getSize()); counts.allowGrowth(0); counts.setAll(0); int mastercount = 0; int marker = 0; if (correlationset[0].getSize() == 0) { marker = 1; } int length = correlationset[marker].getSize(); int i, j; int datawidth; int bestindex; for (i=0; i counts[bestindex]) { bestindex = i; } } bestmatch = bestindex; bestarea = (double)counts[bestmatch]/mastercount; } ////////////////////////////// // // calculateCorrelations -- // void calculateCorrelations(Array >& correlations, Array& a, Array& b) { int i, j; correlations.setSize(a.getSize()); for (i=0; i > >& correlations, int i, int j, Array& mask) { int firstindex = -1; int lastindex = -1; int bestindex = -1; int ii; for (ii=0; ii 0) { firstindex = ii; break; } } for (ii=mask.getSize()-1; ii>=0; ii--) { if (mask[ii] > 0) { lastindex = ii; break; } } bestindex = firstindex; if (bestindex < 0) { cerr << "Error: no data to measure best correlation from" << endl; exit(1); } for (ii=firstindex; ii<=lastindex; ii++) { if (mask[ii] == 0) { continue; } if (correlations[ii][i][j] > correlations[bestindex][i][j]) { bestindex = ii; } } return bestindex; } ////////////////////////////// // // smoothSequence -- smooth the sequence with a // symmetric exponential smoothing filter (applied in the forward // and reverse directions with the specified input gain. // // Difference equation for smoothing: y[n] = k * x[n] + (1-k) * y[n-1] // void smoothSequence(Array& sequence, double gain) { double oneminusgain = 1.0 - gain; int i; int ssize = sequence.getSize(); // reverse filtering first for (i=ssize-2; i>=0; i--) { sequence[i] = gain*sequence[i] + oneminusgain*sequence[i+1]; } // then forward filtering for (i=1; i& sequence, double gain) { Array smoothed = sequence; smoothSequence(smoothed, gain); int i; for (i=0; i