// // Programmer: Craig Stuart Sapp // Creation Date: Thu Apr 8 17:06:42 PDT 2004 // Last Modified: Fri Apr 9 00:01:00 PDT 2004 // Filename: ...sig/examples/all/ttuprofile.cpp // Web Address: http://sig.sapp.org/examples/museinfo/humdrum/ttuprofile.cpp // Syntax: C++; museinfo // // Description: // #include "humdrum.h" #include #include #include #include /* for qsort and bsearch functions */ using namespace std; /////////////////////////////////////////////////////////////////////////// class Feature { public: Feature(void) { clear(); } Feature(Feature& aFeature) { copyFeature(aFeature); } Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); } Feature& copyFeature(Feature& aFeature) { if (this == &aFeature) { return *this; } strcpy(istn, aFeature.istn); line = aFeature.line; int i, j; feature.setSize(aFeature.feature.getSize()); for (i=0; i > feature; // feature vector int line; // line number in original file }; /////////////////////////////////////////////////////////////////////////// // function declarations void checkOptions (Options& opts, int argc, char* argv[]); void example (void); void usage (const char* command); int buildFeaturesArray (Array& features, HumdrumFile& infile); int getFeature (Feature& tempfeature, HumdrumRecord& record); void buildAnalysisData (Array& analysis, Array& features); void printAnalysisData (Array& analysis, Array& ttu, Array& tts, Array& features, Array& sortlines); void sortFeatures (Array& features); int featuresort (const void* A, const void* B); int featuresort2 (const void* A, const void* B); int getMatchCount (Array& features, int index, int level); void generateTTData (Array& analysis, Array& ttu, Array& tts); void sortLines (Array& sortlines); int linesort (const void* A, const void* B); int unanchoredmatch (Feature& query, Feature& datum); void buildAnalysisDataUnanchored(Array& analysis, Array& features); int getMatchCountUnanchored(Feature& query, Array& features); void buildQuery (Feature& query, Feature& feature, int start, int length); void printAnalysisDataUnanchored(Array& analysis, Array& features); void printAnchorType (int index, int size); int getFeatureStateCount(int& maxstate, Array& states, Array& features); // global variables Options options; // database for command-line arguments int debugQ = 0; // used with the --debug option int numinputs = 0; int depth = -1; // used with -d option int sufficient = 10; // used with -s option int totalcount = 0; // total number of features for all elements int sortQ = 0; // used with -o option int appendQ = 0; // used with -a option int anchorQ = 1; // used with -A option int stateQ = 1; // used with -S option int eprofileQ = 0; // used with -e option int mprofileQ = 1; // used with -P option int unanchoredQ = 0; // used with -u option Array features; /////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) { HumdrumFile infile; // process the command-line options checkOptions(options, argc, argv); infile.clear(); // if no command-line arguments read data file from standard input if (numinputs < 1) { infile.read(cin); } else { infile.read(options.getArg(1)); } Array sortlines; totalcount = buildFeaturesArray(features, infile); sortlines.setSize(features.getSize()); int i; for (i=0; i analysis; Array ttu; Array tts; if (anchorQ) { sortFeatures(features); sortLines(sortlines); buildAnalysisData(analysis, features); generateTTData(analysis, ttu, tts); printAnalysisData(analysis, ttu, tts, features, sortlines); } else { buildAnalysisDataUnanchored(analysis, features); printAnalysisDataUnanchored(analysis, features); } return 0; } /////////////////////////////////////////////////////////////////////////// ////////////////////////////// // // generateTTData -- ttu = time to unique; tts = time to sufficient // negative tts value means sufficiency was not reached. // void generateTTData(Array& analysis, Array& ttu, Array& tts) { ttu.setSize(analysis.getSize()); tts.setSize(analysis.getSize()); ttu.setAll(0); tts.setAll(0); int i, j; int isize = 0; int testvalue = 0; int tempttu = 0; int temptts = 0; // find the time-to-uniqueness (ttu) value: for (i=0; i=0; j--) { if (analysis[i].feature[0][j] != testvalue) { tempttu = j+1; break; } tempttu = j; } if (tempttu == isize - 1) { if (analysis[i].feature[0][isize-1] == 1) { // found uniq element on the last try... ttu[i] = isize; } else { // unsure if a uniq element was found ttu[i] = -(tempttu + 1); } } else { ttu[i] = tempttu+1; } } // find the time-to-sufficiency (tts) value: for (i=0; i sufficient) { tts[i] = -isize; continue; } temptts = isize-1; j = isize-2; while (j>=0) { if (analysis[i].feature[0][j] <= sufficient) { j--; } else { temptts = j+1; break; } } if (j < 0) { temptts = 0; } tts[i] = temptts+1; } } ////////////////////////////// // // printAnchorType -- // void printAnchorType(int index, int size) { index = index+1; int maxdigits = ((int)log10((double)size))+1; int digits = ((int)log10((double)index))+1; int i; for (i=0; i& analysis, Array& features) { int i, j, k; cout << "**istn\t**anchor\t**length"; if (mprofileQ) { cout << "\t**mprofile"; } if (eprofileQ) { cout << "\t**eprofile"; } if (appendQ) { cout << "\t**feature"; } cout << "\n"; double entropy = 0.0; double entropysum = 0.0; for (i=0; i& states, Array& features) { #define MAXSTATE 500000 states.setSize(MAXSTATE); states.setAll(0); int warnnegative = 0; int warnmaximum = 0; int index; maxstate = 0; int i, j; for (i=0; i= MAXSTATE) { warnmaximum++; } states[index]++; if (index > maxstate) { maxstate = index; } } } if (warnnegative) { cout << "!! Warning: there were " << warnnegative << " uncounted negative states in the data set\n"; } if (warnmaximum) { cout << "!! Warning: there were " << warnmaximum << " uncounted states exceeding 99,999 in the data set\n"; } int count = 0; for (i=0; i<=maxstate; i++) { if (states[i]) { count++; } } return count; } ////////////////////////////// // // printAnalysisData -- // void printAnalysisData(Array& analysis, Array& ttu, Array& tts, Array& features, Array& sortlines) { int i, j; int sorti; int ttufail = 0; int ttsfail = 0; int ttusum = 0; int ttssum = 0; int ttumax = 0; int ttumin = 0; int ttsmax = 0; int ttsmin = 0; int minfeaturecount = features[0].feature[0].getSize(); int maxfeaturecount = features[0].feature[0].getSize(); for (i=1; i features[i].feature[0].getSize()) { minfeaturecount = features[i].feature[0].getSize(); } if (maxfeaturecount < features[i].feature[0].getSize()) { maxfeaturecount = features[i].feature[0].getSize(); } } for (i=0; i 0) { ttusum+= ttu[i]; } if (tts[i] > 0) { ttssum+= tts[i]; } if (ttumax == 0) { ttumax = ttu[i]; } if (ttumin == 0) { ttumin = ttu[i]; } if (ttsmax == 0) { ttsmax = tts[i]; } if (ttsmin == 0) { ttsmin = tts[i]; } if (ttumax < ttu[i]) { ttumax = ttu[i]; } if (ttumin > ttu[i]) { ttumin = ttu[i]; } if (ttsmax < tts[i]) { ttsmax = tts[i]; } if (ttsmin > tts[i]) { ttsmin = tts[i]; } } double ttuaverage = (double)ttusum / (ttu.getSize() - ttufail); double ttsaverage = (double)ttssum / (tts.getSize() - ttsfail); double entropy = 0.0; // for entropy profile double entropysum = 0.0; // for entropy profile Array states; int maxstate; int statecount = getFeatureStateCount(maxstate, states, features); cout << "!!!theme_count:\t\t\t" << analysis.getSize() << " themes\n"; cout << "!!!symbol_count:\t\t" << totalcount << " notes\n"; cout << "!!!state_count:\t\t\t" << statecount << " states/symbol\n"; if (stateQ) { // print state distribution statistics for (i=0; i<=maxstate; i++) { if (states[i]) { cout << "!!!state_" << i << "_count:\t\t" << states[i] << " symbols (" << (double)states[i]/totalcount*100.0 << "% of total symbols)\n"; } } } cout << "!!!average_symbols_per_theme:\t" << (double)totalcount/analysis.getSize() << " symbols\n"; cout << "!!!average_entropy_rate:\t" << log((double)analysis.getSize())/log(2.0)/ttuaverage << " bits/symbol\n"; cout << "!!!max_theoretical_entropy_rt:\t" << log((double)statecount)/log(2.0) << " bits/symbol\n"; cout << "!!!min_theme_symbol_count:\t" << minfeaturecount << " symbols\n"; cout << "!!!max_theme_symbol_count:\t" << maxfeaturecount << " symbols\n"; cout << "!!!search_type:\t\t\t"; if (anchorQ) { if (unanchoredQ) { cout << "unanchored"; } else { cout << "anchored"; } } else { cout << "doubly unanchored"; } cout << "\n"; cout << "!!!sufficiency_level:\t\t" << sufficient << " matches\n"; cout << "!!!ttu_failure_count:\t\t" << ttufail << "\t(" << (double)ttufail/ttu.getSize() * 100 << "%)" << "\n"; cout << "!!!tts_failure_count:\t\t" << ttsfail << "\t(" << (double)ttsfail/tts.getSize() * 100 << "%)" << "\n"; cout << "!!!ttu_mean:\t\t\t" << ttuaverage << " symbols\n"; cout << "!!!ttu_min:\t\t\t" << ttumin << " symbols\n"; cout << "!!!ttu_max:\t\t\t" << ttumax << " symbols\n"; cout << "!!!tts_mean:\t\t\t" << ttsaverage << " symbols\n"; cout << "!!!tts_min:\t\t\t" << ttsmin << " symbols\n"; cout << "!!!tts_max:\t\t\t" << ttsmax << " symbols\n"; cout << "!!\n"; cout << "!! The music for each theme can be viewed in Themefinder by\n"; cout << "!! changing xxxxxxxxxxxx to a ISTN value given below in this web address:\n"; cout << "!! http://themefinder.org/cgi-bin/themeinfo?istn=xxxxxxxxxxxx\n"; cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"; cout << "\n"; cout << "**istn\t**length\t**ttu\t**tts"; if (mprofileQ) { cout << "\t**mprofile"; } if (eprofileQ) { cout << "\t**eprofile"; } if (appendQ) { cout << "\t**feature"; } cout << "\n"; for (i=0; i& analysis, Array& features) { analysis.setSize(features.getSize()); analysis.allowGrowth(0); int i, j, k; int searchsize = 0; Feature query; for (i=0; i 0) && (searchsize > depth)) { searchsize = depth; } for (k=0; k& features) { int count = 0; int i; for (i=0; i& analysis, Array& features) { analysis.setSize(features.getSize()); analysis.allowGrowth(0); int i, j; int searchsize = 0; Feature query; query.feature.setSize(1); query.feature[0].setSize(0); for (i=0; i 0) && (searchsize > depth)) { searchsize = depth; } analysis[i].feature[0].setSize(searchsize); query.feature[0].setSize(features[i].feature[0].getSize()); query.feature[0].setSize(0); for (j=0; j& features, int index, int level) { Feature* ptr = NULL; Feature searchfeature = features[index]; if (searchfeature.feature[0].getSize() > level+1) { searchfeature.feature[0].setSize(level+1); } /////////////////////////////////////////////// ptr = &(features[index]); /* ptr = (Feature*)bsearch(&searchfeature, features.getBase(), features.getSize(), sizeof(Feature), featuresort2); if (ptr == NULL) { return 0; } */ /////////////////////////////////////////////// // count the number of exact matches int matchindex = ((long long)ptr - (long long)(features.getBase()))/sizeof(Feature); int forward = matchindex; int backward = matchindex; backward--; while ((backward >= 0) && (featuresort2((void*)&searchfeature, (void*)(&(features[backward]))) == 0)) { backward--; } backward++; forward++; while ((forward < features.getSize()) && (featuresort2((void*)&searchfeature, (void*)(&(features[forward]))) == 0)) { forward++; } forward--; return forward - backward + 1; } ////////////////////////////// // // sortFeatures -- // void sortFeatures(Array& features) { qsort(features.getBase(), features.getSize(), sizeof(Feature), featuresort); } ////////////////////////////// // // sortLines -- // void sortLines(Array& sortlines) { qsort(sortlines.getBase(), sortlines.getSize(), sizeof(int), linesort); } ////////////////////////////// // // linesort -- sort items by feature space elements. // int linesort(const void* A, const void* B) { int indexa = *((int*)A); int indexb = *((int*)B); if (features[indexa].line < features[indexb].line) { return -1; } else if (features[indexa].line > features[indexb].line) { return +1; } else { return 0; } } ////////////////////////////// // // featuresort -- sort items by feature space elements. // int featuresort(const void* A, const void* B) { Feature& a = *((Feature*)A); Feature& b = *((Feature*)B); int i; int sizea = a.feature[0].getSize(); int sizeb = b.feature[0].getSize(); int minsize = sizea < sizeb ? sizea : sizeb; for (i=0; i b.feature[0][i]) { return +1; } } if (sizea > sizeb) { return +1; } else if (sizea < sizeb) { return -1; } else { return 0; } } ////////////////////////////// // // featuresort2 -- sort for count matches limit to the size of the first // element // int featuresort2(const void* A, const void* B) { Feature& a = *((Feature*)A); Feature& b = *((Feature*)B); int i; int sizea = a.feature[0].getSize(); int sizeb = b.feature[0].getSize(); int minsize = sizea < sizeb ? sizea : sizeb; for (i=0; i b.feature[0][i]) { return +1; } } // all of the elements in the search range are equal return 0; } ////////////////////////////// // // buildFeaturesArray -- returns the total number of features // int buildFeaturesArray(Array& features, HumdrumFile& infile) { int i; Feature tempfeature; features.setSize(infile.getNumLines()); features.setSize(0); int totalfeatures = 0; for (i=0; i 0) { tempfeature.feature[0].append(code); total++; } ptr = strtok(NULL, " "); } } } // do generalized features here. return total; } ////////////////////////////// // // checkOptions -- validate and process command-line options. // void checkOptions(Options& opts, int argc, char* argv[]) { opts.define("d|depth=i:-1", "set the maximum search depth"); opts.define("a|append=b", "append original data to the analysis"); opts.define("s|sufficient=i:10", "set the sufficiency size"); opts.define("S|no-state=b", "do not print state distributions"); opts.define("e|entropy-profile=b", "print entropy profile"); opts.define("o|order|sort=b", "sort the output by features"); opts.define("P|no-match-profile=b", "do not output match profile"); opts.define("u|unanchored=b", "search for feature anywhere in sequences"); opts.define("p|permutations=b", "search for feature anywhere in sequences"); opts.define("debug=b"); // determine bad input line num opts.define("author=b"); // author of program opts.define("version=b"); // compilation info opts.define("example=b"); // example usages opts.define("h|help=b"); // short description opts.process(argc, argv); // handle basic options: if (opts.getBoolean("author")) { cout << "Written by Craig Stuart Sapp, " << "craig@ccrma.stanford.edu, April 2004" << endl; exit(0); } else if (opts.getBoolean("version")) { cout << argv[0] << ", version: 15 April 2004" << endl; cout << "compiled: " << __DATE__ << endl; cout << MUSEINFO_VERSION << endl; exit(0); } else if (opts.getBoolean("help")) { usage(opts.getCommand().c_str()); exit(0); } else if (opts.getBoolean("example")) { example(); exit(0); } eprofileQ = opts.getBoolean("entropy-profile"); numinputs = opts.getArgCount(); debugQ = opts.getBoolean("debug"); depth = opts.getInteger("depth"); sufficient = opts.getInteger("sufficient"); sortQ = opts.getBoolean("order"); appendQ = opts.getBoolean("append"); anchorQ = !opts.getBoolean("permutations"); unanchoredQ = opts.getBoolean("unanchored"); stateQ = !opts.getBoolean("no-state"); mprofileQ = !opts.getBoolean("no-match-profile"); if (sufficient <= 0) { sufficient = 1; } } ////////////////////////////// // // example -- example usage of the program // void example(void) { cout << " \n" << endl; } ////////////////////////////// // // usage -- gives the usage statement for the program // void usage(const char* command) { cout << " \n" << endl; } // md5sum: af2ba11b7e41b0d761eea1a0c6e04905 ttuprofile.cpp [20160320]