//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Thu Jun 2 13:29:07 PDT 2011
// Last Modified: Mon Jun 6 14:16:16 PDT 2011
// Filename: ...sig/examples/all/xmlparse.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/xml/xmlparse.cpp
// Syntax: C++; museinfo
//
// Description: Very simple parse of the contents of an XML file.  This
// parser does not use a class.  See xmlparse2 for the same parsing
// technique implemented with the XmlFileBasic class.
//
#include <math.h>
#include <string>
#include "humdrum.h"
#ifndef OLDCPP
#include <iostream>
#include <fstream>
#else
#include <iostream.h>
#include <fstream.h>
using namespace std;
#endif
#include "SigCollection.h"
// function declarations:
// checkOptions: defines the program's options and processes argc/argv.
void checkOptions(Options& opts, int argc, char** argv);
// example: called by checkOptions() when the "example" option is set.
void example(void);
// usage: called by checkOptions() with the command name when the "help"
// option is set.
void usage(const char* command);
// parseXmlFile: opens the named file and passes it to parseXmlStream().
void parseXmlFile(const char* filename);
// parseXmlStream: looks at the next character of the stream and hands the
// input to extractWhiteSpace(), extractTag() or extractText().
void parseXmlStream(istream& input);
// extractTag: prints a "TAG:" line for input that starts with "<".
void extractTag(istream& input);
// extractWhiteSpace: prints a "WHITESPACE:>>" line for a run of whitespace.
void extractWhiteSpace(istream& input);
// extractText: prints a "TEXT:" line for the characters before the next "<".
void extractText(istream& input);
// User interface variables:
// options: the option set passed to checkOptions() and queried in main().
Options options;
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// main -- Process the command-line options, then parse every file named
//     on the command line.  When no file arguments are present, the XML
//     data is read from standard input instead.
//

int main(int argc, char** argv) {
   checkOptions(options, argc, argv);

   const int fileCount = options.getArgCount();
   if (fileCount < 1) {
      // no filenames were given, so parse whatever arrives on standard input
      parseXmlStream(cin);
      return 0;
   }

   // arguments are requested by index 1 through fileCount (inclusive)
   for (int argIndex = 1; argIndex <= fileCount; ++argIndex) {
      parseXmlFile(options.getArg(argIndex));
   }
   return 0;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// extractText -- Read character data from the stream up to (but not
//     including) the next "<" or the end of the input, and print it on a
//     line starting with "TEXT:".  Whitespace between non-space characters
//     is kept in the text.  Whitespace that follows the last non-space
//     character is left out of the TEXT line and is printed afterwards on
//     its own line in the form "WHITESPACE:>>...<<".  A terminating "<" is
//     put back into the stream for the next reader.
//

void extractText(std::istream& input) {
   // Whitespace seen since the last non-space character.  It is only known
   // to be interior to the text once another non-space character arrives.
   std::string pendingSpace;

   std::cout << "TEXT:";
   int ch = input.get();
   // Check the complete stream state (not just eof()) so that a read error
   // also ends the loop instead of repeating a failing get() forever.
   while (input.good() && (ch != '<')) {
      // ch is the int returned by a successful get(), i.e. a value in the
      // unsigned char range, which is what isspace() requires.  Converting
      // it to a plain char first could produce a negative argument.
      if (isspace(ch)) {
         pendingSpace += static_cast<char>(ch);
      } else {
         std::cout << pendingSpace << static_cast<char>(ch);
         pendingSpace.clear();
      }
      ch = input.get();
   }
   if (ch == '<') {
      // leave the start of the following tag in the stream
      input.putback('<');
   }
   std::cout << std::endl;

   if (!pendingSpace.empty()) {
      std::cout << "WHITESPACE:>>" << pendingSpace << "<<" << std::endl;
   }
}
//////////////////////////////
//
// parseXmlFile -- Open the named file for reading and parse its contents
//     with parseXmlStream().  If the file cannot be opened, a message is
//     printed to standard error and the function returns without parsing.
//

void parseXmlFile(const char* filename) {
   std::ifstream input(filename);
   if (!input.is_open()) {
      // Report the problem rather than silently producing no output.
      // Returning (instead of exiting) lets the caller carry on.
      std::cerr << "Error: cannot open file " << filename
                << " for reading" << std::endl;
      return;
   }
   parseXmlStream(input);
}
//////////////////////////////
//
// parseXmlStream -- Read the stream to its end, one piece at a time.  The
//     next character is inspected without being consumed and selects the
//     reader for the piece: whitespace goes to extractWhiteSpace(), "<"
//     goes to extractTag(), and anything else goes to extractText().
//

void parseXmlStream(std::istream& input) {
   while (input.good()) {
      const int next = input.peek();
      if (next < 0) {
         // end of data stream
         break;
      }
      // next is non-negative here, so it can be given to isspace() as is.
      // Narrowing it to a plain char first could make bytes >= 0x80
      // negative, which isspace() does not accept.
      if (isspace(next)) {
         extractWhiteSpace(input);
      } else if (next == '<') {
         extractTag(input);
      } else {
         extractText(input);
      }
   }
}
//////////////////////////////
//
// extractTag -- Print one markup element on a line starting with "TAG:".
//     Assumes that the first character in the stream at this point is "<".
//
//     Characters are copied to standard output up to and including the
//     closing ">", or to the end of the input if no closing ">" is found.
//     An element whose first four characters are "<!--" is treated as a
//     comment: inside it a ">" only closes the element when the two
//     characters before it are "--".
//
//     Attribute values are not interpreted: a ">" inside a quoted
//     attribute value still ends the tag.
//

void extractTag(std::istream& input) {
   int  opening[4]  = {-1, -1, -1, -1};   // first four characters of the tag
   int  count       = 0;                  // characters read so far
   bool inComment   = false;
   int  prev        = -1;                 // character before the current one
   int  prevPrev    = -1;                 // character before prev

   std::cout << "TAG:";
   // Check the complete stream state (not just eof()) so that a read error
   // also ends the loop instead of repeating a failing get() forever.
   for (int ch = input.get(); input.good(); ch = input.get()) {
      if (count < 4) {
         opening[count] = ch;
      }
      count++;
      if (count == 4) {
         // opening[0] is the assumed "<"; the next three decide comment-ness
         inComment = (opening[1] == '!') && (opening[2] == '-')
               && (opening[3] == '-');
      }

      std::cout << static_cast<char>(ch);

      if (ch == '>') {
         const bool afterDashes = (prevPrev == '-') && (prev == '-');
         if (!inComment || afterDashes) {
            // end of the element: stop before reading anything beyond it
            break;
         }
      }
      prevPrev = prev;
      prev     = ch;
   }
   std::cout << std::endl;
}
//////////////////////////////
//
// extractWhiteSpace -- Read a run of whitespace characters from the stream
//     and print it between the markers "WHITESPACE:>>" and "<< " (the
//     closing marker is followed by a space and a newline).  The first
//     non-whitespace character read is put back into the stream.
//

void extractWhiteSpace(std::istream& input) {
   std::cout << "WHITESPACE:>>";
   int ch = input.get();
   // ch comes from a successful get() whenever the stream is good, so it is
   // in the unsigned char range and can be handed to isspace() directly.
   // Narrowing it to a plain char first could make it negative.
   while (input.good() && isspace(ch)) {
      std::cout << static_cast<char>(ch);
      ch = input.get();
   }
   if (input.good()) {
      // ch is real data that belongs to the next piece of the input
      input.putback(static_cast<char>(ch));
   }
   std::cout << "<< " << std::endl;
}
//////////////////////////////
//
// checkOptions -- Define the options understood by the program, process
//     the command line, and act on the informational options.  Each of
//     "author", "version", "help" and "example" prints its output (if any)
//     and then ends the program with exit status 0.  They are tested in
//     that order, so only the first one that is set takes effect.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("author=b", "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b", "short description");
   opts.process(argc, argv);

   // Every branch below leaves the program, so the tests can simply be
   // made one after another.
   if (opts.getBoolean("author")) {
      std::cout << "Written by Craig Stuart Sapp, "
                << "craig@ccrma.stanford.edu, June 2011" << std::endl;
      exit(0);
   }

   if (opts.getBoolean("version")) {
      std::cout << argv[0] << ", version: 2 Jan 2011" << std::endl;
      std::cout << "compiled: " << __DATE__ << std::endl;
      std::cout << MUSEINFO_VERSION << std::endl;
      exit(0);
   }

   if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   }

   if (opts.getBoolean("example")) {
      example();
      exit(0);
   }
}
//////////////////////////////
//
// example -- Print example command lines for the program: one that parses
//     files named as arguments and one that parses standard input.
//

void example(void) {
   std::cout << "# parse one or more XML files:\n"
             << "     xmlparse file1.xml file2.xml\n"
             << "# parse XML data from standard input:\n"
             << "     xmlparse < file.xml" << std::endl;
}
//////////////////////////////
//
// usage -- Print a short description of how to run the program.
//     command is the name to show for the program; if it is a null
//     pointer, "xmlparse" is shown instead.
//

void usage(const char* command) {
   // Streaming a null char pointer is not allowed, so substitute a name.
   const char* name = (command != 0) ? command : "xmlparse";
   std::cout << "Usage: " << name << " [options] [file ...]\n"
             << "Prints the tags, text and whitespace of each XML file"
             << " on labeled lines.\n"
             << "Reads standard input when no file is given.\n"
             << "Options: author, version, example, help" << std::endl;
}
// md5sum: 00471a1743462c431825fdcf34037f50 xmlparse.cpp [20110711]