// // Programmer: Craig Stuart Sapp // Creation Date: Mon May 3 21:54:58 PDT 2010 // Last Modified: Thu May 6 23:16:19 PDT 2010 // Filename: ...sig/examples/all/make64.cpp // Web Address: http://sig.sapp.org/examples/museinfo/humdrum/humpdf.cpp // Syntax: C++; museinfo // // Description: Embed a Humdrum file into a PDF file as an attachment. // // Links: // PDF 1.7 reference (ISO 32000.1 2008): // http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf // http://blogs.adobe.com/pdfdevjunkie/PDF_Inside_and_Out.pdf // PDF 1.4 reference: // http://www.adobe.com/devnet/pdf/pdfs/PDFReference.pdf // section 3.10.3 Embedded File Streams, page 123 // // // http://feliam.wordpress.com/2010/01/13/generic-pdf-exploit-hider-embedpdf-py-and-goodbye-av-detection-012010/ // #include "humdrum.h" #include "PerlRegularExpression.h" #ifndef OLDCPP #include #define SSTREAM stringstream #define CSTRING str().c_str() #include #include #include #include #include using namespace std; #else #ifdef VISUAL #include /* for windows 95 */ #else #include #endif #include #include #include #include #include #define SSTREAM strstream #define CSTRING str() #endif #include #include #include void checkOptions (Options& opts, int argc, char* argv[]); void example (void); void usage (const char* command); int getFileSize (ifstream& file); int getXrefOffset (ifstream& file); void printMimeEncoding (ostream& out, int count, char char1, char char2, char char3); void createStreamData (ostream& out, SSTREAM& datatoencode, const char* filename); int printStreamObject (ostream& out, int objnum, SSTREAM& datatoencode, const char* filename, Array& objectindex, Array& offsetindex, int initialoffset); void getOriginalTrailer (SSTREAM& trailer, int filesize, int xref, ifstream& file); int createFileEntry (SSTREAM& out, HumdrumFile& infile, const char* filename, int nextobject, Array& objectindex, Array& offsetindex, int initialoffset); int generateNewXref (SSTREAM& out, Array& objectindex, Array& offsetindex, 
int filesize); void getMD5Sum (ostream& out, SSTREAM& data); void printPdfDate (ostream& out, struct tm* date); void addTrailerPrev (Array& trailerstring, int newprevoffset); int linkToRootObject (ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, Array trailerstring, int xrefoffset, ifstream& file, int nextobject); void processXrefList (Array& objectoffsets, Array& objectversions, ifstream& file, char firstdigit); int getPreviousOffset (ifstream& file); void buildIndirectObjectIndex(Array& objectoffsets, Array& objectversions, ifstream& file, int xrefoffset); void getObject (ostream& out, ifstream& file, int offset); int updateNamesObject (ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, ostream& file, int nextobject, int ndoffset); void updateRootObject (ostream& out, int rootobjnum, int initialoffset, Array& objectoffsets, Array& objectversions, Array& rootstring, Array& objectindex, Array& offsetindex, int embedcount); void addDictionaryEntry (Array& objectstring, Array& entry); int getSequentialObjectCount(Array& list, int starti); // global variables: Options options; const char* pdffilename = ""; // used with -p option int appendQ = 0; // used with -A option int keepdirQ = 0; // used with -D option int hiddenQ = 0; // used with --hidden option (not active) int debugQ = 0; // used with --debug option int prefixQ = 0; // used with -P option const char* prefix = ""; // used with -P option /////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // process the command-line options checkOptions(options, argc, argv); ifstream file(pdffilename, ios::in | ios::binary); if (!file.is_open()) { cerr << "ERROR: cannot open file: " << pdffilename << "\n"; exit(1); } int filesize = getFileSize(file); int xrefoffset = getXrefOffset(file); if (xrefoffset < 0) { cerr << "ERROR: no xref offset found in file " << pdffilename << endl; } // cout << "The size of the file in bytes 
is: " << filesize << endl; // cout << "Xref offset is: " << xrefoffset << endl; HumdrumFile infile; int i; // figure out the number of input files to process int numinputs = options.getArgCount(); Array filesegments; if (numinputs <= 0) { filesegments.setSize(1); } else { filesegments.setSize(numinputs); } filesegments.allowGrowth(0); for (i=0; i trailerstring; trailerstring.setSize(originaltrailer.str().length()+1); strcpy(trailerstring.getBase(), originaltrailer.CSTRING); int nextobject = -1; PerlRegularExpression pre; pre.search(originaltrailer.CSTRING, "/Size\\s+(\\d+)", ""); if (!sscanf(pre.getSubmatch(1), "%d", &nextobject)) { cerr << "ERROR: cannot find object count in trailer" << endl; exit(1); } Array objectindex; // object number (presuming 0 for second number) Array offsetindex; // offsets from end of old file in bytes objectindex.setSize(numinputs*4); offsetindex.setSize(numinputs*4); objectindex.setSize(0); offsetindex.setSize(0); // int initialoffset = filesize; int initialoffset = 0; const char* filename = ""; int fcounter = 0; for (i=0; istr().length(); fcounter++; } if (!appendQ) { // print initial contents of input PDF char chariot; file.seekg(0, ios::beg); file.get(chariot); while (!file.eof()) { cout << chariot; file.get(chariot); } } cout << flush; file.clear(); // pretend eof was not reached. file.seekg(0, ios::beg); for (i=0; istr() << flush; } if (!hiddenQ) { // If "hidden", the file will disappear if Save As... is used to save, // because the document root does not know about it. 
SSTREAM rootlink; nextobject = linkToRootObject(rootlink, objectindex, offsetindex, initialoffset, trailerstring, xrefoffset, file, nextobject); cout << rootlink.str() << flush; initialoffset += rootlink.str().length(); } // update the object count (/Size) in the trailer: char replacement[128] = {0}; sprintf(replacement, "/Size %d", nextobject); pre.sar(trailerstring, "\\/Size\\s+\\d+", replacement); // remove any /Prev entry in trailer, and replace with // new one addTrailerPrev(trailerstring, xrefoffset); SSTREAM xrefstream; int newxrefoffset = generateNewXref(xrefstream, objectindex, offsetindex, filesize); newxrefoffset += initialoffset; cout << xrefstream.str(); cout << trailerstring.getBase() << endl; cout << "startxref\n"; // byte location of new xref goes here: cout << newxrefoffset + filesize << endl; cout << "%%EOF" << endl; for (i=0; i> // The value for /Names entry is an array of two items: // (A) the name of the object in UTF-16, usually called // "_Untitled Object" // (B) An indirect object reference point to an object which // contains the /Type /FileSpec in the dictionary // (which contains the true filesystem's filename, and // an indirect object reference to a stream object which // contains the actual contents of a file. // // General Map of attaching the embedded file to the Root object: // Root object --> Names Dictionary --> Embedded List --> Embedded File // Specification --> Embedded File Stream // // * Root object has a /Names entry in its dictionary which points to // an indirect object that gives the Names dictionary of the Root object. // (create a /Names entry if it does not already exist and update // the Root object in the PDF file; otherwise, leave the original // Root entry unchanged, and go to the Name Dictionary object for // further processing. 
//    Example Root object:
//       1 0 obj<</Type /Catalog /Pages 2 0 R /Names 14 0 R>> endobj
//    or without a /Names entry in the dictionary:
//       1 0 obj<</Type /Catalog /Pages 2 0 R>> endobj
// * Names Dictionary should have a dictionary entry called /EmbeddedFiles
//    which points to an indirect object which will list the embedded files.
//    Example Names Dictionary:
//       14 0 obj << /EmbeddedFiles 15 0 R >>
//    This entry says that the list of embedded files is found in indirect
//    object 15.  If the Names Dictionary does not contain an /EmbeddedFiles
//    entry, then update the object to add one.
// * Embedded List: A dictionary with a /Names entry which contains an
//    array of pairs of entries which list the embedded files.  The first
//    of the pair is a UTF-16 string giving the embedded name of the
//    data (not the filename, and not really used for anything that I can
//    figure out).  The typical name is "_Untitled Object".  The second
//    value of the pair is a reference to an indirect object.  In the example
//    below, the indirect object is #13.  This is a link to the /Filespec
//    entry for the embedded file.
//       15 0 obj
//       << /Names [(_^@U^@n^@t^@i^@t^@l^@e^@d^@ ^@O^@b^@j^@e^@c^@t) 13 0 R] >>
//       endobj
// * Embedded File Specification:
//       13 0 obj
//       << /Type /Filespec
//          /F (file.krn)
//          /EF << /F 12 0 R >>
//          /Desc (Short Description of File)
//       >>
//       endobj
//    The Embedded file specification lists the name of the file in the /F
//    entry; the embedded content stream is listed as an indirect object
//    in the /F entry in the dictionary of the /EF entry (in this case
//    object #12).
// * Embedded File Stream: // Contains the actual contents of the embedded file plus some // file content information: // // 12 0 obj // << // /Type /EmbeddedFile // /SubType /application#2fx-humdrum // /Length 34 // /Params // << // /CreationDate (D:20100510042439-08'00') // /ModDate (D:20100510042439-08'00') // /EmbedDate (D:19991214040506-08'00') // /Size 34 // /CheckSum // >> // >> // stream // **kern // *M4/4 // *k[] // *c: // =- // 1c // == // *- // endstream // endobj // // The /Length field is required and gives the number of bytes // between the string "stream\n" and endstream. "stream" should // have the newline 0x0a or "0x0d 0x0a" after it (but not 0x0d alone). // An optional newline before "endstream" is allowed, and will not be // considered part of the data inside of the stream. // // int linkToRootObject(ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, Array trailerstring, int xrefoffset, ifstream& file, int nextobject) { // when this function is called, only embedded files have been // added to the PDF. There are two indirect objects for each // embedded file (the /FileSpec entry and the actual contents, // So embedcount is the number of files which have been included: int embedcount = objectindex.getSize() / 2; //// Locate the Root indirect object which is the value for the symbol //// /Root in the trailer. PerlRegularExpression pre; if (!pre.search(trailerstring.getBase(), "/Root\\s+(\\d+)\\s+(\\d+)\\s*R[^A-Za-z0-9]", "")) { cerr << "ERROR: Could not find /Root in trailer: " << endl; cerr << trailerstring.getBase() << endl; exit(1); } int rootobject = atol(pre.getSubmatch(1)); // int rootversion = atol(pre.getSubmatch(2)); // presuming always 0 for now // now that the indirect object number of the Root is known, go find it... 
//// First build an byte offset table for each indirect object in the file: Array objectoffsets; Array objectversions; if (!pre.search(trailerstring.getBase(), "/Size\\s+(\\d+)", "")) { cerr << "ERROR: could not find /Size entry in PDF trailer:" << endl; cerr << trailerstring.getBase() << endl; exit(1); } int objectcount = atol(pre.getSubmatch(1)); if (objectcount <= 0) { cerr << "ERROR: unusual indirect object count in PDF: " << objectcount << endl; exit(1); } objectoffsets.setSize(objectcount); objectversions.setSize(objectcount); objectoffsets.setAll(-1); objectversions.setAll(0); objectoffsets.allowGrowth(0); objectversions.allowGrowth(0); buildIndirectObjectIndex(objectoffsets, objectversions, file, xrefoffset); //// Now go to root object and check to see if there is a /Names entry int i; if (debugQ) { cerr << ">>> Indirect object byte offset table:" << endl; for (i=0; i>> " << i << ":\t" << objectoffsets[i] << endl; } } int rootoffset = objectoffsets[rootobject]; SSTREAM rootstream; getObject(rootstream, file, rootoffset); Array rootstring; rootstring.setSize(rootstream.str().length()+1); for (i=0; i<(int)rootstream.str().length(); i++) { rootstring[i] = rootstream.str()[i]; } rootstring[rootstring.getSize()-1] = '\0'; //// if there is a /Names entry in dictionary, then don't bother updating //// the Root entry and instead go directly to the /Names object and modify. //// If there is not a /Names entry, then add one as indirect object //// and also insert a revised Root object. if (pre.search(rootstring.getBase(), "/Names\\s+(\\d+)\\s+(\\d)\\s+R", "")) { // int namesobj = atol(pre.getSubmatch(1)); // int namesver = atol(pre.getSubmatch(2)); // ggg // nextobject = updateNamesObject(out, objectindex, offsetindex, // initialoffset, file, nextobject, objectoffsets[namesobj]); } else { // update Root dictionary to add /Names entry, create Names dictionary // and create the list of Embedded files in another object. 
SSTREAM newroot; updateRootObject(newroot, rootobject, initialoffset, objectoffsets, objectversions, rootstring, objectindex, offsetindex, embedcount); initialoffset += newroot.str().length(); out << newroot.str() << flush; } return nextobject; } ////////////////////////////// // // updateRootObject -- // newroot == output data stream for revised root object // rootobjnum == root indirect object number (second value presumed 0) // initialoffset == byte offset from the start of the file to start // of newroot stream. // objectoffsets == storage for indicating byte offset for first // number at start of object. (object is index). // objectversions == storage for indicating version number of object // (object is index) // rootstring == The original Root object entry which needs to have // an added name dictionary, etc. // objectindex == List of indirect objects created by this program // (index in order of creation). // offsetindex == List of byte offsets for objects created by this // program. (index in order of creation). // embedcount == number of embedded files added by program. 
// void updateRootObject(ostream& out, int rootobjnum, int initialoffset, Array& objectoffsets, Array& objectversions, Array& rootstring, Array& objectindex, Array& offsetindex, int embedcount) { SSTREAM newroot; newroot << "\n"; // objectoffsets[rootobjnum] = initialoffset + newroot.str().length(); objectindex.append(rootobjnum); int tval = newroot.str().length() + initialoffset; offsetindex.append(tval); int ndobjectnumber = objectoffsets.getSize() + embedcount*2; int ndversion = 0; Array entry; entry.setSize(1024); sprintf(entry.getBase(), " /Names %d %d R ", ndobjectnumber, ndversion); entry.setSize(strlen(entry.getBase())+1); addDictionaryEntry(rootstring, entry); int i; for (i=0; i>\n"; namedict << "endobj\n"; initialoffset += namedict.str().length(); out << namedict.str() << flush; // add the embedded file listing: objectindex.append(embedlistobjnum); SSTREAM embedlist; embedlist << "\n"; tempoffset = initialoffset + embedlist.str().length(); offsetindex.append(tempoffset); embedlist << embedlistobjnum << " 0 obj\n"; embedlist << "<<\n"; embedlist << " /Names [\n"; int tempobjnum; char nullchar = (char)0; for (i=0; i>\n"; embedlist << "endobj\n"; initialoffset += embedlist.str().length(); out << embedlist.str() << flush; } ////////////////////////////// // // updateNamesObject -- Adds an EmbeddedFiles entry into the Root's // Name Dictionary, or creates an EmbeddedFiles entry if none // exists. 
// 261 0 obj // << // /Dests 254 0 R // /EmbeddedFiles 400 0 R % Added Embedded Files list // >> // endobj // int updateNamesObject(ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, ifstream& file, int nextobject, int ndoffset) { SSTREAM ndstream; getObject(ndstream, file, ndoffset); Array ndstring; ndstring.setSize(ndstream.str().length()+1); int i; for (i=0; i<(int)ndstream.str().length(); i++) { ndstring[i] = ndstream.str()[i]; } ndstring[ndstring.getSize()-1] = '\0'; PerlRegularExpression pre; // char buffer[128] = {0}; Array entry; entry.setSize(1000); if (pre.search(ndstring.getBase(), "/EmbeddedFiles\\s+(\\d+)\\s+(\\d+)\\s+R", "")) { // nothing to change in Name Dictionary, just // go to the list of embedded files... // int iobject = atol(pre.getSubmatch(1)); // nextobject = updateEmbeddedFileList(out, objectindex, offsetindex, // initialoffset, file, nextobject, offsetindex[iobject]) // ggg } else { // Add an /EmbeddedFiles entry to the Name Dictionary int assignednum = nextobject++; int version = 0; sprintf(entry.getBase(), " /EmbeddedFiles %d %d R ", assignednum, version); entry.setSize(strlen(entry.getBase())+1); addDictionaryEntry(ndstring, entry); // print the new Root's name dictionay object // and create a list of embedded files. // ggg } return nextobject; } ////////////////////////////// // // addDictionaryEntry -- add an entry to a dictionary. You should // do a check before calling this function to make sure that // the name key is not already in the dictionary. 
// void addDictionaryEntry(Array& objectstring, Array& entry) { int level = 0; Array newobject; newobject.setSize(objectstring.getSize()+entry.getSize()+100); newobject.setGrowth(1000); newobject.setSize(0); int inserted = 0; char ch; int i, j; // char buffer[128] = {0}; // int plen; for (i=0; i') { level--; } if (inserted || (level != 2)) { continue; } inserted = 1; for (j=0; j') { level--; } if (level <= 0) { if (endstate[endindex] == ch) { endindex++; } else { endindex = 0; } if (endindex == endtarget) { out << ch; return; } } out << ch; file.get(ch); } cerr << "ERROR: end of Object had strange error" << endl; exit(1); return; } ////////////////////////////// // // buildIndirectObjectIndex -- make a list of all of the indirect objects // found in the file (listed in the xref section(s). Ignore indirect // objects which have been updated. Currently object versions is not // used, but might need to be in a fully general (and rare) case. // void buildIndirectObjectIndex(Array& objectoffsets, Array& objectversions, ifstream& file, int xrefoffset) { char ch; file.seekg(xrefoffset, ios::beg); file.get(ch); if (ch != 'x') { cerr << "ERROR: expected 'x' from xref, but found: " << ch << endl; cerr << "int value is: " << (int)ch << endl; cerr << "Position in file: " << file.tellg() << endl; cerr << "xref offset is: " << xrefoffset << endl; exit(1); } file.get(ch); if (ch != 'r') { cerr << "ERROR: expected 'r' from xref, but found: " << ch << endl; exit(1); } file.get(ch); if (ch != 'e') { cerr << "ERROR: expected 'e' from xref, but found: " << ch << endl; exit(1); } file.get(ch); if (ch != 'f') { cerr << "ERROR: expected 'f' from xref, but found: " << ch << endl; exit(1); } while (!file.eof()) { file.get(ch); while (!file.eof() && !isdigit(ch) && (ch != 't')) { file.get(ch); } if (file.eof() || (ch == 't')) { break; } else { processXrefList(objectoffsets, objectversions, file, ch); } } if (file.eof()) { return; } if (ch != 't') { cerr << "ERROR: expecting trailer after 
xref, but find: " << ch << endl; exit(1); } // Now search the trailer for an entry of the form: // /Prev 144425 // This is a byte offset of the previous xref entry, so call this // function again with that offset value. If no /Prev entry is // found, before "%%EOF" string is found, then return to calling function. int previous = getPreviousOffset(file); if (previous >= 0) { buildIndirectObjectIndex(objectoffsets, objectversions, file, previous); } else{ return; } } ////////////////////////////// // // getPreviousOffset -- search through a trailer until "%%EOF" is found, // looking for a /Prev entry. If there is a /Prev entry, then return // the number following it. The file is set to the first 'r' in "trailer" // when this function is called. // int getPreviousOffset(ifstream& file) { char ch; if (file.eof()) { cerr << "ERROR: file EOF while starting /Prev search" << endl; exit(1); } file.get(ch); if (ch != 'r') { cerr << "ERROR: unexpected character reading trailer: " << ch << endl; exit(1); } int level = 0; int eofindex = 0; int previndex = 0; char eofstate[128] = {0}; char prevstate[128] = {0}; strcpy(eofstate, "%%EOF"); strcpy(prevstate, "/Prev"); int prevtarget = strlen(prevstate); int eoftarget = strlen(eofstate); file.get(ch); while (!file.eof()) { if (ch == '<') { level++; } else if (ch == '>') { level--; } if (level == 2) { if (prevstate[previndex] == ch) { previndex++; } else { previndex = 0; } if (previndex == prevtarget) { break; } } else if (level <= 0) { if (eofstate[eofindex] == ch) { eofindex++; } else { eofindex = 0; } if (eofindex == eoftarget) { // found %%EOF, but did not find /Prev entry return -1; } } file.get(ch); } if (previndex != prevtarget) { cerr << "ERROR: strange thing happened in getPreviousOffset()" << endl; exit(1); } // skip any non-digit characters: file.get(ch); while (!file.eof() && !isdigit(ch)) { if (!isspace(ch)) { cerr << "ERROR: found a strange character after /Prev in trailer: " << ch << endl; } file.get(ch); } if 
(file.eof()) { return -1; } int value = ch - '0'; file.get(ch); while (!file.eof() && isdigit(ch)) { value *= 10; value += ch - '0'; file.get(ch); } return value; } ////////////////////////////// // // processXrefList -- get a byte offset listing which start // with two numbers: the first object in the following list, // then the count of the objects listed. Following these two numbers, // the list offsets for each entry are given. // Example: // xref // 0 1 // 0000000000 65535 f // 12 2 // 0000003054 00000 n // 0000003206 00000 n // void processXrefList(Array& objectoffsets, Array& objectversions, ifstream& file, char firstdigit) { char ch; int startindex = firstdigit - '0'; file.get(ch); while (!file.eof() && isdigit(ch)) { startindex *= 10; startindex += ch - '0'; file.get(ch); } // now read the count of objects; // previous character should be a space and is not a digit. int objectcount = 0; file.get(ch); while (!file.eof() && isdigit(ch)) { objectcount *= 10; objectcount += ch - '0'; file.get(ch); } int currentobject = startindex; char buffer[128] = {0}; int offset = 0; int tempval = 0; // now read the list of objects: int i; while (!file.eof() && ((ch == 0x0a) || (ch == 0x0d))) { file.get(ch); } if (ch != '0') { cerr << "ERROR: funny error reading xref entry: " << ch << endl; exit(1); } tempval = file.tellg(); tempval--; file.seekg(tempval, ios::beg); for (i=0; i& objectindex, Array& offsetindex, int filesize) { SSTREAM out; // temporary code for testing: // filesize = 0; int output = 0; out << "\n"; output = out.str().length(); out << "xref\n"; // don't need the null object: // out << "0 1\n"; // out << "0000000000 65535 f" << (char)0x0d << (char)0x0a; // output the starting object number in a sequence // and then how many follow, then the offset and version numbers // followed by " n" 0x0d 0x0a. 
if (objectindex.getSize() == 0) { return output; } int i; int currenti = 0; int currentlen = getSequentialObjectCount(objectindex, currenti); int value; while (currenti < objectindex.getSize()) { out << objectindex[currenti] << " " << currentlen << "\n"; for (i=currenti; i& list, int starti) { int i; int output = 1; for (i=starti+1; i& trailerstring, int newprevoffset) { int level = 0; Array newtrailer; newtrailer.setSize(trailerstring.getSize()+1000); newtrailer.setGrowth(1000); newtrailer.setSize(0); PerlRegularExpression pre; int prevprinted = 0; char ch; int i, j; char buffer[128] = {0}; int plen; for (i=0; i') { level--; } if (level != 2) { newtrailer.append(trailerstring[i]); continue; } if (trailerstring[i] != '/') { newtrailer.append(trailerstring[i]); continue; } if (pre.search(trailerstring.getBase()+i, "^(/Prev\\s+\\d+)", "")) { const char* ptr = pre.getSubmatch(1); plen = strlen(ptr); i+= plen-1; sprintf(buffer, "/Prev %d", newprevoffset); j = 0; while (buffer[j] != '\0') { newtrailer.append(buffer[j++]); } prevprinted = 1; continue; } newtrailer.append(trailerstring[i]); continue; } ch = '\0'; newtrailer.append(ch); if (!prevprinted) { // need to insert a /Prev entry (at end of dictionary) level = 0; for (i=newtrailer.getSize()-1; i>=0; i--) { if (newtrailer[i] == '>') { level++; } if (newtrailer[i] == '<') { level--; } if (level != 2) { continue; } sprintf(buffer, " /Prev %d\n", newprevoffset); plen = strlen(buffer); int oldlen = newtrailer.getSize(); newtrailer.setSize(oldlen+plen); int newlen = newtrailer.getSize(); for (j=0; j& objectindex, Array& offsetindex, int initialoffset) { SSTREAM datatoencode; infile.write(datatoencode); nextobject = printStreamObject(out, nextobject, datatoencode, filename, objectindex, offsetindex, initialoffset); return nextobject; } ////////////////////////////// // // getOriginalTrailer -- // void getOriginalTrailer(SSTREAM& trailer, int filesize, int xref, ifstream& file) { int i = filesize - 5; int level = 0; int 
founddictionary = 0; int endindex = -1; int startindex = -1; char ch; for (i=filesize-15; i>0; i--) { file.seekg(i, ios::beg); file.get(ch); if (ch == '>') { level++; if ((level == 1) && (founddictionary == 0)) { founddictionary = 1; endindex = i; } continue; } if (ch == '<') { level--; } if (level > 0) { continue; } if (level < 0) { cerr << "ERROR: unbalanced brackets in PDF trailer at byte offset " << i << endl; exit(1); } if (ch == 'r') { // checking for "trailer" ==> "reliart" i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 'e')) { continue; } i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 'l')) { continue; } i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 'i')) { continue; } i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 'a')) { continue; } i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 'r')) { continue; } i--; file.seekg(i, ios::beg); file.get(ch); if (!(ch == 't')) { continue; } startindex = i; break; } } if (startindex <= 0) { cerr << "ERROR: Could not find the start of the trailer" << endl; exit(1); } file.seekg(startindex, ios::beg); for (i=startindex; i<=endindex; i++) { file.get(ch); trailer << ch; } } ///////////////////////////// // // printStreamObject -- // int printStreamObject(ostream& finalout, int objnum, SSTREAM& datatoencode, const char* filename, Array& objectindex, Array& offsetindex, int initialoffset) { SSTREAM streamcontents; SSTREAM out; createStreamData(streamcontents, datatoencode, filename); int contentsize = streamcontents.str().length(); char newline = 0x0a; int version = 0; int initiallen = out.str().length(); // print the embedded file content stream object /////////////////// out << "\n"; objectindex.append(objnum); int offset = out.str().length() - initiallen; offset += initialoffset; offsetindex.append(offset); out << objnum++ << " " << version << " obj\n"; out << "<<\n"; out << " /Type /EmbeddedFile\n"; out << " /SubType /application#2fx-humdrum\n"; out << " /Length " << contentsize 
<< "\n"; struct stat attrib; stat(filename, &attrib); struct tm* moddate; moddate = gmtime(&(attrib.st_mtime)); out << " /Params\n"; out << " <<\n"; out << " /CreationDate ("; printPdfDate(out, moddate); // example: D:20050727132644-04'00' out << ")\n"; out << " /ModDate ("; printPdfDate(out, moddate); // example D:20050727143111-04'00' out << ")\n"; out << " /Size " << datatoencode.str().length() << "\n"; out << " /CheckSum <"; getMD5Sum(out, datatoencode); // such as 5C94A7BE7C695C70271E29A26B5705C1 out << ">\n"; out << " >>\n"; out << ">>\n"; out << "stream" << newline; out << streamcontents.str(); int len1 = streamcontents.str().length(); if ((streamcontents.str()[len1-1] != 0x0a) && (streamcontents.str()[len1-1] != 0x0d)) { out << "\n"; } out << "endstream\n"; out << "endobj\n"; // print the file spec object: ///////////////////////////////////// Array outfilename; int len = strlen(filename); outfilename.setSize(1000 + len); outfilename.setGrowth(1000); strcpy(outfilename.getBase(), filename); outfilename.setSize(len+1); PerlRegularExpression pre; if (!keepdirQ) { pre.sar(outfilename, ".*/", "", "g"); } out << "\n"; objectindex.append(objnum); offset = out.str().length() - initiallen; offset += initialoffset; offsetindex.append(offset); out << objnum << " " << version << " obj\n"; objnum++; out << "<<\n"; out << " /Type /Filespec\n"; out << " /F ("; if (prefixQ) { out << prefix; } out << outfilename.getBase(); out << ")\n"; out << " /EF << /F " << objnum-2 << " 0 R >>\n"; // object with contents out << " /Desc (Embedded Humdrum File)\n"; // descripion of file out << ">>\n"; out << "endobj\n"; finalout << out.str(); return objnum; } ////////////////////////////// // // printPdfDate -- time is printed in UTC plus deviation from UTC for // localtime. 
// example D:20050727143111-04'00' // D:yyyymmddhhmmss-HH'MM' // void printPdfDate(ostream& out, struct tm* date) { char buffer[128] = {0}; strftime(buffer, 128, "D:%Y%m%d%H%M%S", date); out << buffer; // print time zone information (need to check on daylight savings) int value = timezone; char sign = '-'; if (timezone < 0) { value = -timezone; } int hour = value / 3600; int min = value - hour * 3600; if (min < 0) { min = 0; } out << sign; if (hour < 10) { out << "0"; } out << hour << "'"; if (min < 10) { out << "0"; } out << min << "'"; } ////////////////////////////// // // createStreamData -- // void createStreamData(ostream& out, SSTREAM& datatoencode, const char* filename) { out << datatoencode.str(); } void createStreamDataOld(ostream& out, SSTREAM& datatoencode, const char* filename) { datatoencode << ends; out << "\n"; string sss = datatoencode.str(); // out << "Length of string " << sss.length() << endl; int count = sss.length() - 1; int packets = count / 3; // deal with leftovers later... int i; for (i=0; i\n"; } ////////////////////////////// // // getFileSize -- // int getFileSize(ifstream& file) { file.seekg(0, ios::end); int filesize = (int)file.tellg(); file.seekg(0, ios::beg); return filesize; } ////////////////////////////// // // getXrefOffset -- return the offset of the xref referred to in the trailer. 
// int getXrefOffset(ifstream& file) { int output = -1; file.seekg(0, ios::end); int filesize = (int)file.tellg(); int i = filesize - 4; char ch; file.seekg(i, ios::beg); file.get(ch); while ((i > 0) && (ch != '%')) { i--; file.seekg(i, ios::beg); file.get(ch); } if (i <= 0) { cerr << "ERROR extracting xref offset in file " << endl; exit(1); } file.seekg(i-1, ios::beg); file.get(ch); if (ch != '%') { cerr << "ERROR1 extracting xref offset in file " << endl; cerr << "Expected %, but found " << ch << endl; exit(1); } file.seekg(i+1, ios::beg); file.get(ch); if (ch != 'E') { cerr << "ERROR2 extracting xref offset in file " << endl; cerr << "Expected E, but found " << ch << endl; exit(1); } file.seekg(i+2, ios::beg); file.get(ch); if (ch != 'O') { cerr << "ERROR3 extracting xref offset in file " << endl; cerr << "Expected O, but found " << ch << endl; exit(1); } file.seekg(i+3, ios::beg); file.get(ch); if (ch != 'F') { cerr << "ERROR4 extracting xref offset in file " << endl; cerr << "Expected F, but found " << ch << endl; exit(1); } int bindex = 100; char buffer[128] = {0}; i = i-2; file.seekg(i, ios::beg); file.get(ch); while ((i>0) && (!isdigit(ch))) { i--; file.seekg(i, ios::beg); file.get(ch); } if (!isdigit(ch)) { cerr << "ERROR5 extracting xref offset in file " << endl; cerr << "Expected digit, but found " << ch << endl; exit(1); } buffer[bindex--] = ch; i--; file.seekg(i, ios::beg); file.get(ch); while ((bindex > 0) && (i>0) && (isdigit(ch))) { buffer[bindex--] = ch; i--; file.seekg(i, ios::beg); file.get(ch); } int status = sscanf(&buffer[bindex+1], "%d", &output); if (status != 1) { cerr << "ERROR6 reading number from string" << endl; cerr << "string is " << &buffer[bindex+1] << endl; exit(1); } return output; } ////////////////////////////// // // printMimeEncoding -- // void printMimeEncoding(ostream& out, int count, char char1, char char2, char char3) { static char table[64] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; out << table[char1 >> 2]; out << table[((char1 & 0x03) << 4) | (char2 >> 4)]; if (count == 1) { out << "=="; return; } out << table[((char2 & 0x0f) << 2) | (char3 >> 6)]; if (count == 2) { out << "="; return; } out << table[char3 & 0x3f]; } ////////////////////////////// // // checkOptions -- validate and process command-line options. // void checkOptions(Options& opts, int argc, char* argv[]) { opts.define("p|pdf=s:", "PDF file on which file(s) will be attached"); opts.define("A|append-only=b", "output only data to append to PDF"); opts.define("debug=b", "print debugging statements to standard error"); opts.define("D|keep-directory=b", "keep directory in filename"); opts.define("d|directory=b", "append directory path to filename"); opts.define("P|prefix=s:", "prepend path to written filename"); opts.define("author=b", "author of program"); opts.define("version=b", "compilation info"); opts.define("example=b", "example usages"); opts.define("help=b", "short description"); opts.process(argc, argv); // handle basic options: if (opts.getBoolean("author")) { cout << "Written by Craig Stuart Sapp, " << "craig@ccrma.stanford.edu, May 2010" << endl; exit(0); } else if (opts.getBoolean("version")) { cout << argv[0] << ", version: 5 May 2010" << endl; cout << "compiled: " << __DATE__ << endl; cout << MUSEINFO_VERSION << endl; exit(0); } else if (opts.getBoolean("help")) { usage(opts.getCommand()); exit(0); } else if (opts.getBoolean("example")) { example(); exit(0); } if (opts.getBoolean("pdf")) { pdffilename = opts.getString("pdf"); } else { cerr << "Error: -p file.pdf option is required." 
<< endl; exit(1); } appendQ = opts.getBoolean("append-only"); debugQ = opts.getBoolean("debug"); keepdirQ = opts.getBoolean("keep-directory"); prefixQ = opts.getBoolean("prefix"); if (prefixQ) { prefix = opts.getString("prefix"); keepdirQ = 0; } } ////////////////////////////// // // example -- example usage of the quality program // void example(void) { cout << " \n" << endl; } ////////////////////////////// // // usage -- gives the usage statement for the meter program // void usage(const char* command) { cout << " \n" << endl; } /////////////////////////////////////////////////////////////////////////////// // // MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm // // Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All rights // reserved. // // License to copy and use this software is granted provided that it is // identified as the "RSA Data Security, Inc. MD5 Message-Digest Algorithm" in // all material mentioning or referencing this software or this function. // // License is also granted to make and use derivative works provided that such // works are identified as "derived from the RSA Data Security, Inc. MD5 // Message-Digest Algorithm" in all material mentioning or referencing the // derived work. // // RSA Data Security, Inc. makes no representations concerning either the // merchantability of this software or the suitability of this software for // any particular purpose. It is provided "as is" without express or implied // warranty of any kind. // // These notices must be retained in any copies of any part of this // documentation and/or software. // // #include // #include // using namespace std; struct MD5_CTX { // MD5 context unsigned long state[4]; // state (ABCD) unsigned long count[2]; // number of bits, modulo 2^64 (lsb first) unsigned char buffer[64]; // input buffer }; // Constants for MD5Transform routine. 
// Per-round left-rotation amounts (Sxy: round x, position y in the cycle).
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

// Forward declaration so that the prototypes below do not depend on the
// position of the struct definition.
struct MD5_CTX;

void MD5Init      (MD5_CTX *context);
void MD5Update    (MD5_CTX *context, unsigned char *input,
                   unsigned int inputLen);
void MD5Final     (unsigned char digest[16], MD5_CTX *context);
void MD5Transform (unsigned long state[4], unsigned char block[64]);
void Encode       (unsigned char *output, unsigned long *input,
                   unsigned int len);
void Decode       (unsigned long *output, unsigned char *input,
                   unsigned int len);

// Padding appended to the message: a single 1 bit followed by zeros.
static unsigned char PADDING[64] = {
   0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

// F, G, H and I are basic MD5 functions.  Every macro argument is
// parenthesized (including under ~) so that expression arguments are safe.
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))

// ROTATE_LEFT rotates the low 32 bits of x left by n bits (0 < n < 32).
// The operands are unsigned long, which is wider than 32 bits on LP64
// platforms; without the masks, carry bits above bit 31 would be shifted
// back into the result and corrupt the digest.  The result is always
// confined to 32 bits.
#define ROTATE_LEFT(x, n) \
   (((((x) & 0xffffffffUL) << (n)) | \
     (((x) & 0xffffffffUL) >> (32-(n)))) & 0xffffffffUL)

// FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
// Rotation is separate from addition to prevent recomputation.
#define FF(a, b, c, d, x, s, ac) { \
   (a) += F ((b), (c), (d)) + (x) + (unsigned long)(ac); \
   (a) = ROTATE_LEFT ((a), (s)); \
   (a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
   (a) += G ((b), (c), (d)) + (x) + (unsigned long)(ac); \
   (a) = ROTATE_LEFT ((a), (s)); \
   (a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
   (a) += H ((b), (c), (d)) + (x) + (unsigned long)(ac); \
   (a) = ROTATE_LEFT ((a), (s)); \
   (a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
   (a) += I ((b), (c), (d)) + (x) + (unsigned long)(ac); \
   (a) = ROTATE_LEFT ((a), (s)); \
   (a) += (b); \
}



//////////////////////////////
//
// MD5Init -- MD5 initialization.  Begins an MD5 operation, writing a
//     new context.
// void MD5Init (MD5_CTX *context) { context->count[0] = context->count[1] = 0; // Load magic initialization constants. context->state[0] = 0x67452301; context->state[1] = 0xefcdab89; context->state[2] = 0x98badcfe; context->state[3] = 0x10325476; } ////////////////////////////// // // MD5Update -- MD5 block update operation. Continues an MD5 message-digest // operation, processing another message block, and updating the context. // void MD5Update(MD5_CTX *context, unsigned char *input, unsigned int inputLen) { unsigned int i, index, partLen; // Compute number of bytes mod 64 index = (unsigned int)((context->count[0] >> 3) & 0x3F); // Update number of bits if ((context->count[0] += ((unsigned long)inputLen << 3)) < ((unsigned long)inputLen << 3)) { context->count[1]++; } context->count[1] += ((unsigned long)inputLen >> 29); partLen = 64 - index; // Transform as many times as possible. if (inputLen >= partLen) { memcpy(&context->buffer[index], input, partLen); MD5Transform (context->state, context->buffer); for (i=partLen; i+63 < inputLen; i+=64) { MD5Transform (context->state, &input[i]); } index = 0; } else { i = 0; } // Buffer remaining input memcpy (&context->buffer[index], &input[i], inputLen-i); } ////////////////////////////// // // MD5Final -- MD5 finalization. Ends an MD5 message-digest operation, // writing the the message digest and zeroizing the context. // void MD5Final(unsigned char digest[16], MD5_CTX *context) { unsigned char bits[8]; unsigned int index, padLen; // Save number of bits Encode(bits, context->count, 8); // Pad out to 56 mod 64. index = (unsigned int)((context->count[0] >> 3) & 0x3f); padLen = (index < 56) ? (56 - index) : (120 - index); MD5Update(context, PADDING, padLen); // Append length (before padding) MD5Update(context, bits, 8); // Store state in digest Encode(digest, context->state, 16); // Zeroize sensitive information. 
assert(sizeof(*context) == sizeof(MD5_CTX)); memset(context, 0, sizeof(*context)); } ////////////////////////////// // // MD5Transform -- MD5 basic transformation. Transforms state based on block. // void MD5Transform(unsigned long state[4], unsigned char block[64]) { unsigned long a = state[0], b = state[1], c = state[2], d = state[3], x[16]; Decode (x, block, 64); // Round 1 FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ // Round 2 GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */ GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ // Round 3 HH (a, 
b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ // Round 4 II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ state[0] += a; state[1] += b; state[2] += c; state[3] += d; // Zeroize sensitive information. memset(x, 0, sizeof(x)); } ////////////////////////////// // // Encode -- Encodes input (unsigned long) into output (unsigned char). // Assumes len is a multiple of 4. 
// void Encode(unsigned char *output, unsigned long *input, unsigned int len) { unsigned int i, j; for (i=0, j=0; j> 8) & 0xff); output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); } } ////////////////////////////// // // Decode -- Decodes input (unsigned char) into output (unsigned long). // Assumes len is a multiple of 4. void Decode (unsigned long *output, unsigned char *input, unsigned int len) { unsigned int i, j; for (i=0, j=0; j buffer; buffer.setSize(data.str().length()+1); buffer[buffer.getSize()-1] = '\0'; strncpy((char*)buffer.getBase(), data.CSTRING, data.str().length()); MD5_CTX context; MD5Init(&context); int i; int count = (buffer.getSize()-1) / 64; int leftover = (buffer.getSize()-1) % 64; //leftover--; for (i=0; i 0) { MD5Update(&context, buffer.getBase() + i * count, leftover); } unsigned char digest[16] = {0}; MD5Final(digest, &context); for (i=0; i<16; i++) { out << hex << (int)digest[i] << dec; } } // md5sum: 24eb50e7fd68620d0b3f765335ac9934 humpdf.cpp [20100505]