Index: poppler/Dict.cc =================================================================== RCS file: /cvs/poppler/poppler/poppler/Dict.cc,v retrieving revision 1.4 diff -u -8 -p -r1.4 Dict.cc --- poppler/Dict.cc 18 Jan 2006 22:32:13 -0000 1.4 +++ poppler/Dict.cc 17 Aug 2006 12:52:01 -0000 @@ -50,16 +50,47 @@ void Dict::add(const UGooString &key, Ob } entries = (DictEntry *)greallocn(entries, size, sizeof(DictEntry)); } entries[length].key = new UGooString(key); entries[length].val = *val; ++length; } +void Dict::remove(const UGooString &key) { + int i; + bool found = false; + DictEntry tmp; + if(length == 0) return; + + for(i=0; ival.free(); + e->val = *val; + } else { + add (key, val); + } +} + inline DictEntry *Dict::find(const UGooString &key) { int i; for (i = 0; i < length; ++i) { if (!key.cmp(entries[i].key)) return &entries[i]; } return NULL; Index: poppler/Dict.h =================================================================== RCS file: /cvs/poppler/poppler/poppler/Dict.h,v retrieving revision 1.3 diff -u -8 -p -r1.3 Dict.h --- poppler/Dict.h 18 Jan 2006 22:32:13 -0000 1.3 +++ poppler/Dict.h 17 Aug 2006 12:52:01 -0000 @@ -38,16 +38,20 @@ public: int incRef() { return ++ref; } int decRef() { return --ref; } // Get number of entries. int getLength() { return length; } // Add an entry void add(const UGooString &key, Object *val); + // Update the value of an existing entry, otherwise create it + void set(const UGooString &key, Object *val); + // Remove an entry. This invalidate indexes + void remove(const UGooString &key); // Check if dictionary is of specified type. GBool is(char *type); // Look up an entry and return the value. Returns a null object // if is not in the dictionary. Object *lookup(const UGooString &key, Object *obj); Object *lookupNF(const UGooString &key, Object *obj); Index: poppler/Makefile.am =================================================================== RCS file: /cvs/poppler/poppler/poppler/Makefile.am,v retrieving revision 1.24 diff -u -8 -p -r1.24 Makefile.am --- poppler/Makefile.am 24 Jul 2006 19:49:51 -0000 1.24 +++ poppler/Makefile.am 17 Aug 2006 12:52:01 -0000 @@ -85,17 +85,17 @@ noinst_LTLIBRARIES = $(poppler_cairo) \ libpoppler_la_LIBADD = \ $(top_builddir)/goo/libgoo.la \ $(top_builddir)/fofi/libfofi.la \ $(splash_libs) \ $(libjpeg_libs) \ $(zlib_libs) \ $(FREETYPE_LIBS) \ - $(FONTCONFIG_LIBS) + $(FONTCONFIG_LIBS) libpoppler_la_LDFLAGS = -version-info 1:0:0 if ENABLE_XPDF_HEADERS poppler_includedir = $(includedir)/poppler poppler_include_HEADERS = \ $(splash_headers) \ Index: poppler/Object.h =================================================================== RCS file: /cvs/poppler/poppler/poppler/Object.h,v retrieving revision 1.2 diff -u -8 -p -r1.2 Object.h --- poppler/Object.h 18 Jan 2006 22:32:13 -0000 1.2 +++ poppler/Object.h 17 Aug 2006 12:52:01 -0000 @@ -160,16 +160,17 @@ public: int arrayGetLength(); void arrayAdd(Object *elem); Object *arrayGet(int i, Object *obj); Object *arrayGetNF(int i, Object *obj); // Dict accessors. int dictGetLength(); void dictAdd(const UGooString &key, Object *val); + void dictSet(const UGooString &key, Object *val); GBool dictIs(char *dictType); Object *dictLookup(const UGooString &key, Object *obj); Object *dictLookupNF(const UGooString &key, Object *obj); UGooString *dictGetKey(int i); Object *dictGetVal(int i, Object *obj); Object *dictGetValNF(int i, Object *obj); // Stream accessors. @@ -237,16 +238,19 @@ inline Object *Object::arrayGetNF(int i, #include "Dict.h" inline int Object::dictGetLength() { return dict->getLength(); } inline void Object::dictAdd(const UGooString &key, Object *val) { dict->add(key, val); } +inline void Object::dictSet(const UGooString &key, Object *val) + { dict->set(key, val); } + inline GBool Object::dictIs(char *dictType) { return dict->is(dictType); } inline GBool Object::isDict(char *dictType) { return type == objDict && dictIs(dictType); } inline Object *Object::dictLookup(const UGooString &key, Object *obj) { return dict->lookup(key, obj); } Index: poppler/PDFDoc.cc =================================================================== RCS file: /cvs/poppler/poppler/poppler/PDFDoc.cc,v retrieving revision 1.10 diff -u -8 -p -r1.10 PDFDoc.cc --- poppler/PDFDoc.cc 18 Jan 2006 22:32:13 -0000 1.10 +++ poppler/PDFDoc.cc 17 Aug 2006 12:52:02 -0000 @@ -456,31 +456,260 @@ GBool PDFDoc::isLinearized() { obj4.free(); obj3.free(); obj2.free(); obj1.free(); delete parser; return lin; } -GBool PDFDoc::saveAs(GooString *name) { +GBool PDFDoc::saveAs(GooString *name, GBool forceIncr) { FILE *f; - int c; if (!(f = fopen(name->getCString(), "wb"))) { error(-1, "Couldn't open file '%s'", name->getCString()); return gFalse; } + + if (forceIncr) + saveIncrementalUpdate(f); + else + saveCompleteRewrite(f); + + + fclose(f); + return gTrue; +} + +void PDFDoc::saveIncrementalUpdate (FILE *f) +{ + XRef *uxref; + int c; + //copy the original file str->reset(); while ((c = str->getChar()) != EOF) { fputc(c, f); } str->close(); - fclose(f); - return gTrue; + + uxref = new XRef(); + uxref->add(0, 65535, 0, gFalse); + int objectsCount = 0; //count the number of objects in the XRef(s) + for(int i=0; igetNumObjects(); i++) { + if ((xref->getEntry(i)->type == xrefEntryFree) && + (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects + continue; + objectsCount++; + if (xref->getEntry(i)->obj) { //we have an updated object + Object obj1; + Ref ref; + ref.num = i; + ref.gen = xref->getEntry(i)->gen; + xref->fetch(ref.num, ref.gen, &obj1); + Guint offset = writeObject(&obj1, &ref, f); + uxref->add(ref.num, ref.gen, offset, gTrue); + obj1.free(); + } + } + if (uxref->getSize() == 0) { //we have nothing to update + delete uxref; + return; + } + + Guint uxrefOffset = ftell(f); + uxref->writeToFile(f); + + Ref catRef; + catRef.gen = xref->getRootGen(); + catRef.num = xref->getRootNum(); + writeTrailer(uxrefOffset, objectsCount, f, &catRef, gTrue); + + delete uxref; +} + +void PDFDoc::saveCompleteRewrite (FILE *f) +{ + fprintf(f, "%%PDF-%.1f\r\n",pdfVersion); + XRef *uxref = new XRef(); + uxref->add(0, 65535, 0, gFalse); + for(int i=0; igetNumObjects(); i++) { + Object obj1; + Ref ref; + if (xref->getEntry(i)->type == xrefEntryFree) { + ref.num = i; + ref.gen = xref->getEntry(i)->gen; + /* the XRef class add a lot of unrelevant free entries, we only want the significant one + and we don't want the one with num=0 because it has already been added (gen = 65535)*/ + if (ref.gen > 0 && ref.num > 0) + uxref->add(ref.num, ref.gen, 0, gFalse); + } else { + ref.num = i; + ref.gen = xref->getEntry(i)->gen; + xref->fetch(ref.num, ref.gen, &obj1); + Guint offset = writeObject(&obj1, &ref, f); + uxref->add(ref.num, ref.gen, offset, gTrue); + obj1.free(); + } + } + Guint uxrefOffset = ftell(f); + uxref->writeToFile(f); + + Ref catRef; + catRef.gen = xref->getRootGen(); + catRef.num = xref->getRootNum(); + writeTrailer(uxrefOffset, uxref->getSize(), f, &catRef, gFalse); + + + delete uxref; + +} + +void PDFDoc::writeDictionnary (Dict* dict, FILE *f) +{ + Object obj1; + fprintf(f,"<< "); + for (int i=0; igetLength(); i++) { + fprintf(f,"/%s ", dict->getKey(i)->getCString()); + writeObject(dict->getValNF(i, &obj1), NULL, f); + fprintf(f,"\r\n"); + + obj1.free(); + } + fprintf(f,">>\r\n"); +} + +void PDFDoc::writeStream (Stream* str, FILE *f) +{ + int c; + fprintf(f,"stream\r\n"); + str->reset(); + for (int c=str->getChar(); c!= EOF; c=str->getChar()) { + fprintf(f,"%c", c); + } + fprintf(f,"\r\nendstream\r\n"); +} + +void PDFDoc::writeString (GooString* s, FILE* f) +{ + //write hexa string + const char* c = s->getCString(); + fprintf(f, "<"); + while(*c!='\0') { + fprintf(f, "%02x", *c); + c++; + } + fprintf(f, "> "); +} + +Guint PDFDoc::writeObject (Object* obj, Ref* ref, FILE *f) +{ + Array *array; + Object obj1; + Guint offset = ftell(f); + int tmp; + + if(ref) + fprintf(f,"%i %i obj\r\n", ref->num, ref->gen); + + switch (obj->getType()) { + case objBool: + fprintf(f,"%s ", obj->getBool()?"true":"false"); + break; + case objInt: + fprintf(f,"%i ", obj->getInt()); + break; + case objReal: + fprintf(f,"%f ", obj->getReal()); + break; + case objString: + writeString(obj->getString(), f); + break; + case objName: + fprintf(f,"/%s ", obj->getName()); + break; + case objNull: + fprintf(f, "null\r\n"); + break; + case objArray: + array = obj->getArray(); + fprintf(f,"["); + for (int i=0; igetLength(); i++) { + writeObject(array->getNF(i, &obj1), NULL,f); + obj1.free(); + } + fprintf(f,"]"); + break; + case objDict: + writeDictionnary (obj->getDict(),f); + break; + case objStream: + //we write the stream unencoded => TODO: write stream encoder + obj->getStream()->reset(); + //recalculate stream length + tmp = 0; + for (int c=obj->getStream()->getChar(); c!=EOF; c=obj->getStream()->getChar()) { + tmp++; + } + obj1.initInt(tmp); + obj->getStream()->getDict()->set("Length", &obj1); + + //Remove Stream encoding + obj->getStream()->getDict()->remove("Filter"); + obj->getStream()->getDict()->remove("DecodeParms"); + + writeDictionnary (obj->getStream()->getDict(),f); + writeStream (obj->getStream(),f); + obj1.free(); + break; + case objRef: + fprintf(f,"%i %i R ", obj->getRef().num, obj->getRef().gen); + break; + case objCmd: + fprintf(f,"null\r\n"); + break; + case objError: + fprintf(f,"null\r\n"); + break; + case objEOF: + fprintf(f,"null\r\n"); + break; + case objNone: + fprintf(f,"null\r\n"); + break; + default: + error(-1,"Unhandled objType : %i, please report a bug with a testcase\r\n", obj->getType()); + break; + } + if (ref) + fprintf(f,"endobj\r\n\r\n"); + return offset; +} + +void PDFDoc::writeTrailer (Guint uxrefOffset, int uxrefSize, FILE* f, Ref* rootRef, GBool incrUpdate) +{ + Dict* trailerDict = xref->getTrailerDict()->getDict(); + Object obj1; + obj1.initInt(uxrefSize); + trailerDict->set("Size", &obj1); + obj1.free(); + if (incrUpdate) { + obj1.initInt(xref->getLastXRefPos()); + trailerDict->set("Prev", &obj1); + obj1.free(); + } + if(rootRef) { + obj1.initRef(rootRef->num, rootRef->gen); + trailerDict->set("Root", &obj1); + obj1.free(); + } + fprintf(f, "trailer\r\n"); + writeDictionnary(trailerDict, f); + fprintf(f, "startxref\r\n"); + fprintf(f, "%i\r\n", uxrefOffset); + fprintf(f, "%%%%EOF\r\n"); } void PDFDoc::getLinks(Page *page) { Object obj; links = new Links(page->getAnnots(&obj), catalog->getBaseURI()); obj.free(); } Index: poppler/PDFDoc.h =================================================================== RCS file: /cvs/poppler/poppler/poppler/PDFDoc.h,v retrieving revision 1.7 diff -u -8 -p -r1.7 PDFDoc.h --- poppler/PDFDoc.h 18 Jan 2006 22:32:13 -0000 1.7 +++ poppler/PDFDoc.h 17 Aug 2006 12:52:02 -0000 @@ -158,23 +158,31 @@ public: // Return the document's Info dictionary (if any). Object *getDocInfo(Object *obj) { return xref->getDocInfo(obj); } Object *getDocInfoNF(Object *obj) { return xref->getDocInfoNF(obj); } // Return the PDF version specified by the file. double getPDFVersion() { return pdfVersion; } - // Save this file with another name. - GBool saveAs(GooString *name); + // Save this file with another name + GBool saveAs(GooString *name, GBool forceIncr=gFalse); // Return a pointer to the GUI (XPDFCore or WinPDFCore object). void *getGUIData() { return guiData; } private: + // Add object to current file stream and return the offset of the beginning of the object + Guint writeObject (Object *obj, Ref *ref, FILE* f); + void writeDictionnary (Dict* dict, FILE* f); + void writeStream (Stream* str, FILE* f); + void writeTrailer (Guint uxrefOffset, int uxrefSize, FILE* f, Ref* rootRef, GBool incrUpdate); + void writeString (GooString* s, FILE* f); + void saveIncrementalUpdate (FILE* f); + void saveCompleteRewrite (FILE* f); GBool setup(GooString *ownerPassword, GooString *userPassword); GBool checkFooter(); void checkHeader(); GBool checkEncryption(GooString *ownerPassword, GooString *userPassword); void getLinks(Page *page); GooString *fileName; Index: poppler/XRef.cc =================================================================== RCS file: /cvs/poppler/poppler/poppler/XRef.cc,v retrieving revision 1.13 diff -u -8 -p -r1.13 XRef.cc --- poppler/XRef.cc 1 Apr 2006 11:25:57 -0000 1.13 +++ poppler/XRef.cc 17 Aug 2006 12:52:04 -0000 @@ -196,16 +196,26 @@ Object *ObjectStream::getObject(int objI } return objs[objIdx].copy(obj); } //------------------------------------------------------------------------ // XRef //------------------------------------------------------------------------ +XRef::XRef() { + ok = gTrue; + errCode = errNone; + entries = NULL; + size = 0; + streamEnds = NULL; + streamEndsLen = 0; + objStr = NULL; +} + XRef::XRef(BaseStream *strA) { Guint pos; Object obj; ok = gTrue; errCode = errNone; size = 0; entries = NULL; @@ -259,17 +269,22 @@ XRef::XRef(BaseStream *strA) { } // now set the trailer dictionary's xref pointer so we can fetch // indirect objects from it trailerDict.getDict()->setXRef(this); } XRef::~XRef() { + for(int i=0; igetObj(&obj)->isInt()) { goto err1; } entry.offset = (Guint)obj.getInt(); obj.free(); if (!parser->getObj(&obj)->isInt()) { goto err1; } entry.gen = obj.getInt(); + entry.obj = NULL; obj.free(); parser->getObj(&obj); if (obj.isCmd("n")) { entry.type = xrefEntryUncompressed; } else if (obj.isCmd("f")) { entry.type = xrefEntryFree; } else { goto err1; @@ -502,16 +519,17 @@ GBool XRef::readXRefStream(Stream *xrefS if (newSize * (int)sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) { error(-1, "Invalid 'size' parameter."); return gFalse; } entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); for (i = size; i < newSize; ++i) { entries[i].offset = 0xffffffff; entries[i].type = xrefEntryFree; + entries[i].obj = NULL; } size = newSize; } if (!dict->lookupNF("W", &obj)->isArray() || obj.arrayGetLength() < 3) { goto err1; } @@ -596,16 +614,17 @@ GBool XRef::readXRefStreamSection(Stream if (newSize*(int)sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) { error(-1, "Invalid 'size' inside xref table."); return gFalse; } entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); for (i = size; i < newSize; ++i) { entries[i].offset = 0xffffffff; entries[i].type = xrefEntryFree; + entries[i].obj = NULL; } size = newSize; } for (i = first; i < first + n; ++i) { if (w[0] == 0) { type = 1; } else { for (type = 0, j = 0; j < w[0]; ++j) { @@ -736,16 +755,17 @@ GBool XRef::constructXRef() { error(-1, "Invalid 'obj' parameters."); return gFalse; } entries = (XRefEntry *) greallocn(entries, newSize, sizeof(XRefEntry)); for (i = size; i < newSize; ++i) { entries[i].offset = 0xffffffff; entries[i].type = xrefEntryFree; + entries[i].obj = NULL; } size = newSize; } if (entries[num].type == xrefEntryFree || gen >= entries[num].gen) { entries[num].offset = pos - start; entries[num].gen = gen; entries[num].type = xrefEntryUncompressed; @@ -843,18 +863,22 @@ Object *XRef::fetch(int num, int gen, Ob XRefEntry *e; Parser *parser; Object obj1, obj2, obj3; // check for bogus ref - this can happen in corrupted PDF files if (num < 0 || num >= size) { goto err; } - + e = &entries[num]; + if(e->obj) { //check for updated object + obj = e->obj->copy(obj); + return obj; + } switch (e->type) { case xrefEntryUncompressed: if (e->gen != gen) { goto err; } obj1.initNull(); parser = new Parser(this, @@ -962,8 +986,70 @@ Guint XRef::strToUnsigned(char *s) { int i; x = 0; for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) { x = 10 * x + (*p - '0'); } return x; } + +void XRef::add(int num, int gen, Guint offs, GBool used) { + size += 1; + entries = (XRefEntry *)greallocn(entries, size, sizeof(XRefEntry)); + XRefEntry *e = &entries[size-1]; + + e->gen = gen; + e->num = num; + e->obj = NULL; + if (used) { + e->type = xrefEntryUncompressed; + e->offset = offs; + } else { + e->type = xrefEntryFree; + e->offset = 0; + } +} + +void XRef::setModifiedObject (Object* o, Ref r) { + if (r.num < 0 || r.num >= size) { + error(-1,"XRef::setModifiedObject on unknown ref: %i, %i\n", r.num, r.gen); + return; + } + entries[r.num].obj = new Object(); + o->copy(entries[r.num].obj); +} + +//used to sort the entries +int compare (const void* a, const void* b) +{ + return (((XRefEntry*)a)->num - ((XRefEntry*)b)->num); +} + +void XRef::writeToFile(FILE* file) { + qsort(entries, size, sizeof(XRefEntry), compare); + //create free entries linked-list + if (entries[0].gen != 65535) { + error(-1, "XRef::writeToFile, entry 0 of the XRef is invalid (gen != 65535)\n"); + } + int lastFreeEntry = 0; + for (int i=0; i array int rootNum, rootGen; // catalog dict Index: test/Makefile.am =================================================================== RCS file: /cvs/poppler/poppler/test/Makefile.am,v retrieving revision 1.7 diff -u -8 -p -r1.7 Makefile.am --- test/Makefile.am 12 Apr 2006 02:07:07 -0000 1.7 +++ test/Makefile.am 17 Aug 2006 12:52:04 -0000 @@ -21,26 +21,29 @@ pdf_inspector = \ cairo_includes = \ $(CAIRO_CFLAGS) \ $(FREETYPE_CFLAGS) endif endif +pdf_modification_test = \ + pdf-modification-test + INCLUDES = \ -I$(top_srcdir) \ -I$(top_srcdir)/poppler \ -I$(top_srcdir)/glib \ -I$(top_builddir)/glib \ $(cairo_includes) \ $(GTK_TEST_CFLAGS) -noinst_PROGRAMS = $(gtk_splash_test) $(gtk_cairo_test) $(pdf_inspector) +noinst_PROGRAMS = $(gtk_splash_test) $(gtk_cairo_test) $(pdf_inspector) $(pdf_modification_test) gtk_splash_test_SOURCES = \ gtk-splash-test.cc gtk_splash_test_LDADD = \ $(top_builddir)/poppler/libpoppler.la \ $(GTK_TEST_LIBS) @@ -58,10 +61,17 @@ pdf_inspector_SOURCES = \ pdf_inspector_LDADD = \ $(top_builddir)/poppler/libpoppler.la \ $(top_builddir)/poppler/libpoppler-cairo.la \ $(CAIRO_LIBS) \ $(FREETYPE_LIBS) \ $(GTK_TEST_LIBS) +pdf_modification_test_SOURCES = \ + pdf-modification-test.cc + +pdf_modification_test_LDADD = \ + $(top_builddir)/poppler/libpoppler.la + + EXTRA_DIST = \ pdf-operators.c Index: test/pdf-modification-test.cc =================================================================== RCS file: test/pdf-modification-test.cc diff -N test/pdf-modification-test.cc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ test/pdf-modification-test.cc 17 Aug 2006 12:52:04 -0000 @@ -0,0 +1,87 @@ +//======================================================================== +// +// pdftotext.cc +// +// Copyright 2006 Julien Rebetez +// +//======================================================================== + +#include "config.h" +#include +#include +#include +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "TextOutputDev.h" +#include "CharTypes.h" +#include "UnicodeMap.h" +#include "Error.h" +#include "UGooString.h" + +void modifyCatalogLang(PDFDoc *doc, GooString* lang); + +int main(int argc, char **argv) +{ + PDFDoc *doc; + GBool ok; + GooString *inputFileName; + GooString *newLang; + GooString *outputs[3]; + + // parse args + if (argc < 3 || argc > 4) { + fprintf(stderr, "pdf-modification-test\n"); + printf("usage: \n"); + return 0; + } + + inputFileName = new GooString(argv[1]); + newLang = new GooString(argv[3]); + outputs[0] = new GooString(argv[2]); outputs[0]->append("/no_update.pdf"); + outputs[1] = new GooString(argv[2]); outputs[1]->append("/update_full_rewrite.pdf"); + outputs[2] = new GooString(argv[2]); outputs[2]->append("/update_incremental.pdf"); + doc = new PDFDoc(inputFileName, NULL, NULL); + doc->saveAs(outputs[0]); + modifyCatalogLang(doc, newLang); + doc->saveAs(outputs[1], gFalse); + doc->saveAs(outputs[2], gTrue); + + delete inputFileName; + delete newLang; + for(int i=0; i<3; i++) delete outputs[i]; + delete doc; +} + +void modifyCatalogLang(PDFDoc* doc, GooString* lang) +{ + Object *catalog; + XRef *xref = doc->getXRef(); + //set the "Lang" entry of the Catalog to fr-CH + catalog = xref->getCatalog(catalog); + + //print current lang + Object o; + catalog->dictLookup("Lang", &o); + if(o.isString()) printf("input Lang: %s\n", o.getString()->getCString()); + if(catalog->isDict()) { + Object* val = new Object(); + val->initString(lang); + printf("output Lang: %s\n", val->getString()->getCString()); + catalog->dictSet("Lang", val); + } + Ref newRef; + newRef.num = xref->getRootNum(); + newRef.gen = xref->getRootGen(); + xref->setModifiedObject(catalog, newRef); +} + +