1//========================================================================
2//
3// pdf-fullrewrite.cc
4//
5// Copyright 2007 Julien Rebetez
6// Copyright 2012 Fabio D'Urso
7// Copyright 2022 Albert Astals Cid <aacid@kde.org>
8//
9//========================================================================
10
11#include "GlobalParams.h"
12#include "Error.h"
13#include "Object.h"
14#include "PDFDoc.h"
15#include "XRef.h"
16#include "goo/GooString.h"
17#include "utils/parseargs.h"
18
19static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc);
20static bool compareObjects(const Object *objA, const Object *objB);
21
// Storage for the command line options described by argDesc.
// A password buffer whose first byte is still '\001' is treated by main() as
// "option not given on the command line".
static char ownerPassword[33] = { '\001' };
static char userPassword[33] = { '\001' };
static bool forceIncremental{ false };
static bool checkOutput{ false };
static bool printHelp{ false };
27
28static const ArgDesc argDesc[] = { { .arg: "-opw", .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" },
29 { .arg: "-upw", .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" },
30 { .arg: "-i", .kind: argFlag, .val: &forceIncremental, .size: 0, .usage: "incremental update mode" },
31 { .arg: "-check", .kind: argFlag, .val: &checkOutput, .size: 0, .usage: "verify the generated document" },
32 { .arg: "-h", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
33 { .arg: "-help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
34 { .arg: "--help", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
35 { .arg: "-?", .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" },
36 {} };
37
38int main(int argc, char *argv[])
39{
40 PDFDoc *doc = nullptr;
41 PDFDoc *docOut = nullptr;
42 std::optional<GooString> ownerPW;
43 std::optional<GooString> userPW;
44 int res = 0;
45
46 // parse args
47 bool ok = parseArgs(args: argDesc, argc: &argc, argv);
48 if (!ok || (argc < 3) || printHelp) {
49 printUsage(program: argv[0], otherArgs: "INPUT-FILE OUTPUT-FILE", args: argDesc);
50 if (!printHelp) {
51 res = 1;
52 }
53 goto done;
54 }
55
56 if (ownerPassword[0] != '\001') {
57 ownerPW = GooString(ownerPassword);
58 }
59 if (userPassword[0] != '\001') {
60 userPW = GooString(userPassword);
61 }
62
63 // load input document
64 globalParams = std::make_unique<GlobalParams>();
65 doc = new PDFDoc(std::make_unique<GooString>(args&: argv[1]), ownerPW, userPW);
66 if (!doc->isOk()) {
67 fprintf(stderr, format: "Error loading input document\n");
68 res = 1;
69 goto done;
70 }
71
72 // save it back (in rewrite or incremental update mode)
73 if (doc->saveAs(name: *doc->getFileName(), mode: forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) {
74 fprintf(stderr, format: "Error saving document\n");
75 res = 1;
76 goto done;
77 }
78
79 if (checkOutput) {
80 // open the generated document to verify it
81 docOut = new PDFDoc(std::make_unique<GooString>(args&: argv[2]), ownerPW, userPW);
82 if (!docOut->isOk()) {
83 fprintf(stderr, format: "Error loading generated document\n");
84 res = 1;
85 } else if (!compareDocuments(origDoc: doc, newDoc: docOut)) {
86 fprintf(stderr, format: "Verification failed\n");
87 res = 1;
88 }
89 }
90
91done:
92 delete docOut;
93 delete doc;
94 return res;
95}
96
97static bool compareDictionaries(Dict *dictA, Dict *dictB)
98{
99 const int length = dictA->getLength();
100 if (dictB->getLength() != length) {
101 return false;
102 }
103
104 /* Check that every key in dictA is contained in dictB.
105 * Since keys are unique and we've already checked that dictA and dictB
106 * contain the same number of entries, we don't need to check that every key
107 * in dictB is also contained in dictA */
108 for (int i = 0; i < length; ++i) {
109 const char *key = dictA->getKey(i);
110 const Object &valA = dictA->getValNF(i);
111 const Object &valB = dictB->lookupNF(key);
112 if (!compareObjects(objA: &valA, objB: &valB)) {
113 return false;
114 }
115 }
116
117 return true;
118}
119
120static bool compareObjects(const Object *objA, const Object *objB)
121{
122 switch (objA->getType()) {
123 case objBool: {
124 if (objB->getType() != objBool) {
125 return false;
126 } else {
127 return (objA->getBool() == objB->getBool());
128 }
129 }
130 case objInt:
131 case objInt64:
132 case objReal: {
133 if (!objB->isNum()) {
134 return false;
135 } else {
136 // Fuzzy comparison
137 const double diff = objA->getNum() - objB->getNum();
138 return (-0.01 < diff) && (diff < 0.01);
139 }
140 }
141 case objString: {
142 if (objB->getType() != objString) {
143 return false;
144 } else {
145 const GooString *strA = objA->getString();
146 const GooString *strB = objB->getString();
147 return (strA->cmp(str: strB) == 0);
148 }
149 }
150 case objName: {
151 if (objB->getType() != objName) {
152 return false;
153 } else {
154 GooString nameA(objA->getName());
155 GooString nameB(objB->getName());
156 return (nameA.cmp(str: &nameB) == 0);
157 }
158 }
159 case objNull: {
160 if (objB->getType() != objNull) {
161 return false;
162 } else {
163 return true;
164 }
165 }
166 case objArray: {
167 if (objB->getType() != objArray) {
168 return false;
169 } else {
170 Array *arrayA = objA->getArray();
171 Array *arrayB = objB->getArray();
172 const int length = arrayA->getLength();
173 if (arrayB->getLength() != length) {
174 return false;
175 } else {
176 for (int i = 0; i < length; ++i) {
177 const Object &elemA = arrayA->getNF(i);
178 const Object &elemB = arrayB->getNF(i);
179 if (!compareObjects(objA: &elemA, objB: &elemB)) {
180 return false;
181 }
182 }
183 return true;
184 }
185 }
186 }
187 case objDict: {
188 if (objB->getType() != objDict) {
189 return false;
190 } else {
191 Dict *dictA = objA->getDict();
192 Dict *dictB = objB->getDict();
193 return compareDictionaries(dictA, dictB);
194 }
195 }
196 case objStream: {
197 if (objB->getType() != objStream) {
198 return false;
199 } else {
200 Stream *streamA = objA->getStream();
201 Stream *streamB = objB->getStream();
202 if (!compareDictionaries(dictA: streamA->getDict(), dictB: streamB->getDict())) {
203 return false;
204 } else {
205 int c;
206 streamA->reset();
207 streamB->reset();
208 do {
209 c = streamA->getChar();
210 if (c != streamB->getChar()) {
211 return false;
212 }
213 } while (c != EOF);
214 return true;
215 }
216 }
217 return true;
218 }
219 case objRef: {
220 if (objB->getType() != objRef) {
221 return false;
222 } else {
223 const Ref refA = objA->getRef();
224 const Ref refB = objB->getRef();
225 return refA == refB;
226 }
227 }
228 default: {
229 fprintf(stderr, format: "compareObjects failed: unexpected object type %u\n", objA->getType());
230 return false;
231 }
232 }
233}
234
235static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc)
236{
237 bool result = true;
238 XRef *origXRef = origDoc->getXRef();
239 XRef *newXRef = newDoc->getXRef();
240
241 // Make sure that special flags are set in both documents
242 origXRef->scanSpecialFlags();
243 newXRef->scanSpecialFlags();
244
245 // Compare XRef tables' size
246 const int origNumObjects = origXRef->getNumObjects();
247 const int newNumObjects = newXRef->getNumObjects();
248 if (forceIncremental && origXRef->isXRefStream()) {
249 // In case of incremental update, expect a new entry to be appended to store the new XRef stream
250 if (origNumObjects + 1 != newNumObjects) {
251 fprintf(stderr, format: "XRef table: Unexpected number of entries (%d+1 != %d)\n", origNumObjects, newNumObjects);
252 result = false;
253 }
254 } else {
255 // In all other cases the number of entries must be the same
256 if (origNumObjects != newNumObjects) {
257 fprintf(stderr, format: "XRef table: Different number of entries (%d != %d)\n", origNumObjects, newNumObjects);
258 result = false;
259 }
260 }
261
262 // Compare each XRef entry
263 const int numObjects = (origNumObjects < newNumObjects) ? origNumObjects : newNumObjects;
264 for (int i = 0; i < numObjects; ++i) {
265 XRefEntryType origType = origXRef->getEntry(i)->type;
266 XRefEntryType newType = newXRef->getEntry(i)->type;
267 const int origGenNum = (origType != xrefEntryCompressed) ? origXRef->getEntry(i)->gen : 0;
268 const int newGenNum = (newType != xrefEntryCompressed) ? newXRef->getEntry(i)->gen : 0;
269
270 // Check that DontRewrite entries are freed in full rewrite mode
271 if (!forceIncremental && origXRef->getEntry(i)->getFlag(flag: XRefEntry::DontRewrite)) {
272 if (newType != xrefEntryFree || origGenNum + 1 != newGenNum) {
273 fprintf(stderr, format: "XRef entry %u: DontRewrite entry was not freed correctly\n", i);
274 result = false;
275 }
276 continue; // There's nothing left to check for this entry
277 }
278
279 // Compare generation numbers
280 // Object num 0 should always have gen 65535 according to specs, but some
281 // documents have it set to 0. We always write 65535 in output
282 if (i != 0) {
283 if (origGenNum != newGenNum) {
284 fprintf(stderr, format: "XRef entry %u: generation numbers differ (%d != %d)\n", i, origGenNum, newGenNum);
285 result = false;
286 continue;
287 }
288 } else {
289 if (newGenNum != 65535) {
290 fprintf(stderr, format: "XRef entry %u: generation number was expected to be 65535 (%d != 65535)\n", i, newGenNum);
291 result = false;
292 continue;
293 }
294 }
295
296 // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags()
297 if (origXRef->getEntry(i)->flags != newXRef->getEntry(i)->flags) {
298 fprintf(stderr, format: "XRef entry %u: flags detected by scanSpecialFlags differ (%d != %d)\n", i, origXRef->getEntry(i)->flags, newXRef->getEntry(i)->flags);
299 result = false;
300 }
301
302 // Check that either both are free or both are in use
303 if ((origType == xrefEntryFree) != (newType == xrefEntryFree)) {
304 const char *origStatus = (origType == xrefEntryFree) ? "free" : "in use";
305 const char *newStatus = (newType == xrefEntryFree) ? "free" : "in use";
306 fprintf(stderr, format: "XRef entry %u: usage status differs (%s != %s)\n", i, origStatus, newStatus);
307 result = false;
308 continue;
309 }
310
311 // Skip free entries
312 if (origType == xrefEntryFree) {
313 continue;
314 }
315
316 // Compare contents
317 Object origObj = origXRef->fetch(num: i, gen: origGenNum);
318 Object newObj = newXRef->fetch(num: i, gen: newGenNum);
319 if (!compareObjects(objA: &origObj, objB: &newObj)) {
320 fprintf(stderr, format: "XRef entry %u: contents differ\n", i);
321 result = false;
322 }
323 }
324
325 return result;
326}
327

// source code of poppler/test/pdf-fullrewrite.cc