1 | //======================================================================== |
2 | // |
3 | // pdf-fullrewrite.cc |
4 | // |
5 | // Copyright 2007 Julien Rebetez |
6 | // Copyright 2012 Fabio D'Urso |
7 | // Copyright 2022 Albert Astals Cid <aacid@kde.org> |
8 | // |
9 | //======================================================================== |
10 | |
11 | #include "GlobalParams.h" |
12 | #include "Error.h" |
13 | #include "Object.h" |
14 | #include "PDFDoc.h" |
15 | #include "XRef.h" |
16 | #include "goo/GooString.h" |
17 | #include "utils/parseargs.h" |
18 | |
19 | static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc); |
20 | static bool compareObjects(const Object *objA, const Object *objB); |
21 | |
22 | static char ownerPassword[33] = "\001" ; |
23 | static char userPassword[33] = "\001" ; |
24 | static bool forceIncremental = false; |
25 | static bool checkOutput = false; |
26 | static bool printHelp = false; |
27 | |
28 | static const ArgDesc argDesc[] = { { .arg: "-opw" , .kind: argString, .val: ownerPassword, .size: sizeof(ownerPassword), .usage: "owner password (for encrypted files)" }, |
29 | { .arg: "-upw" , .kind: argString, .val: userPassword, .size: sizeof(userPassword), .usage: "user password (for encrypted files)" }, |
30 | { .arg: "-i" , .kind: argFlag, .val: &forceIncremental, .size: 0, .usage: "incremental update mode" }, |
31 | { .arg: "-check" , .kind: argFlag, .val: &checkOutput, .size: 0, .usage: "verify the generated document" }, |
32 | { .arg: "-h" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
33 | { .arg: "-help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
34 | { .arg: "--help" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
35 | { .arg: "-?" , .kind: argFlag, .val: &printHelp, .size: 0, .usage: "print usage information" }, |
36 | {} }; |
37 | |
38 | int main(int argc, char *argv[]) |
39 | { |
40 | PDFDoc *doc = nullptr; |
41 | PDFDoc *docOut = nullptr; |
42 | std::optional<GooString> ownerPW; |
43 | std::optional<GooString> userPW; |
44 | int res = 0; |
45 | |
46 | // parse args |
47 | bool ok = parseArgs(args: argDesc, argc: &argc, argv); |
48 | if (!ok || (argc < 3) || printHelp) { |
49 | printUsage(program: argv[0], otherArgs: "INPUT-FILE OUTPUT-FILE" , args: argDesc); |
50 | if (!printHelp) { |
51 | res = 1; |
52 | } |
53 | goto done; |
54 | } |
55 | |
56 | if (ownerPassword[0] != '\001') { |
57 | ownerPW = GooString(ownerPassword); |
58 | } |
59 | if (userPassword[0] != '\001') { |
60 | userPW = GooString(userPassword); |
61 | } |
62 | |
63 | // load input document |
64 | globalParams = std::make_unique<GlobalParams>(); |
65 | doc = new PDFDoc(std::make_unique<GooString>(args&: argv[1]), ownerPW, userPW); |
66 | if (!doc->isOk()) { |
67 | fprintf(stderr, format: "Error loading input document\n" ); |
68 | res = 1; |
69 | goto done; |
70 | } |
71 | |
72 | // save it back (in rewrite or incremental update mode) |
73 | if (doc->saveAs(name: *doc->getFileName(), mode: forceIncremental ? writeForceIncremental : writeForceRewrite) != 0) { |
74 | fprintf(stderr, format: "Error saving document\n" ); |
75 | res = 1; |
76 | goto done; |
77 | } |
78 | |
79 | if (checkOutput) { |
80 | // open the generated document to verify it |
81 | docOut = new PDFDoc(std::make_unique<GooString>(args&: argv[2]), ownerPW, userPW); |
82 | if (!docOut->isOk()) { |
83 | fprintf(stderr, format: "Error loading generated document\n" ); |
84 | res = 1; |
85 | } else if (!compareDocuments(origDoc: doc, newDoc: docOut)) { |
86 | fprintf(stderr, format: "Verification failed\n" ); |
87 | res = 1; |
88 | } |
89 | } |
90 | |
91 | done: |
92 | delete docOut; |
93 | delete doc; |
94 | return res; |
95 | } |
96 | |
97 | static bool compareDictionaries(Dict *dictA, Dict *dictB) |
98 | { |
99 | const int length = dictA->getLength(); |
100 | if (dictB->getLength() != length) { |
101 | return false; |
102 | } |
103 | |
104 | /* Check that every key in dictA is contained in dictB. |
105 | * Since keys are unique and we've already checked that dictA and dictB |
106 | * contain the same number of entries, we don't need to check that every key |
107 | * in dictB is also contained in dictA */ |
108 | for (int i = 0; i < length; ++i) { |
109 | const char *key = dictA->getKey(i); |
110 | const Object &valA = dictA->getValNF(i); |
111 | const Object &valB = dictB->lookupNF(key); |
112 | if (!compareObjects(objA: &valA, objB: &valB)) { |
113 | return false; |
114 | } |
115 | } |
116 | |
117 | return true; |
118 | } |
119 | |
120 | static bool compareObjects(const Object *objA, const Object *objB) |
121 | { |
122 | switch (objA->getType()) { |
123 | case objBool: { |
124 | if (objB->getType() != objBool) { |
125 | return false; |
126 | } else { |
127 | return (objA->getBool() == objB->getBool()); |
128 | } |
129 | } |
130 | case objInt: |
131 | case objInt64: |
132 | case objReal: { |
133 | if (!objB->isNum()) { |
134 | return false; |
135 | } else { |
136 | // Fuzzy comparison |
137 | const double diff = objA->getNum() - objB->getNum(); |
138 | return (-0.01 < diff) && (diff < 0.01); |
139 | } |
140 | } |
141 | case objString: { |
142 | if (objB->getType() != objString) { |
143 | return false; |
144 | } else { |
145 | const GooString *strA = objA->getString(); |
146 | const GooString *strB = objB->getString(); |
147 | return (strA->cmp(str: strB) == 0); |
148 | } |
149 | } |
150 | case objName: { |
151 | if (objB->getType() != objName) { |
152 | return false; |
153 | } else { |
154 | GooString nameA(objA->getName()); |
155 | GooString nameB(objB->getName()); |
156 | return (nameA.cmp(str: &nameB) == 0); |
157 | } |
158 | } |
159 | case objNull: { |
160 | if (objB->getType() != objNull) { |
161 | return false; |
162 | } else { |
163 | return true; |
164 | } |
165 | } |
166 | case objArray: { |
167 | if (objB->getType() != objArray) { |
168 | return false; |
169 | } else { |
170 | Array *arrayA = objA->getArray(); |
171 | Array *arrayB = objB->getArray(); |
172 | const int length = arrayA->getLength(); |
173 | if (arrayB->getLength() != length) { |
174 | return false; |
175 | } else { |
176 | for (int i = 0; i < length; ++i) { |
177 | const Object &elemA = arrayA->getNF(i); |
178 | const Object &elemB = arrayB->getNF(i); |
179 | if (!compareObjects(objA: &elemA, objB: &elemB)) { |
180 | return false; |
181 | } |
182 | } |
183 | return true; |
184 | } |
185 | } |
186 | } |
187 | case objDict: { |
188 | if (objB->getType() != objDict) { |
189 | return false; |
190 | } else { |
191 | Dict *dictA = objA->getDict(); |
192 | Dict *dictB = objB->getDict(); |
193 | return compareDictionaries(dictA, dictB); |
194 | } |
195 | } |
196 | case objStream: { |
197 | if (objB->getType() != objStream) { |
198 | return false; |
199 | } else { |
200 | Stream *streamA = objA->getStream(); |
201 | Stream *streamB = objB->getStream(); |
202 | if (!compareDictionaries(dictA: streamA->getDict(), dictB: streamB->getDict())) { |
203 | return false; |
204 | } else { |
205 | int c; |
206 | streamA->reset(); |
207 | streamB->reset(); |
208 | do { |
209 | c = streamA->getChar(); |
210 | if (c != streamB->getChar()) { |
211 | return false; |
212 | } |
213 | } while (c != EOF); |
214 | return true; |
215 | } |
216 | } |
217 | return true; |
218 | } |
219 | case objRef: { |
220 | if (objB->getType() != objRef) { |
221 | return false; |
222 | } else { |
223 | const Ref refA = objA->getRef(); |
224 | const Ref refB = objB->getRef(); |
225 | return refA == refB; |
226 | } |
227 | } |
228 | default: { |
229 | fprintf(stderr, format: "compareObjects failed: unexpected object type %u\n" , objA->getType()); |
230 | return false; |
231 | } |
232 | } |
233 | } |
234 | |
235 | static bool compareDocuments(PDFDoc *origDoc, PDFDoc *newDoc) |
236 | { |
237 | bool result = true; |
238 | XRef *origXRef = origDoc->getXRef(); |
239 | XRef *newXRef = newDoc->getXRef(); |
240 | |
241 | // Make sure that special flags are set in both documents |
242 | origXRef->scanSpecialFlags(); |
243 | newXRef->scanSpecialFlags(); |
244 | |
245 | // Compare XRef tables' size |
246 | const int origNumObjects = origXRef->getNumObjects(); |
247 | const int newNumObjects = newXRef->getNumObjects(); |
248 | if (forceIncremental && origXRef->isXRefStream()) { |
249 | // In case of incremental update, expect a new entry to be appended to store the new XRef stream |
250 | if (origNumObjects + 1 != newNumObjects) { |
251 | fprintf(stderr, format: "XRef table: Unexpected number of entries (%d+1 != %d)\n" , origNumObjects, newNumObjects); |
252 | result = false; |
253 | } |
254 | } else { |
255 | // In all other cases the number of entries must be the same |
256 | if (origNumObjects != newNumObjects) { |
257 | fprintf(stderr, format: "XRef table: Different number of entries (%d != %d)\n" , origNumObjects, newNumObjects); |
258 | result = false; |
259 | } |
260 | } |
261 | |
262 | // Compare each XRef entry |
263 | const int numObjects = (origNumObjects < newNumObjects) ? origNumObjects : newNumObjects; |
264 | for (int i = 0; i < numObjects; ++i) { |
265 | XRefEntryType origType = origXRef->getEntry(i)->type; |
266 | XRefEntryType newType = newXRef->getEntry(i)->type; |
267 | const int origGenNum = (origType != xrefEntryCompressed) ? origXRef->getEntry(i)->gen : 0; |
268 | const int newGenNum = (newType != xrefEntryCompressed) ? newXRef->getEntry(i)->gen : 0; |
269 | |
270 | // Check that DontRewrite entries are freed in full rewrite mode |
271 | if (!forceIncremental && origXRef->getEntry(i)->getFlag(flag: XRefEntry::DontRewrite)) { |
272 | if (newType != xrefEntryFree || origGenNum + 1 != newGenNum) { |
273 | fprintf(stderr, format: "XRef entry %u: DontRewrite entry was not freed correctly\n" , i); |
274 | result = false; |
275 | } |
276 | continue; // There's nothing left to check for this entry |
277 | } |
278 | |
279 | // Compare generation numbers |
280 | // Object num 0 should always have gen 65535 according to specs, but some |
281 | // documents have it set to 0. We always write 65535 in output |
282 | if (i != 0) { |
283 | if (origGenNum != newGenNum) { |
284 | fprintf(stderr, format: "XRef entry %u: generation numbers differ (%d != %d)\n" , i, origGenNum, newGenNum); |
285 | result = false; |
286 | continue; |
287 | } |
288 | } else { |
289 | if (newGenNum != 65535) { |
290 | fprintf(stderr, format: "XRef entry %u: generation number was expected to be 65535 (%d != 65535)\n" , i, newGenNum); |
291 | result = false; |
292 | continue; |
293 | } |
294 | } |
295 | |
296 | // Compare object flags. A failure shows that there's some error in XRef::scanSpecialFlags() |
297 | if (origXRef->getEntry(i)->flags != newXRef->getEntry(i)->flags) { |
298 | fprintf(stderr, format: "XRef entry %u: flags detected by scanSpecialFlags differ (%d != %d)\n" , i, origXRef->getEntry(i)->flags, newXRef->getEntry(i)->flags); |
299 | result = false; |
300 | } |
301 | |
302 | // Check that either both are free or both are in use |
303 | if ((origType == xrefEntryFree) != (newType == xrefEntryFree)) { |
304 | const char *origStatus = (origType == xrefEntryFree) ? "free" : "in use" ; |
305 | const char *newStatus = (newType == xrefEntryFree) ? "free" : "in use" ; |
306 | fprintf(stderr, format: "XRef entry %u: usage status differs (%s != %s)\n" , i, origStatus, newStatus); |
307 | result = false; |
308 | continue; |
309 | } |
310 | |
311 | // Skip free entries |
312 | if (origType == xrefEntryFree) { |
313 | continue; |
314 | } |
315 | |
316 | // Compare contents |
317 | Object origObj = origXRef->fetch(num: i, gen: origGenNum); |
318 | Object newObj = newXRef->fetch(num: i, gen: newGenNum); |
319 | if (!compareObjects(objA: &origObj, objB: &newObj)) { |
320 | fprintf(stderr, format: "XRef entry %u: contents differ\n" , i); |
321 | result = false; |
322 | } |
323 | } |
324 | |
325 | return result; |
326 | } |
327 | |