1 | //======================================================================== |
2 | // |
3 | // PDFDoc.cc |
4 | // |
5 | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net> |
17 | // Copyright (C) 2005, 2007-2009, 2011-2024 Albert Astals Cid <aacid@kde.org> |
18 | // Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org> |
19 | // Copyright (C) 2008, 2010 Pino Toscano <pino@kde.org> |
20 | // Copyright (C) 2008, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org> |
21 | // Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca> |
22 | // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net> |
23 | // Copyright (C) 2009, 2011 Axel Struebing <axel.struebing@freenet.de> |
24 | // Copyright (C) 2010-2012, 2014 Hib Eris <hib@hiberis.nl> |
25 | // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net> |
26 | // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com> |
27 | // Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com> |
28 | // Copyright (C) 2010 Philip Lorenz <lorenzph+freedesktop@gmail.com> |
29 | // Copyright (C) 2011-2016 Thomas Freitag <Thomas.Freitag@alfa.de> |
30 | // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it> |
31 | // Copyright (C) 2013, 2014, 2017 Adrian Johnson <ajohnson@redneon.com> |
32 | // Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com> |
33 | // Copyright (C) 2014 Bogdan Cristea <cristeab@gmail.com> |
34 | // Copyright (C) 2015 Li Junling <lijunling@sina.com> |
35 | // Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com> |
36 | // Copyright (C) 2015 André Esser <bepandre@hotmail.com> |
37 | // Copyright (C) 2016, 2020 Jakub Alba <jakubalba@gmail.com> |
38 | // Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr> |
39 | // Copyright (C) 2017 Fredrik Fornwall <fredrik@fornwall.net> |
40 | // Copyright (C) 2018 Ben Timby <btimby@gmail.com> |
41 | // Copyright (C) 2018 Evangelos Foutras <evangelos@foutrelis.com> |
42 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
43 | // Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org> |
44 | // Copyright (C) 2018 Philipp Knechtges <philipp-dev@knechtges.com> |
45 | // Copyright (C) 2019 Christian Persch <chpe@src.gnome.org> |
46 | // Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com> |
47 | // Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de> |
48 | // Copyright (C) 2020 Adam Sampson <ats@offog.org> |
49 | // Copyright (C) 2021-2024 Oliver Sander <oliver.sander@tu-dresden.de> |
50 | // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com> |
51 | // Copyright (C) 2021 RM <rm+git@arcsin.org> |
52 | // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net. |
53 | // Copyright (C) 2021-2022 Marek Kasik <mkasik@redhat.com> |
54 | // Copyright (C) 2022 Felix Jung <fxjung@posteo.de> |
55 | // Copyright (C) 2022 crt <chluo@cse.cuhk.edu.hk> |
56 | // Copyright (C) 2022 Erich E. Hoover <erich.e.hoover@gmail.com> |
57 | // Copyright (C) 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
58 | // |
59 | // To see a description of the changes please see the Changelog file that |
60 | // came with your tarball or type make ChangeLog if you are building from git |
61 | // |
62 | //======================================================================== |
63 | |
64 | #include <config.h> |
65 | #include <poppler-config.h> |
66 | |
67 | #include <cctype> |
68 | #include <clocale> |
69 | #include <cstdio> |
70 | #include <cerrno> |
71 | #include <climits> |
72 | #include <cstdlib> |
73 | #include <cstddef> |
74 | #include <cstring> |
75 | #include <ctime> |
76 | #include <iomanip> |
77 | #include <regex> |
78 | #include <sstream> |
79 | #include <sys/stat.h> |
80 | #include "goo/glibc.h" |
81 | #include "goo/gstrtod.h" |
82 | #include "goo/GooString.h" |
83 | #include "goo/gfile.h" |
84 | #include "poppler-config.h" |
85 | #include "GlobalParams.h" |
86 | #include "Page.h" |
87 | #include "Catalog.h" |
88 | #include "Stream.h" |
89 | #include "XRef.h" |
90 | #include "Linearization.h" |
91 | #include "Link.h" |
92 | #include "OutputDev.h" |
93 | #include "Error.h" |
94 | #include "Lexer.h" |
95 | #include "Parser.h" |
96 | #include "SecurityHandler.h" |
97 | #include "Decrypt.h" |
98 | #include "Outline.h" |
99 | #include "PDFDoc.h" |
100 | #include "Hints.h" |
101 | #include "UTF.h" |
102 | #include "FlateEncoder.h" |
103 | #include "JSInfo.h" |
104 | #include "ImageEmbeddingUtils.h" |
105 | |
106 | //------------------------------------------------------------------------ |
107 | |
// Deleter for std::unique_ptr<FILE, FILECloser>: closes the stdio stream when
// the owning smart pointer is destroyed.
struct FILECloser
{
    void operator()(FILE *f) const
    {
        // unique_ptr never invokes its deleter on a null pointer, but guard
        // anyway so that a direct call cannot pass nullptr to fclose().
        if (f) {
            fclose(f);
        }
    }
};
112 | |
113 | //------------------------------------------------------------------------ |
114 | |
// Number of bytes read at the beginning of the file when looking for '%PDF'.
#define headerSearchSize 1024

// Length of the textual PDF document IDs (PermanentId, UpdateId).
#define pdfIdLength 32

// Number of bytes read at the beginning of the file when looking for the
// linearization dictionary.
#define linearizationSearchSize 1024

// Number of bytes read at the end of the file when looking for 'startxref'.
#define xrefSearchSize 1024
128 | |
129 | //------------------------------------------------------------------------ |
130 | // PDFDoc |
131 | //------------------------------------------------------------------------ |
132 | |
133 | #define pdfdocLocker() const std::scoped_lock locker(mutex) |
134 | |
135 | PDFDoc::PDFDoc() { } |
136 | |
137 | PDFDoc::PDFDoc(std::unique_ptr<GooString> &&fileNameA, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback) |
138 | : fileName(std::move(fileNameA)), guiData(guiDataA) |
139 | { |
140 | #ifdef _WIN32 |
141 | const int n = fileName->getLength(); |
142 | fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t)); |
143 | for (int i = 0; i < n; ++i) { |
144 | fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff); |
145 | } |
146 | fileNameU[n] = L'\0'; |
147 | |
148 | wchar_t *wFileName = (wchar_t *)utf8ToUtf16(fileName->c_str()); |
149 | file = GooFile::open(wFileName); |
150 | gfree(wFileName); |
151 | #else |
152 | file = GooFile::open(fileName: fileName->toStr()); |
153 | #endif |
154 | |
155 | if (!file) { |
156 | // fopen() has failed. |
157 | // Keep a copy of the errno returned by fopen so that it can be |
158 | // referred to later. |
159 | fopenErrno = errno; |
160 | error(category: errIO, pos: -1, msg: "Couldn't open file '{0:t}': {1:s}." , fileName.get(), strerror(errno)); |
161 | errCode = errOpenFile; |
162 | return; |
163 | } |
164 | |
165 | // create stream |
166 | str = new FileStream(file.get(), 0, false, file->size(), Object(objNull)); |
167 | |
168 | ok = setup(ownerPassword, userPassword, xrefReconstructedCallback); |
169 | } |
170 | |
#ifdef _WIN32
// Windows-only constructor taking a wide-character file name of fileNameLen
// characters. Stores both a wide and a narrowed 8-bit copy of the name, opens
// the file and sets the document up from it.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback) : guiData(guiDataA)
{
    // save both Unicode and 8-bit copies of the file name
    fileName = std::make_unique<GooString>();
    fileNameU = static_cast<wchar_t *>(gmallocn(fileNameLen + 1, sizeof(wchar_t)));
    for (int idx = 0; idx < fileNameLen; ++idx) {
        const wchar_t wc = fileNameA[idx];
        fileName->append(static_cast<char>(wc));
        fileNameU[idx] = wc;
    }
    fileNameU[fileNameLen] = L'\0';

    // try to open file
    // NB: _wfopen is only available in NT
    OSVERSIONINFO osInfo;
    osInfo.dwOSVersionInfoSize = sizeof(osInfo);
    GetVersionEx(&osInfo);
    const bool isNT = (osInfo.dwPlatformId == VER_PLATFORM_WIN32_NT);
    file = isNT ? GooFile::open(fileNameU) : GooFile::open(fileName->toStr());

    if (!file) {
        error(errIO, -1, "Couldn't open file '{0:t}'", fileName.get());
        errCode = errOpenFile;
        return;
    }

    // create stream
    str = new FileStream(file.get(), 0, false, file->size(), Object(objNull));

    ok = setup(ownerPassword, userPassword, xrefReconstructedCallback);
}
#endif
207 | |
208 | PDFDoc::PDFDoc(BaseStream *strA, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, void *guiDataA, const std::function<void()> &xrefReconstructedCallback) : guiData(guiDataA) |
209 | { |
210 | if (strA->getFileName()) { |
211 | fileName.reset(p: strA->getFileName()->copy()); |
212 | #ifdef _WIN32 |
213 | const int n = fileName->getLength(); |
214 | fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t)); |
215 | for (int i = 0; i < n; ++i) { |
216 | fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff); |
217 | } |
218 | fileNameU[n] = L'\0'; |
219 | #endif |
220 | } |
221 | str = strA; |
222 | ok = setup(ownerPassword, userPassword, xrefReconstructedCallback); |
223 | } |
224 | |
225 | bool PDFDoc::setup(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, const std::function<void()> &xrefReconstructedCallback) |
226 | { |
227 | pdfdocLocker(); |
228 | |
229 | if (str->getLength() <= 0) { |
230 | error(category: errSyntaxError, pos: -1, msg: "Document stream is empty" ); |
231 | errCode = errDamaged; |
232 | return false; |
233 | } |
234 | |
235 | str->setPos(pos: 0, dir: -1); |
236 | if (str->getPos() < 0) { |
237 | error(category: errSyntaxError, pos: -1, msg: "Document base stream is not seekable" ); |
238 | errCode = errFileIO; |
239 | return false; |
240 | } |
241 | |
242 | str->reset(); |
243 | |
244 | // check footer |
245 | // Adobe does not seem to enforce %%EOF, so we do the same |
246 | // if (!checkFooter()) return false; |
247 | |
248 | // check header |
249 | checkHeader(); |
250 | |
251 | bool wasReconstructed = false; |
252 | |
253 | // read xref table |
254 | xref = new XRef(str, getStartXRef(), getMainXRefEntriesOffset(), &wasReconstructed, false, xrefReconstructedCallback); |
255 | if (!xref->isOk()) { |
256 | if (wasReconstructed) { |
257 | delete xref; |
258 | startXRefPos = -1; |
259 | xref = new XRef(str, getStartXRef(tryingToReconstruct: true), getMainXRefEntriesOffset(tryingToReconstruct: true), &wasReconstructed, false, xrefReconstructedCallback); |
260 | } |
261 | if (!xref->isOk()) { |
262 | error(category: errSyntaxError, pos: -1, msg: "Couldn't read xref table" ); |
263 | errCode = xref->getErrorCode(); |
264 | return false; |
265 | } |
266 | } |
267 | |
268 | // check for encryption |
269 | if (!checkEncryption(ownerPassword, userPassword)) { |
270 | errCode = errEncrypted; |
271 | return false; |
272 | } |
273 | |
274 | // read catalog |
275 | catalog = new Catalog(this); |
276 | if (catalog && !catalog->isOk()) { |
277 | if (!wasReconstructed) { |
278 | // try one more time to construct the Catalog, maybe the problem is damaged XRef |
279 | delete catalog; |
280 | delete xref; |
281 | xref = new XRef(str, 0, 0, nullptr, true, xrefReconstructedCallback); |
282 | catalog = new Catalog(this); |
283 | } |
284 | |
285 | if (catalog && !catalog->isOk()) { |
286 | error(category: errSyntaxError, pos: -1, msg: "Couldn't read page catalog" ); |
287 | errCode = errBadCatalog; |
288 | return false; |
289 | } |
290 | } |
291 | |
292 | // Extract PDF Subtype information |
293 | extractPDFSubtype(); |
294 | |
295 | // done |
296 | return true; |
297 | } |
298 | |
299 | PDFDoc::~PDFDoc() |
300 | { |
301 | if (pageCache) { |
302 | for (int i = 0; i < getNumPages(); i++) { |
303 | if (pageCache[i]) { |
304 | delete pageCache[i]; |
305 | } |
306 | } |
307 | gfree(p: pageCache); |
308 | } |
309 | delete secHdlr; |
310 | delete outline; |
311 | delete catalog; |
312 | delete xref; |
313 | delete hints; |
314 | delete linearization; |
315 | delete str; |
316 | #ifdef _WIN32 |
317 | gfree(fileNameU); |
318 | #endif |
319 | } |
320 | |
321 | // Check for a %%EOF at the end of this stream |
322 | bool PDFDoc::() |
323 | { |
324 | // we look in the last 1024 chars because Adobe does the same |
325 | char *eof = new char[1025]; |
326 | Goffset pos = str->getPos(); |
327 | str->setPos(pos: 1024, dir: -1); |
328 | int i, ch; |
329 | for (i = 0; i < 1024; i++) { |
330 | ch = str->getChar(); |
331 | if (ch == EOF) { |
332 | break; |
333 | } |
334 | eof[i] = ch; |
335 | } |
336 | eof[i] = '\0'; |
337 | |
338 | bool found = false; |
339 | for (i = i - 5; i >= 0; i--) { |
340 | if (strncmp(s1: &eof[i], s2: "%%EOF" , n: 5) == 0) { |
341 | found = true; |
342 | break; |
343 | } |
344 | } |
345 | if (!found) { |
346 | error(category: errSyntaxError, pos: -1, msg: "Document has not the mandatory ending %%EOF" ); |
347 | errCode = errDamaged; |
348 | delete[] eof; |
349 | return false; |
350 | } |
351 | delete[] eof; |
352 | str->setPos(pos); |
353 | return true; |
354 | } |
355 | |
356 | // Check for a PDF header on this stream. Skip past some garbage |
357 | // if necessary. |
358 | void PDFDoc::() |
359 | { |
360 | char hdrBuf[headerSearchSize + 1]; |
361 | char *p; |
362 | char *tokptr; |
363 | int i; |
364 | int bytesRead; |
365 | |
366 | headerPdfMajorVersion = 0; |
367 | headerPdfMinorVersion = 0; |
368 | |
369 | // read up to headerSearchSize bytes from the beginning of the document |
370 | for (i = 0; i < headerSearchSize; ++i) { |
371 | const int c = str->getChar(); |
372 | if (c == EOF) { |
373 | break; |
374 | } |
375 | hdrBuf[i] = c; |
376 | } |
377 | bytesRead = i; |
378 | hdrBuf[bytesRead] = '\0'; |
379 | |
380 | // find the start of the PDF header if it exists and parse the version |
381 | bool = false; |
382 | for (i = 0; i < bytesRead - 5; ++i) { |
383 | if (!strncmp(s1: &hdrBuf[i], s2: "%PDF-" , n: 5)) { |
384 | headerFound = true; |
385 | break; |
386 | } |
387 | } |
388 | if (!headerFound) { |
389 | error(category: errSyntaxWarning, pos: -1, msg: "May not be a PDF file (continuing anyway)" ); |
390 | return; |
391 | } |
392 | str->moveStart(delta: i); |
393 | if (!(p = strtok_r(s: &hdrBuf[i + 5], delim: " \t\n\r" , save_ptr: &tokptr))) { |
394 | error(category: errSyntaxWarning, pos: -1, msg: "May not be a PDF file (continuing anyway)" ); |
395 | return; |
396 | } |
397 | sscanf(s: p, format: "%d.%d" , &headerPdfMajorVersion, &headerPdfMinorVersion); |
398 | // We don't do the version check. Don't add it back in. |
399 | } |
400 | |
401 | bool PDFDoc::checkEncryption(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword) |
402 | { |
403 | bool encrypted; |
404 | bool ret; |
405 | |
406 | Object encrypt = xref->getTrailerDict()->dictLookup(key: "Encrypt" ); |
407 | if ((encrypted = encrypt.isDict())) { |
408 | if ((secHdlr = SecurityHandler::make(docA: this, encryptDictA: &encrypt))) { |
409 | if (secHdlr->isUnencrypted()) { |
410 | // no encryption |
411 | ret = true; |
412 | } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) { |
413 | // authorization succeeded |
414 | xref->setEncryption(permFlagsA: secHdlr->getPermissionFlags(), ownerPasswordOkA: secHdlr->getOwnerPasswordOk(), fileKeyA: secHdlr->getFileKey(), keyLengthA: secHdlr->getFileKeyLength(), encVersionA: secHdlr->getEncVersion(), encRevisionA: secHdlr->getEncRevision(), encAlgorithmA: secHdlr->getEncAlgorithm()); |
415 | ret = true; |
416 | } else { |
417 | // authorization failed |
418 | ret = false; |
419 | } |
420 | } else { |
421 | // couldn't find the matching security handler |
422 | ret = false; |
423 | } |
424 | } else { |
425 | // document is not encrypted |
426 | ret = true; |
427 | } |
428 | return ret; |
429 | } |
430 | |
431 | static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, const std::string &pdfsubver) |
432 | { |
433 | const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?" ); |
434 | std::smatch match; |
435 | PDFSubtypePart subtypePart = subtypePartNone; |
436 | |
437 | if (std::regex_search(s: pdfsubver, m&: match, e: regex)) { |
438 | int date = 0; |
439 | const int part = std::stoi(str: match.str(sub: 1)); |
440 | |
441 | if (match[2].matched) { |
442 | date = std::stoi(str: match.str(sub: 2)); |
443 | } |
444 | |
445 | switch (subtype) { |
446 | case subtypePDFX: |
447 | switch (part) { |
448 | case 1: |
449 | switch (date) { |
450 | case 2001: |
451 | default: |
452 | subtypePart = subtypePart1; |
453 | break; |
454 | case 2003: |
455 | subtypePart = subtypePart4; |
456 | break; |
457 | } |
458 | break; |
459 | case 2: |
460 | subtypePart = subtypePart5; |
461 | break; |
462 | case 3: |
463 | switch (date) { |
464 | case 2002: |
465 | default: |
466 | subtypePart = subtypePart3; |
467 | break; |
468 | case 2003: |
469 | subtypePart = subtypePart6; |
470 | break; |
471 | } |
472 | break; |
473 | case 4: |
474 | subtypePart = subtypePart7; |
475 | break; |
476 | case 5: |
477 | subtypePart = subtypePart8; |
478 | break; |
479 | } |
480 | break; |
481 | default: |
482 | subtypePart = (PDFSubtypePart)part; |
483 | break; |
484 | } |
485 | } |
486 | |
487 | return subtypePart; |
488 | } |
489 | |
490 | static PDFSubtypeConformance pdfConformanceFromString(const std::string &pdfsubver) |
491 | { |
492 | const std::regex regex("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)" ); |
493 | std::smatch match; |
494 | PDFSubtypeConformance pdfConf = subtypeConfNone; |
495 | |
496 | // match contains the PDF conformance (A, B, G, N, P, PG or U) |
497 | if (std::regex_search(s: pdfsubver, m&: match, e: regex)) { |
498 | GooString *conf = new GooString(match.str(sub: 1)); |
499 | // Convert to lowercase as the conformance may appear in both cases |
500 | conf->lowerCase(); |
501 | if (conf->cmp(sA: "a" ) == 0) { |
502 | pdfConf = subtypeConfA; |
503 | } else if (conf->cmp(sA: "b" ) == 0) { |
504 | pdfConf = subtypeConfB; |
505 | } else if (conf->cmp(sA: "g" ) == 0) { |
506 | pdfConf = subtypeConfG; |
507 | } else if (conf->cmp(sA: "n" ) == 0) { |
508 | pdfConf = subtypeConfN; |
509 | } else if (conf->cmp(sA: "p" ) == 0) { |
510 | pdfConf = subtypeConfP; |
511 | } else if (conf->cmp(sA: "pg" ) == 0) { |
512 | pdfConf = subtypeConfPG; |
513 | } else if (conf->cmp(sA: "u" ) == 0) { |
514 | pdfConf = subtypeConfU; |
515 | } else { |
516 | pdfConf = subtypeConfNone; |
517 | } |
518 | delete conf; |
519 | } |
520 | |
521 | return pdfConf; |
522 | } |
523 | |
524 | void PDFDoc::() |
525 | { |
526 | pdfSubtype = subtypeNull; |
527 | pdfPart = subtypePartNull; |
528 | pdfConformance = subtypeConfNull; |
529 | |
530 | std::unique_ptr<GooString> pdfSubtypeVersion; |
531 | // Find PDF InfoDict subtype key if any |
532 | if ((pdfSubtypeVersion = getDocInfoStringEntry(key: "GTS_PDFA1Version" ))) { |
533 | pdfSubtype = subtypePDFA; |
534 | } else if ((pdfSubtypeVersion = getDocInfoStringEntry(key: "GTS_PDFEVersion" ))) { |
535 | pdfSubtype = subtypePDFE; |
536 | } else if ((pdfSubtypeVersion = getDocInfoStringEntry(key: "GTS_PDFUAVersion" ))) { |
537 | pdfSubtype = subtypePDFUA; |
538 | } else if ((pdfSubtypeVersion = getDocInfoStringEntry(key: "GTS_PDFVTVersion" ))) { |
539 | pdfSubtype = subtypePDFVT; |
540 | } else if ((pdfSubtypeVersion = getDocInfoStringEntry(key: "GTS_PDFXVersion" ))) { |
541 | pdfSubtype = subtypePDFX; |
542 | } else { |
543 | pdfSubtype = subtypeNone; |
544 | pdfPart = subtypePartNone; |
545 | pdfConformance = subtypeConfNone; |
546 | return; |
547 | } |
548 | |
549 | // Extract part from version string |
550 | pdfPart = pdfPartFromString(subtype: pdfSubtype, pdfsubver: pdfSubtypeVersion->toStr()); |
551 | |
552 | // Extract conformance from version string |
553 | pdfConformance = pdfConformanceFromString(pdfsubver: pdfSubtypeVersion->toStr()); |
554 | } |
555 | |
556 | static void addSignatureFieldsToVector(FormField *ff, std::vector<FormFieldSignature *> &res) |
557 | { |
558 | if (ff->getNumChildren() == 0) { |
559 | if (ff->getType() == formSignature) { |
560 | res.push_back(x: static_cast<FormFieldSignature *>(ff)); |
561 | } |
562 | } else { |
563 | for (int i = 0; i < ff->getNumChildren(); ++i) { |
564 | FormField *children = ff->getChildren(i); |
565 | addSignatureFieldsToVector(ff: children, res); |
566 | } |
567 | } |
568 | } |
569 | |
570 | std::vector<FormFieldSignature *> PDFDoc::getSignatureFields() |
571 | { |
572 | // Unfortunately there's files with signatures in Forms but not in Annots |
573 | // and files with signatures in Annots but no in forms so we need to search both |
574 | std::vector<FormFieldSignature *> res; |
575 | |
576 | // First search |
577 | const Form *f = catalog->getForm(); |
578 | if (f) { |
579 | const int nRootFields = f->getNumFields(); |
580 | for (int i = 0; i < nRootFields; ++i) { |
581 | FormField *ff = f->getRootField(i); |
582 | addSignatureFieldsToVector(ff, res); |
583 | } |
584 | } |
585 | |
586 | // Second search |
587 | for (int page = 1; page <= getNumPages(); ++page) { |
588 | Page *p = getPage(page); |
589 | if (p) { |
590 | const std::unique_ptr<FormPageWidgets> pw = p->getFormWidgets(); |
591 | for (int i = 0; i < pw->getNumWidgets(); ++i) { |
592 | FormWidget *fw = pw->getWidget(i); |
593 | if (fw->getType() == formSignature) { |
594 | assert(fw->getField()->getType() == formSignature); |
595 | FormFieldSignature *ffs = static_cast<FormFieldSignature *>(fw->getField()); |
596 | if (std::find(first: res.begin(), last: res.end(), val: ffs) == res.end()) { |
597 | res.push_back(x: ffs); |
598 | } |
599 | } |
600 | } |
601 | } |
602 | } |
603 | |
604 | return res; |
605 | } |
606 | |
607 | void PDFDoc::displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData, |
608 | bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef) |
609 | { |
610 | if (globalParams->getPrintCommands()) { |
611 | printf(format: "***** page %d *****\n" , page); |
612 | } |
613 | |
614 | if (getPage(page)) { |
615 | getPage(page)->display(out, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef); |
616 | } |
617 | } |
618 | |
619 | void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data), void *abortCheckCbkData, |
620 | bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData) |
621 | { |
622 | int page; |
623 | |
624 | for (page = firstPage; page <= lastPage; ++page) { |
625 | displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData); |
626 | } |
627 | } |
628 | |
629 | void PDFDoc::displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data), |
630 | void *abortCheckCbkData, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data), void *annotDisplayDecideCbkData, bool copyXRef) |
631 | { |
632 | if (getPage(page)) { |
633 | getPage(page)->displaySlice(out, hDPI, vDPI, rotate, useMediaBox, crop, sliceX, sliceY, sliceW, sliceH, printing, abortCheckCbk, abortCheckCbkData, annotDisplayDecideCbk, annotDisplayDecideCbkData, copyXRef); |
634 | } |
635 | } |
636 | |
637 | std::unique_ptr<Links> PDFDoc::getLinks(int page) |
638 | { |
639 | Page *p = getPage(page); |
640 | if (!p) { |
641 | return std::make_unique<Links>(args: nullptr); |
642 | } |
643 | return p->getLinks(); |
644 | } |
645 | |
646 | void PDFDoc::processLinks(OutputDev *out, int page) |
647 | { |
648 | if (getPage(page)) { |
649 | getPage(page)->processLinks(out); |
650 | } |
651 | } |
652 | |
653 | Linearization *PDFDoc::getLinearization() |
654 | { |
655 | if (!linearization) { |
656 | linearization = new Linearization(str); |
657 | linearizationState = 0; |
658 | } |
659 | return linearization; |
660 | } |
661 | |
662 | bool PDFDoc::checkLinearization() |
663 | { |
664 | if (linearization == nullptr) { |
665 | return false; |
666 | } |
667 | if (linearizationState == 1) { |
668 | return true; |
669 | } |
670 | if (linearizationState == 2) { |
671 | return false; |
672 | } |
673 | if (!hints) { |
674 | hints = new Hints(str, linearization, getXRef(), secHdlr); |
675 | } |
676 | if (!hints->isOk()) { |
677 | linearizationState = 2; |
678 | return false; |
679 | } |
680 | for (int page = 1; page <= linearization->getNumPages(); page++) { |
681 | Ref ; |
682 | |
683 | pageRef.num = hints->getPageObjectNum(page); |
684 | if (!pageRef.num) { |
685 | linearizationState = 2; |
686 | return false; |
687 | } |
688 | |
689 | // check for bogus ref - this can happen in corrupted PDF files |
690 | if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) { |
691 | linearizationState = 2; |
692 | return false; |
693 | } |
694 | |
695 | pageRef.gen = xref->getEntry(i: pageRef.num)->gen; |
696 | Object obj = xref->fetch(ref: pageRef); |
697 | if (!obj.isDict(dictType: "Page" )) { |
698 | linearizationState = 2; |
699 | return false; |
700 | } |
701 | } |
702 | linearizationState = 1; |
703 | return true; |
704 | } |
705 | |
706 | bool PDFDoc::isLinearized(bool tryingToReconstruct) |
707 | { |
708 | if ((str->getLength()) && (getLinearization()->getLength() == str->getLength())) { |
709 | return true; |
710 | } else { |
711 | if (tryingToReconstruct) { |
712 | return getLinearization()->getLength() > 0; |
713 | } else { |
714 | return false; |
715 | } |
716 | } |
717 | } |
718 | |
719 | void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value) |
720 | { |
721 | bool removeEntry = !value || value->getLength() == 0 || (value->toStr() == unicodeByteOrderMark); |
722 | if (removeEntry) { |
723 | delete value; |
724 | } |
725 | |
726 | Object infoObj = getDocInfo(); |
727 | if (infoObj.isNull() && removeEntry) { |
728 | // No info dictionary, so no entry to remove. |
729 | return; |
730 | } |
731 | |
732 | Ref infoObjRef; |
733 | infoObj = xref->createDocInfoIfNeeded(ref: &infoObjRef); |
734 | if (removeEntry) { |
735 | infoObj.dictSet(key, val: Object(objNull)); |
736 | } else { |
737 | infoObj.dictSet(key, val: Object(value)); |
738 | } |
739 | |
740 | if (infoObj.dictGetLength() == 0) { |
741 | // Info dictionary is empty. Remove it altogether. |
742 | removeDocInfo(); |
743 | } else { |
744 | xref->setModifiedObject(o: &infoObj, r: infoObjRef); |
745 | } |
746 | } |
747 | |
748 | std::unique_ptr<GooString> PDFDoc::getDocInfoStringEntry(const char *key) |
749 | { |
750 | Object infoObj = getDocInfo(); |
751 | if (!infoObj.isDict()) { |
752 | return {}; |
753 | } |
754 | |
755 | const Object entryObj = infoObj.dictLookup(key); |
756 | if (!entryObj.isString()) { |
757 | return {}; |
758 | } |
759 | |
760 | return std::unique_ptr<GooString>(entryObj.getString()->copy()); |
761 | } |
762 | |
763 | static bool get_id(const GooString *encodedidstring, GooString *id) |
764 | { |
765 | const char *encodedid = encodedidstring->c_str(); |
766 | char pdfid[pdfIdLength + 1]; |
767 | int n; |
768 | |
769 | if (encodedidstring->getLength() != pdfIdLength / 2) { |
770 | return false; |
771 | } |
772 | |
773 | n = sprintf(s: pdfid, format: "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" , encodedid[0] & 0xff, encodedid[1] & 0xff, encodedid[2] & 0xff, encodedid[3] & 0xff, encodedid[4] & 0xff, encodedid[5] & 0xff, encodedid[6] & 0xff, |
774 | encodedid[7] & 0xff, encodedid[8] & 0xff, encodedid[9] & 0xff, encodedid[10] & 0xff, encodedid[11] & 0xff, encodedid[12] & 0xff, encodedid[13] & 0xff, encodedid[14] & 0xff, encodedid[15] & 0xff); |
775 | if (n != pdfIdLength) { |
776 | return false; |
777 | } |
778 | |
779 | id->Set(newStr: pdfid, pdfIdLength); |
780 | return true; |
781 | } |
782 | |
783 | bool PDFDoc::getID(GooString *permanent_id, GooString *update_id) const |
784 | { |
785 | Object obj = xref->getTrailerDict()->dictLookup(key: "ID" ); |
786 | |
787 | if (obj.isArray() && obj.arrayGetLength() == 2) { |
788 | if (permanent_id) { |
789 | Object obj2 = obj.arrayGet(i: 0); |
790 | if (obj2.isString()) { |
791 | if (!get_id(encodedidstring: obj2.getString(), id: permanent_id)) { |
792 | return false; |
793 | } |
794 | } else { |
795 | error(category: errSyntaxError, pos: -1, msg: "Invalid permanent ID" ); |
796 | return false; |
797 | } |
798 | } |
799 | |
800 | if (update_id) { |
801 | Object obj2 = obj.arrayGet(i: 1); |
802 | if (obj2.isString()) { |
803 | if (!get_id(encodedidstring: obj2.getString(), id: update_id)) { |
804 | return false; |
805 | } |
806 | } else { |
807 | error(category: errSyntaxError, pos: -1, msg: "Invalid update ID" ); |
808 | return false; |
809 | } |
810 | } |
811 | |
812 | return true; |
813 | } |
814 | |
815 | return false; |
816 | } |
817 | |
818 | Hints *PDFDoc::getHints() |
819 | { |
820 | if (!hints && isLinearized()) { |
821 | hints = new Hints(str, getLinearization(), getXRef(), secHdlr); |
822 | } |
823 | |
824 | return hints; |
825 | } |
826 | |
827 | int PDFDoc::savePageAs(const GooString &name, int pageNo) |
828 | { |
829 | FILE *f; |
830 | |
831 | if (file && file->modificationTimeChangedSinceOpen()) { |
832 | return errFileChangedSinceOpen; |
833 | } |
834 | |
835 | int rootNum = getXRef()->getNumObjects() + 1; |
836 | |
837 | // Make sure that special flags are set, because we are going to read |
838 | // all objects, including Unencrypted ones. |
839 | xref->scanSpecialFlags(); |
840 | |
841 | unsigned char *fileKey; |
842 | CryptAlgorithm encAlgorithm; |
843 | int keyLength; |
844 | xref->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength); |
845 | |
846 | if (pageNo < 1 || pageNo > getNumPages() || !getCatalog()->getPage(i: pageNo)) { |
847 | error(category: errInternal, pos: -1, msg: "Illegal pageNo: {0:d}({1:d})" , pageNo, getNumPages()); |
848 | return errOpenFile; |
849 | } |
850 | const PDFRectangle *cropBox = nullptr; |
851 | if (getCatalog()->getPage(i: pageNo)->isCropped()) { |
852 | cropBox = getCatalog()->getPage(i: pageNo)->getCropBox(); |
853 | } |
854 | replacePageDict(pageNo, rotate: getCatalog()->getPage(i: pageNo)->getRotate(), mediaBox: getCatalog()->getPage(i: pageNo)->getMediaBox(), cropBox); |
855 | Ref *refPage = getCatalog()->getPageRef(i: pageNo); |
856 | Object page = getXRef()->fetch(ref: *refPage); |
857 | |
858 | if (!(f = openFile(path: name.c_str(), mode: "wb" ))) { |
859 | error(category: errIO, pos: -1, msg: "Couldn't open file '{0:t}'" , &name); |
860 | return errOpenFile; |
861 | } |
862 | // Calls fclose on f when the fileCloser is destroyed because it goes out of scope |
863 | const std::unique_ptr<FILE, FILECloser> fileCloser(f); |
864 | const std::unique_ptr<OutStream> outStr = std::make_unique<FileOutStream>(args&: f, args: 0); |
865 | |
866 | const std::unique_ptr<XRef> yRef = std::make_unique<XRef>(args: getXRef()->getTrailerDict()); |
867 | |
868 | if (secHdlr != nullptr && !secHdlr->isUnencrypted()) { |
869 | yRef->setEncryption(permFlagsA: secHdlr->getPermissionFlags(), ownerPasswordOkA: secHdlr->getOwnerPasswordOk(), fileKeyA: fileKey, keyLengthA: keyLength, encVersionA: secHdlr->getEncVersion(), encRevisionA: secHdlr->getEncRevision(), encAlgorithmA: encAlgorithm); |
870 | } |
871 | const std::unique_ptr<XRef> countRef = std::make_unique<XRef>(); |
872 | Object *trailerObj = getXRef()->getTrailerDict(); |
873 | if (trailerObj->isDict()) { |
874 | markPageObjects(pageDict: trailerObj->getDict(), xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
875 | } |
876 | yRef->add(num: 0, gen: 65535, offs: 0, used: false); |
877 | writeHeader(outStr: outStr.get(), major: getPDFMajorVersion(), minor: getPDFMinorVersion()); |
878 | |
879 | // get and mark info dict |
880 | Object infoObj = getXRef()->getDocInfo(); |
881 | if (infoObj.isDict()) { |
882 | Dict *infoDict = infoObj.getDict(); |
883 | markPageObjects(pageDict: infoDict, xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
884 | if (trailerObj->isDict()) { |
885 | Dict *trailerDict = trailerObj->getDict(); |
886 | const Object &ref = trailerDict->lookupNF(key: "Info" ); |
887 | if (ref.isRef()) { |
888 | yRef->add(ref: ref.getRef(), offs: 0, used: true); |
889 | if (getXRef()->getEntry(i: ref.getRef().num)->type == xrefEntryCompressed) { |
890 | yRef->getEntry(i: ref.getRef().num)->type = xrefEntryCompressed; |
891 | } |
892 | } |
893 | } |
894 | } |
895 | |
896 | // get and mark output intents etc. |
897 | Object catObj = getXRef()->getCatalog(); |
898 | if (!catObj.isDict()) { |
899 | error(category: errSyntaxError, pos: -1, msg: "XRef's Catalog is not a dictionary" ); |
900 | return errOpenFile; |
901 | } |
902 | Dict *catDict = catObj.getDict(); |
903 | Object pagesObj = catDict->lookup(key: "Pages" ); |
904 | if (!pagesObj.isDict()) { |
905 | error(category: errSyntaxError, pos: -1, msg: "Catalog Pages is not a dictionary" ); |
906 | return errOpenFile; |
907 | } |
908 | Object afObj = catDict->lookupNF(key: "AcroForm" ).copy(); |
909 | if (!afObj.isNull()) { |
910 | markAcroForm(afObj: &afObj, xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
911 | } |
912 | Dict *pagesDict = pagesObj.getDict(); |
913 | Object resourcesObj = pagesDict->lookup(key: "Resources" ); |
914 | if (resourcesObj.isDict()) { |
915 | markPageObjects(pageDict: resourcesObj.getDict(), xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
916 | } |
917 | if (!markPageObjects(pageDict: catDict, xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2)) { |
918 | error(category: errSyntaxError, pos: -1, msg: "markPageObjects failed" ); |
919 | return errDamaged; |
920 | } |
921 | |
922 | if (!page.isDict()) { |
923 | error(category: errSyntaxError, pos: -1, msg: "page is not a dictionary" ); |
924 | return errOpenFile; |
925 | } |
926 | Dict *pageDict = page.getDict(); |
927 | if (resourcesObj.isNull() && !pageDict->hasKey(key: "Resources" )) { |
928 | Object *resourceDictObject = getCatalog()->getPage(i: pageNo)->getResourceDictObject(); |
929 | if (resourceDictObject->isDict()) { |
930 | resourcesObj = resourceDictObject->copy(); |
931 | markPageObjects(pageDict: resourcesObj.getDict(), xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
932 | } |
933 | } |
934 | markPageObjects(pageDict, xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldRefNum: refPage->num, newRefNum: rootNum + 2); |
935 | Object annotsObj = pageDict->lookupNF(key: "Annots" ).copy(); |
936 | if (!annotsObj.isNull()) { |
937 | markAnnotations(annots: &annotsObj, xRef: yRef.get(), countRef: countRef.get(), numOffset: 0, oldPageNum: refPage->num, newPageNum: rootNum + 2); |
938 | } |
939 | yRef->markUnencrypted(); |
940 | writePageObjects(outStr: outStr.get(), xRef: yRef.get(), numOffset: 0); |
941 | |
942 | yRef->add(num: rootNum, gen: 0, offs: outStr->getPos(), used: true); |
943 | outStr->printf(format: "%d 0 obj\n" , rootNum); |
944 | outStr->printf(format: "<< /Type /Catalog /Pages %d 0 R" , rootNum + 1); |
945 | for (int j = 0; j < catDict->getLength(); j++) { |
946 | const char *key = catDict->getKey(i: j); |
947 | if (strcmp(s1: key, s2: "Type" ) != 0 && strcmp(s1: key, s2: "Catalog" ) != 0 && strcmp(s1: key, s2: "Pages" ) != 0) { |
948 | if (j > 0) { |
949 | outStr->printf(format: " " ); |
950 | } |
951 | Object value = catDict->getValNF(i: j).copy(); |
952 | outStr->printf(format: "/%s " , key); |
953 | writeObject(obj: &value, outStr: outStr.get(), xref: getXRef(), numOffset: 0, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
954 | } |
955 | } |
956 | outStr->printf(format: ">>\nendobj\n" ); |
957 | |
958 | yRef->add(num: rootNum + 1, gen: 0, offs: outStr->getPos(), used: true); |
959 | outStr->printf(format: "%d 0 obj\n" , rootNum + 1); |
960 | outStr->printf(format: "<< /Type /Pages /Kids [ %d 0 R ] /Count 1 " , rootNum + 2); |
961 | if (resourcesObj.isDict()) { |
962 | outStr->printf(format: "/Resources " ); |
963 | writeObject(obj: &resourcesObj, outStr: outStr.get(), xref: getXRef(), numOffset: 0, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
964 | } |
965 | outStr->printf(format: ">>\n" ); |
966 | outStr->printf(format: "endobj\n" ); |
967 | |
968 | yRef->add(num: rootNum + 2, gen: 0, offs: outStr->getPos(), used: true); |
969 | outStr->printf(format: "%d 0 obj\n" , rootNum + 2); |
970 | outStr->printf(format: "<< " ); |
971 | for (int n = 0; n < pageDict->getLength(); n++) { |
972 | if (n > 0) { |
973 | outStr->printf(format: " " ); |
974 | } |
975 | const char *key = pageDict->getKey(i: n); |
976 | Object value = pageDict->getValNF(i: n).copy(); |
977 | if (strcmp(s1: key, s2: "Parent" ) == 0) { |
978 | outStr->printf(format: "/Parent %d 0 R" , rootNum + 1); |
979 | } else { |
980 | outStr->printf(format: "/%s " , key); |
981 | writeObject(obj: &value, outStr: outStr.get(), xref: getXRef(), numOffset: 0, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
982 | } |
983 | } |
984 | outStr->printf(format: " >>\nendobj\n" ); |
985 | |
986 | Goffset uxrefOffset = outStr->getPos(); |
987 | Ref ref; |
988 | ref.num = rootNum; |
989 | ref.gen = 0; |
990 | Object trailerDict = createTrailerDict(uxrefSize: rootNum + 3, incrUpdate: false, startxRef: 0, root: &ref, xRef: getXRef(), fileName: name.c_str(), fileSize: uxrefOffset); |
991 | writeXRefTableTrailer(trailerDict: std::move(trailerDict), uxref: yRef.get(), writeAllEntries: false /* do not write unnecessary entries */, uxrefOffset, outStr: outStr.get(), xRef: getXRef()); |
992 | |
993 | outStr->close(); |
994 | |
995 | return errNone; |
996 | } |
997 | |
998 | int PDFDoc::saveAs(const GooString &name, PDFWriteMode mode) |
999 | { |
1000 | FILE *f; |
1001 | OutStream *outStr; |
1002 | int res; |
1003 | |
1004 | if (!(f = openFile(path: name.c_str(), mode: "wb" ))) { |
1005 | error(category: errIO, pos: -1, msg: "Couldn't open file '{0:t}'" , &name); |
1006 | return errOpenFile; |
1007 | } |
1008 | outStr = new FileOutStream(f, 0); |
1009 | res = saveAs(outStr, mode); |
1010 | delete outStr; |
1011 | fclose(stream: f); |
1012 | return res; |
1013 | } |
1014 | |
1015 | int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) |
1016 | { |
1017 | if (file && file->modificationTimeChangedSinceOpen()) { |
1018 | return errFileChangedSinceOpen; |
1019 | } |
1020 | |
1021 | if (!xref->isModified() && mode == writeStandard) { |
1022 | // simply copy the original file |
1023 | saveWithoutChangesAs(outStr); |
1024 | } else if (mode == writeForceRewrite) { |
1025 | saveCompleteRewrite(outStr); |
1026 | } else { |
1027 | saveIncrementalUpdate(outStr); |
1028 | } |
1029 | |
1030 | return errNone; |
1031 | } |
1032 | |
1033 | int PDFDoc::saveWithoutChangesAs(const GooString &name) |
1034 | { |
1035 | FILE *f; |
1036 | OutStream *outStr; |
1037 | int res; |
1038 | |
1039 | if (!(f = openFile(path: name.c_str(), mode: "wb" ))) { |
1040 | error(category: errIO, pos: -1, msg: "Couldn't open file '{0:t}'" , &name); |
1041 | return errOpenFile; |
1042 | } |
1043 | |
1044 | outStr = new FileOutStream(f, 0); |
1045 | res = saveWithoutChangesAs(outStr); |
1046 | delete outStr; |
1047 | |
1048 | fclose(stream: f); |
1049 | |
1050 | return res; |
1051 | } |
1052 | |
1053 | int PDFDoc::saveWithoutChangesAs(OutStream *outStr) |
1054 | { |
1055 | int c; |
1056 | |
1057 | if (file && file->modificationTimeChangedSinceOpen()) { |
1058 | return errFileChangedSinceOpen; |
1059 | } |
1060 | |
1061 | BaseStream *copyStr = str->copy(); |
1062 | copyStr->reset(); |
1063 | while ((c = copyStr->getChar()) != EOF) { |
1064 | outStr->put(c); |
1065 | } |
1066 | copyStr->close(); |
1067 | delete copyStr; |
1068 | |
1069 | return errNone; |
1070 | } |
1071 | |
1072 | void PDFDoc::saveIncrementalUpdate(OutStream *outStr) |
1073 | { |
1074 | XRef *uxref; |
1075 | int c; |
1076 | // copy the original file |
1077 | BaseStream *copyStr = str->copy(); |
1078 | copyStr->reset(); |
1079 | while ((c = copyStr->getChar()) != EOF) { |
1080 | outStr->put(c); |
1081 | } |
1082 | copyStr->close(); |
1083 | delete copyStr; |
1084 | |
1085 | unsigned char *fileKey; |
1086 | CryptAlgorithm encAlgorithm; |
1087 | int keyLength; |
1088 | xref->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength); |
1089 | |
1090 | uxref = new XRef(); |
1091 | uxref->add(num: 0, gen: 65535, offs: 0, used: false); |
1092 | xref->lock(); |
1093 | for (int i = 0; i < xref->getNumObjects(); i++) { |
1094 | if ((xref->getEntry(i)->type == xrefEntryFree) && (xref->getEntry(i)->gen == 0)) { // we skip the irrelevant free objects |
1095 | continue; |
1096 | } |
1097 | |
1098 | if (xref->getEntry(i)->getFlag(flag: XRefEntry::Updated)) { // we have an updated object |
1099 | Ref ref; |
1100 | ref.num = i; |
1101 | ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen; |
1102 | if (xref->getEntry(i)->type != xrefEntryFree) { |
1103 | Object obj1 = xref->fetch(ref, recursion: 1 /* recursion */); |
1104 | Goffset offset = writeObjectHeader(ref: &ref, outStr); |
1105 | writeObject(obj: &obj1, outStr, fileKey, encAlgorithm, keyLength, ref); |
1106 | writeObjectFooter(outStr); |
1107 | uxref->add(ref, offs: offset, used: true); |
1108 | } else { |
1109 | uxref->add(ref, offs: 0, used: false); |
1110 | } |
1111 | } |
1112 | } |
1113 | xref->unlock(); |
1114 | // because of "uxref->add(0, 65535, 0, false);" uxref->getNumObjects() will |
1115 | // always be >= 1; if it is 1, it means there is nothing to update |
1116 | if (uxref->getNumObjects() == 1) { |
1117 | delete uxref; |
1118 | return; |
1119 | } |
1120 | |
1121 | Goffset uxrefOffset = outStr->getPos(); |
1122 | int numobjects = xref->getNumObjects(); |
1123 | const char *fileNameA = fileName ? fileName->c_str() : nullptr; |
1124 | Ref rootRef, uxrefStreamRef; |
1125 | rootRef.num = getXRef()->getRootNum(); |
1126 | rootRef.gen = getXRef()->getRootGen(); |
1127 | |
1128 | // Output a xref stream if there is a xref stream already |
1129 | bool xRefStream = xref->isXRefStream(); |
1130 | |
1131 | if (xRefStream) { |
1132 | // Append an entry for the xref stream itself |
1133 | uxrefStreamRef.num = numobjects++; |
1134 | uxrefStreamRef.gen = 0; |
1135 | uxref->add(ref: uxrefStreamRef, offs: uxrefOffset, used: true); |
1136 | } |
1137 | |
1138 | Object trailerDict = createTrailerDict(uxrefSize: numobjects, incrUpdate: true, startxRef: getStartXRef(), root: &rootRef, xRef: getXRef(), fileName: fileNameA, fileSize: uxrefOffset); |
1139 | if (xRefStream) { |
1140 | writeXRefStreamTrailer(trailerDict: std::move(trailerDict), uxref, uxrefStreamRef: &uxrefStreamRef, uxrefOffset, outStr, xRef: getXRef()); |
1141 | } else { |
1142 | writeXRefTableTrailer(trailerDict: std::move(trailerDict), uxref, writeAllEntries: false, uxrefOffset, outStr, xRef: getXRef()); |
1143 | } |
1144 | |
1145 | delete uxref; |
1146 | } |
1147 | |
1148 | void PDFDoc::saveCompleteRewrite(OutStream *outStr) |
1149 | { |
1150 | // Make sure that special flags are set, because we are going to read |
1151 | // all objects, including Unencrypted ones. |
1152 | xref->scanSpecialFlags(); |
1153 | |
1154 | unsigned char *fileKey; |
1155 | CryptAlgorithm encAlgorithm; |
1156 | int keyLength; |
1157 | xref->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength); |
1158 | |
1159 | writeHeader(outStr, major: getPDFMajorVersion(), minor: getPDFMinorVersion()); |
1160 | XRef *uxref = new XRef(); |
1161 | uxref->add(num: 0, gen: 65535, offs: 0, used: false); |
1162 | xref->lock(); |
1163 | for (int i = 0; i < xref->getNumObjects(); i++) { |
1164 | Ref ref; |
1165 | XRefEntryType type = xref->getEntry(i)->type; |
1166 | if (type == xrefEntryFree) { |
1167 | ref.num = i; |
1168 | ref.gen = xref->getEntry(i)->gen; |
1169 | /* the XRef class adds a lot of irrelevant free entries, we only want the significant one |
1170 | and we don't want the one with num=0 because it has already been added (gen = 65535)*/ |
1171 | if (ref.gen > 0 && ref.num > 0) { |
1172 | uxref->add(ref, offs: 0, used: false); |
1173 | } |
1174 | } else if (xref->getEntry(i)->getFlag(flag: XRefEntry::DontRewrite)) { |
1175 | // This entry must not be written, put a free entry instead (with incremented gen) |
1176 | ref.num = i; |
1177 | ref.gen = xref->getEntry(i)->gen + 1; |
1178 | uxref->add(ref, offs: 0, used: false); |
1179 | } else if (type == xrefEntryUncompressed) { |
1180 | ref.num = i; |
1181 | ref.gen = xref->getEntry(i)->gen; |
1182 | Object obj1 = xref->fetch(ref, recursion: 1 /* recursion */); |
1183 | Goffset offset = writeObjectHeader(ref: &ref, outStr); |
1184 | // Write unencrypted objects in unencrypted form |
1185 | if (xref->getEntry(i)->getFlag(flag: XRefEntry::Unencrypted)) { |
1186 | writeObject(obj: &obj1, outStr, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
1187 | } else { |
1188 | writeObject(obj: &obj1, outStr, fileKey, encAlgorithm, keyLength, ref); |
1189 | } |
1190 | writeObjectFooter(outStr); |
1191 | uxref->add(ref, offs: offset, used: true); |
1192 | } else if (type == xrefEntryCompressed) { |
1193 | ref.num = i; |
1194 | ref.gen = 0; // compressed entries have gen == 0 |
1195 | Object obj1 = xref->fetch(ref, recursion: 1 /* recursion */); |
1196 | Goffset offset = writeObjectHeader(ref: &ref, outStr); |
1197 | writeObject(obj: &obj1, outStr, fileKey, encAlgorithm, keyLength, ref); |
1198 | writeObjectFooter(outStr); |
1199 | uxref->add(ref, offs: offset, used: true); |
1200 | } |
1201 | } |
1202 | xref->unlock(); |
1203 | Goffset uxrefOffset = outStr->getPos(); |
1204 | writeXRefTableTrailer(uxrefOffset, uxref, writeAllEntries: true /* write all entries */, uxrefSize: uxref->getNumObjects(), outStr, incrUpdate: false /* complete rewrite */); |
1205 | delete uxref; |
1206 | } |
1207 | |
1208 | std::string PDFDoc::sanitizedName(const std::string &name) |
1209 | { |
1210 | std::string sanitizedName; |
1211 | |
1212 | for (const auto c : name) { |
1213 | if (c <= (char)0x20 || c >= (char)0x7f || c == ' ' || c == '(' || c == ')' || c == '<' || c == '>' || c == '[' || c == ']' || c == '{' || c == '}' || c == '/' || c == '%' || c == '#') { |
1214 | char buf[8]; |
1215 | sprintf(s: buf, format: "#%02x" , c & 0xff); |
1216 | sanitizedName.append(s: buf); |
1217 | } else { |
1218 | sanitizedName.push_back(c: c); |
1219 | } |
1220 | } |
1221 | |
1222 | return sanitizedName; |
1223 | } |
1224 | |
1225 | void PDFDoc::writeDictionary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts) |
1226 | { |
1227 | bool deleteSet = false; |
1228 | if (!alreadyWrittenDicts) { |
1229 | alreadyWrittenDicts = new std::set<Dict *>; |
1230 | deleteSet = true; |
1231 | } |
1232 | |
1233 | if (alreadyWrittenDicts->find(x: dict) != alreadyWrittenDicts->end()) { |
1234 | error(category: errSyntaxWarning, pos: -1, msg: "PDFDoc::writeDictionary: Found recursive dicts" ); |
1235 | if (deleteSet) { |
1236 | delete alreadyWrittenDicts; |
1237 | } |
1238 | return; |
1239 | } else { |
1240 | alreadyWrittenDicts->insert(x: dict); |
1241 | } |
1242 | |
1243 | outStr->printf(format: "<<" ); |
1244 | for (int i = 0; i < dict->getLength(); i++) { |
1245 | GooString keyName(dict->getKey(i)); |
1246 | outStr->printf(format: "/%s " , sanitizedName(name: keyName.toStr()).c_str()); |
1247 | Object obj1 = dict->getValNF(i).copy(); |
1248 | writeObject(obj: &obj1, outStr, xref: xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts); |
1249 | } |
1250 | outStr->printf(format: ">> " ); |
1251 | |
1252 | if (deleteSet) { |
1253 | delete alreadyWrittenDicts; |
1254 | } |
1255 | } |
1256 | |
1257 | void PDFDoc::writeStream(Stream *str, OutStream *outStr) |
1258 | { |
1259 | outStr->printf(format: "stream\r\n" ); |
1260 | str->reset(); |
1261 | for (int c = str->getChar(); c != EOF; c = str->getChar()) { |
1262 | outStr->printf(format: "%c" , c); |
1263 | } |
1264 | outStr->printf(format: "\r\nendstream\r\n" ); |
1265 | } |
1266 | |
1267 | void PDFDoc::writeRawStream(Stream *str, OutStream *outStr) |
1268 | { |
1269 | Object obj1 = str->getDict()->lookup(key: "Length" ); |
1270 | if (!obj1.isInt() && !obj1.isInt64()) { |
1271 | error(category: errSyntaxError, pos: -1, msg: "PDFDoc::writeRawStream, no Length in stream dict" ); |
1272 | return; |
1273 | } |
1274 | |
1275 | Goffset length; |
1276 | if (obj1.isInt()) { |
1277 | length = obj1.getInt(); |
1278 | } else { |
1279 | length = obj1.getInt64(); |
1280 | } |
1281 | |
1282 | outStr->printf(format: "stream\r\n" ); |
1283 | str->unfilteredReset(); |
1284 | for (Goffset i = 0; i < length; i++) { |
1285 | int c = str->getUnfilteredChar(); |
1286 | if (unlikely(c == EOF)) { |
1287 | error(category: errSyntaxError, pos: -1, msg: "PDFDoc::writeRawStream: EOF reading stream" ); |
1288 | break; |
1289 | } |
1290 | outStr->printf(format: "%c" , c); |
1291 | } |
1292 | str->reset(); |
1293 | outStr->printf(format: "\r\nendstream\r\n" ); |
1294 | } |
1295 | |
1296 | void PDFDoc::writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref) |
1297 | { |
1298 | // Encrypt string if encryption is enabled |
1299 | GooString *sEnc = nullptr; |
1300 | if (fileKey) { |
1301 | EncryptStream *enc = new EncryptStream(new MemStream(s->c_str(), 0, s->getLength(), Object(objNull)), fileKey, encAlgorithm, keyLength, ref); |
1302 | sEnc = new GooString(); |
1303 | int c; |
1304 | enc->reset(); |
1305 | while ((c = enc->getChar()) != EOF) { |
1306 | sEnc->append(c: (char)c); |
1307 | } |
1308 | |
1309 | delete enc; |
1310 | s = sEnc; |
1311 | } |
1312 | |
1313 | // Write data |
1314 | if (hasUnicodeByteOrderMark(s: s->toStr())) { |
1315 | // unicode string don't necessary end with \0 |
1316 | const char *c = s->c_str(); |
1317 | std::stringstream stream; |
1318 | stream << std::setfill('0') << std::hex; |
1319 | for (int i = 0; i < s->getLength(); i++) { |
1320 | stream << std::setw(2) << (0xff & (unsigned int)*(c + i)); |
1321 | } |
1322 | outStr->printf(format: "<" ); |
1323 | outStr->printf(format: "%s" , stream.str().c_str()); |
1324 | outStr->printf(format: "> " ); |
1325 | } else { |
1326 | const char *c = s->c_str(); |
1327 | outStr->printf(format: "(" ); |
1328 | for (int i = 0; i < s->getLength(); i++) { |
1329 | char unescaped = *(c + i) & 0x000000ff; |
1330 | // escape if needed |
1331 | if (unescaped == '\r') { |
1332 | outStr->printf(format: "\\r" ); |
1333 | } else if (unescaped == '\n') { |
1334 | outStr->printf(format: "\\n" ); |
1335 | } else { |
1336 | if (unescaped == '(' || unescaped == ')' || unescaped == '\\') { |
1337 | outStr->printf(format: "%c" , '\\'); |
1338 | } |
1339 | outStr->printf(format: "%c" , unescaped); |
1340 | } |
1341 | } |
1342 | outStr->printf(format: ") " ); |
1343 | } |
1344 | |
1345 | delete sEnc; |
1346 | } |
1347 | |
1348 | Goffset PDFDoc::(Ref *ref, OutStream *outStr) |
1349 | { |
1350 | Goffset offset = outStr->getPos(); |
1351 | outStr->printf(format: "%i %i obj\r\n" , ref->num, ref->gen); |
1352 | return offset; |
1353 | } |
1354 | |
1355 | void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts) |
1356 | { |
1357 | writeObject(obj, outStr, xref: xRef, numOffset, fileKey, encAlgorithm, keyLength, ref: { .num: objNum, .gen: objGen }, alreadyWrittenDicts); |
1358 | } |
1359 | |
1360 | void PDFDoc::writeObject(Object *obj, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts) |
1361 | { |
1362 | Array *array; |
1363 | |
1364 | switch (obj->getType()) { |
1365 | case objBool: |
1366 | outStr->printf(format: "%s " , obj->getBool() ? "true" : "false" ); |
1367 | break; |
1368 | case objInt: |
1369 | outStr->printf(format: "%i " , obj->getInt()); |
1370 | break; |
1371 | case objInt64: |
1372 | outStr->printf(format: "%lli " , obj->getInt64()); |
1373 | break; |
1374 | case objReal: { |
1375 | GooString s; |
1376 | s.appendf(fmt: "{0:.10g}" , obj->getReal()); |
1377 | outStr->printf(format: "%s " , s.c_str()); |
1378 | break; |
1379 | } |
1380 | case objString: |
1381 | writeString(s: obj->getString(), outStr, fileKey, encAlgorithm, keyLength, ref); |
1382 | break; |
1383 | case objHexString: { |
1384 | const GooString *s = obj->getHexString(); |
1385 | outStr->printf(format: "<" ); |
1386 | for (int i = 0; i < s->getLength(); i++) { |
1387 | outStr->printf(format: "%02x" , s->getChar(i) & 0xff); |
1388 | } |
1389 | outStr->printf(format: "> " ); |
1390 | break; |
1391 | } |
1392 | case objName: { |
1393 | GooString name(obj->getName()); |
1394 | outStr->printf(format: "/%s " , sanitizedName(name: name.toStr()).c_str()); |
1395 | break; |
1396 | } |
1397 | case objNull: |
1398 | outStr->printf(format: "null " ); |
1399 | break; |
1400 | case objArray: |
1401 | array = obj->getArray(); |
1402 | outStr->printf(format: "[" ); |
1403 | for (int i = 0; i < array->getLength(); i++) { |
1404 | Object obj1 = array->getNF(i).copy(); |
1405 | writeObject(obj: &obj1, outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref); |
1406 | } |
1407 | outStr->printf(format: "] " ); |
1408 | break; |
1409 | case objDict: |
1410 | writeDictionary(dict: obj->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts); |
1411 | break; |
1412 | case objStream: { |
1413 | // We can't modify stream with the current implementation (no write functions in Stream API) |
1414 | // => the only type of streams which that have been modified are internal streams (=strWeird) |
1415 | Stream *stream = obj->getStream(); |
1416 | if (stream->getKind() == strWeird || stream->getKind() == strCrypt) { |
1417 | // we write the stream unencoded => TODO: write stream encoder |
1418 | |
1419 | // Encrypt stream |
1420 | bool removeFilter = true; |
1421 | bool addEncryptstream = false; |
1422 | if (stream->getKind() == strWeird && fileKey) { |
1423 | Object filter = stream->getDict()->lookup(key: "Filter" ); |
1424 | if (!filter.isName(nameA: "Crypt" )) { |
1425 | if (filter.isArray()) { |
1426 | for (int i = 0; i < filter.arrayGetLength(); i++) { |
1427 | Object filterEle = filter.arrayGet(i); |
1428 | if (filterEle.isName(nameA: "Crypt" )) { |
1429 | removeFilter = false; |
1430 | break; |
1431 | } |
1432 | } |
1433 | if (removeFilter) { |
1434 | addEncryptstream = true; |
1435 | } |
1436 | } else { |
1437 | addEncryptstream = true; |
1438 | } |
1439 | } else { |
1440 | removeFilter = false; |
1441 | } |
1442 | } else if (fileKey != nullptr) { // Encrypt stream |
1443 | addEncryptstream = true; |
1444 | } |
1445 | |
1446 | std::unique_ptr<EncryptStream> encStream; |
1447 | std::unique_ptr<Stream> compressStream; |
1448 | Object filter = stream->getDict()->lookup(key: "Filter" ); |
1449 | if (filter.isName(nameA: "FlateDecode" )) { |
1450 | compressStream = std::make_unique<FlateEncoder>(args&: stream); |
1451 | stream = compressStream.get(); |
1452 | removeFilter = false; |
1453 | } |
1454 | if (addEncryptstream) { |
1455 | encStream = std::make_unique<EncryptStream>(args&: stream, args&: fileKey, args&: encAlgorithm, args&: keyLength, args&: ref); |
1456 | encStream->setAutoDelete(false); |
1457 | stream = encStream.get(); |
1458 | } |
1459 | |
1460 | stream->reset(); |
1461 | // recalculate stream length |
1462 | Goffset tmp = 0; |
1463 | for (int c = stream->getChar(); c != EOF; c = stream->getChar()) { |
1464 | tmp++; |
1465 | } |
1466 | stream->getDict()->set(key: "Length" , val: Object(tmp)); |
1467 | |
1468 | // Remove Stream encoding |
1469 | AutoFreeMemStream *internalStream = dynamic_cast<AutoFreeMemStream *>(stream); |
1470 | if (internalStream && internalStream->isFilterRemovalForbidden()) { |
1471 | removeFilter = false; |
1472 | } |
1473 | if (removeFilter) { |
1474 | stream->getDict()->remove(key: "Filter" ); |
1475 | } |
1476 | stream->getDict()->remove(key: "DecodeParms" ); |
1477 | |
1478 | writeDictionary(dict: stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts); |
1479 | writeStream(str: stream, outStr); |
1480 | } else if (fileKey != nullptr && stream->getKind() == strFile && static_cast<FileStream *>(stream)->getNeedsEncryptionOnSave()) { |
1481 | EncryptStream *encStream = new EncryptStream(stream, fileKey, encAlgorithm, keyLength, ref); |
1482 | encStream->setAutoDelete(false); |
1483 | writeDictionary(dict: encStream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts); |
1484 | writeStream(str: encStream, outStr); |
1485 | delete encStream; |
1486 | } else { |
1487 | // raw stream copy |
1488 | FilterStream *fs = dynamic_cast<FilterStream *>(stream); |
1489 | if (fs) { |
1490 | BaseStream *bs = fs->getBaseStream(); |
1491 | if (bs) { |
1492 | Goffset streamEnd; |
1493 | if (xRef->getStreamEnd(streamStart: bs->getStart(), streamEnd: &streamEnd)) { |
1494 | Goffset val = streamEnd - bs->getStart(); |
1495 | stream->getDict()->set(key: "Length" , val: Object(val)); |
1496 | } |
1497 | } |
1498 | } |
1499 | writeDictionary(dict: stream->getDict(), outStr, xRef, numOffset, fileKey, encAlgorithm, keyLength, ref, alreadyWrittenDicts); |
1500 | writeRawStream(str: stream, outStr); |
1501 | } |
1502 | break; |
1503 | } |
1504 | case objRef: |
1505 | outStr->printf(format: "%i %i R " , obj->getRef().num + numOffset, obj->getRef().gen); |
1506 | break; |
1507 | case objCmd: |
1508 | outStr->printf(format: "%s\n" , obj->getCmd()); |
1509 | break; |
1510 | case objError: |
1511 | outStr->printf(format: "error\r\n" ); |
1512 | break; |
1513 | case objEOF: |
1514 | outStr->printf(format: "eof\r\n" ); |
1515 | break; |
1516 | case objNone: |
1517 | outStr->printf(format: "none\r\n" ); |
1518 | break; |
1519 | default: |
1520 | error(category: errUnimplemented, pos: -1, msg: "Unhandled objType : {0:d}, please report a bug with a testcase\r\n" , obj->getType()); |
1521 | break; |
1522 | } |
1523 | } |
1524 | |
1525 | void PDFDoc::(OutStream *outStr) |
1526 | { |
1527 | outStr->printf(format: "\r\nendobj\r\n" ); |
1528 | } |
1529 | |
1530 | Object PDFDoc::createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize) |
1531 | { |
1532 | Dict *trailerDict = new Dict(xRef); |
1533 | trailerDict->set(key: "Size" , val: Object(uxrefSize)); |
1534 | |
1535 | // build a new ID, as recommended in the reference, uses: |
1536 | // - current time |
1537 | // - file name |
1538 | // - file size |
1539 | // - values of entry in information dictionary |
1540 | GooString message; |
1541 | char buffer[256]; |
1542 | sprintf(s: buffer, format: "%i" , (int)time(timer: nullptr)); |
1543 | message.append(str: buffer); |
1544 | |
1545 | if (fileName) { |
1546 | message.append(str: fileName); |
1547 | } |
1548 | |
1549 | sprintf(s: buffer, format: "%lli" , (long long)fileSize); |
1550 | message.append(str: buffer); |
1551 | |
1552 | // info dict -- only use text string |
1553 | if (!xRef->getTrailerDict()->isNone()) { |
1554 | Object docInfo = xRef->getDocInfo(); |
1555 | if (docInfo.isDict()) { |
1556 | for (int i = 0; i < docInfo.getDict()->getLength(); i++) { |
1557 | Object obj2 = docInfo.getDict()->getVal(i); |
1558 | if (obj2.isString()) { |
1559 | message.append(str: obj2.getString()); |
1560 | } |
1561 | } |
1562 | } |
1563 | } |
1564 | |
1565 | bool hasEncrypt = false; |
1566 | if (!xRef->getTrailerDict()->isNone()) { |
1567 | Object obj2 = xRef->getTrailerDict()->dictLookupNF(key: "Encrypt" ).copy(); |
1568 | if (!obj2.isNull()) { |
1569 | trailerDict->set(key: "Encrypt" , val: std::move(obj2)); |
1570 | hasEncrypt = true; |
1571 | } |
1572 | } |
1573 | |
1574 | // calculate md5 digest |
1575 | unsigned char digest[16]; |
1576 | md5(msg: (unsigned char *)message.c_str(), msgLen: message.getLength(), digest); |
1577 | |
1578 | // create ID array |
1579 | // In case of encrypted files, the ID must not be changed because it's used to calculate the key |
1580 | if (incrUpdate || hasEncrypt) { |
1581 | // only update the second part of the array |
1582 | Object obj4 = xRef->getTrailerDict()->getDict()->lookup(key: "ID" ); |
1583 | if (!obj4.isArray()) { |
1584 | if (hasEncrypt) { |
1585 | error(category: errSyntaxWarning, pos: -1, msg: "PDFDoc::createTrailerDict original file's ID entry isn't an array. Trying to continue" ); |
1586 | } |
1587 | } else { |
1588 | Array *array = new Array(xRef); |
1589 | // Get the first part of the ID |
1590 | array->add(elem: obj4.arrayGet(i: 0)); |
1591 | array->add(elem: Object(new GooString((const char *)digest, 16))); |
1592 | trailerDict->set(key: "ID" , val: Object(array)); |
1593 | } |
1594 | } else { |
1595 | // new file => same values for the two identifiers |
1596 | Array *array = new Array(xRef); |
1597 | array->add(elem: Object(new GooString((const char *)digest, 16))); |
1598 | array->add(elem: Object(new GooString((const char *)digest, 16))); |
1599 | trailerDict->set(key: "ID" , val: Object(array)); |
1600 | } |
1601 | |
1602 | trailerDict->set(key: "Root" , val: Object(*root)); |
1603 | |
1604 | if (incrUpdate) { |
1605 | trailerDict->set(key: "Prev" , val: Object(startxRef)); |
1606 | } |
1607 | |
1608 | if (!xRef->getTrailerDict()->isNone()) { |
1609 | Object obj5 = xRef->getDocInfoNF(); |
1610 | if (!obj5.isNull()) { |
1611 | trailerDict->set(key: "Info" , val: std::move(obj5)); |
1612 | } |
1613 | } |
1614 | |
1615 | return Object(trailerDict); |
1616 | } |
1617 | |
1618 | void PDFDoc::writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef) |
1619 | { |
1620 | uxref->writeTableToFile(outStr, writeAllEntries); |
1621 | outStr->printf(format: "trailer\r\n" ); |
1622 | writeDictionary(dict: trailerDict.getDict(), outStr, xRef, numOffset: 0, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, ref: { .num: 0, .gen: 0 }, alreadyWrittenDicts: nullptr); |
1623 | outStr->printf(format: "\r\nstartxref\r\n" ); |
1624 | outStr->printf(format: "%lli\r\n" , uxrefOffset); |
1625 | outStr->printf(format: "%%%%EOF\r\n" ); |
1626 | } |
1627 | |
1628 | void PDFDoc::writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef) |
1629 | { |
1630 | GooString stmData; |
1631 | |
1632 | // Fill stmData and some trailerDict fields |
1633 | uxref->writeStreamToBuffer(stmBuf: &stmData, xrefDict: trailerDict.getDict(), xref: xRef); |
1634 | |
1635 | // Create XRef stream object and write it |
1636 | MemStream *mStream = new MemStream(stmData.c_str(), 0, stmData.getLength(), std::move(trailerDict)); |
1637 | writeObjectHeader(ref: uxrefStreamRef, outStr); |
1638 | Object obj1(static_cast<Stream *>(mStream)); |
1639 | writeObject(obj: &obj1, outStr, xRef, numOffset: 0, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
1640 | writeObjectFooter(outStr); |
1641 | |
1642 | outStr->printf(format: "startxref\r\n" ); |
1643 | outStr->printf(format: "%lli\r\n" , uxrefOffset); |
1644 | outStr->printf(format: "%%%%EOF\r\n" ); |
1645 | } |
1646 | |
1647 | void PDFDoc::writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate) |
1648 | { |
1649 | const char *fileNameA = fileName ? fileName->c_str() : nullptr; |
1650 | // file size (doesn't include the trailer) |
1651 | unsigned int fileSize = 0; |
1652 | int c; |
1653 | str->reset(); |
1654 | while ((c = str->getChar()) != EOF) { |
1655 | fileSize++; |
1656 | } |
1657 | str->close(); |
1658 | Ref ref; |
1659 | ref.num = getXRef()->getRootNum(); |
1660 | ref.gen = getXRef()->getRootGen(); |
1661 | Object trailerDict = createTrailerDict(uxrefSize, incrUpdate, startxRef: getStartXRef(), root: &ref, xRef: getXRef(), fileName: fileNameA, fileSize); |
1662 | writeXRefTableTrailer(trailerDict: std::move(trailerDict), uxref, writeAllEntries, uxrefOffset, outStr, xRef: getXRef()); |
1663 | } |
1664 | |
1665 | void PDFDoc::(OutStream *outStr, int major, int minor) |
1666 | { |
1667 | outStr->printf(format: "%%PDF-%d.%d\n" , major, minor); |
1668 | outStr->printf(format: "%%%c%c%c%c\n" , 0xE2, 0xE3, 0xCF, 0xD3); |
1669 | } |
1670 | |
1671 | bool PDFDoc::markDictionary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts) |
1672 | { |
1673 | bool deleteSet = false; |
1674 | if (!alreadyMarkedDicts) { |
1675 | alreadyMarkedDicts = new std::set<Dict *>; |
1676 | deleteSet = true; |
1677 | } |
1678 | |
1679 | if (alreadyMarkedDicts->find(x: dict) != alreadyMarkedDicts->end()) { |
1680 | error(category: errSyntaxWarning, pos: -1, msg: "PDFDoc::markDictionary: Found recursive dicts" ); |
1681 | if (deleteSet) { |
1682 | delete alreadyMarkedDicts; |
1683 | } |
1684 | return true; |
1685 | } else { |
1686 | alreadyMarkedDicts->insert(x: dict); |
1687 | } |
1688 | |
1689 | for (int i = 0; i < dict->getLength(); i++) { |
1690 | const char *key = dict->getKey(i); |
1691 | if (strcmp(s1: key, s2: "Annots" ) != 0) { |
1692 | Object obj1 = dict->getValNF(i).copy(); |
1693 | const bool success = markObject(obj: &obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts); |
1694 | if (unlikely(!success)) { |
1695 | return false; |
1696 | } |
1697 | } else { |
1698 | Object annotsObj = dict->getValNF(i).copy(); |
1699 | if (!annotsObj.isNull()) { |
1700 | markAnnotations(annots: &annotsObj, xRef, countRef, numOffset: 0, oldPageNum: oldRefNum, newPageNum: newRefNum, alreadyMarkedDicts); |
1701 | } |
1702 | } |
1703 | } |
1704 | |
1705 | if (deleteSet) { |
1706 | delete alreadyMarkedDicts; |
1707 | } |
1708 | |
1709 | return true; |
1710 | } |
1711 | |
1712 | bool PDFDoc::markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts) |
1713 | { |
1714 | Array *array; |
1715 | |
1716 | switch (obj->getType()) { |
1717 | case objArray: |
1718 | array = obj->getArray(); |
1719 | for (int i = 0; i < array->getLength(); i++) { |
1720 | Object obj1 = array->getNF(i).copy(); |
1721 | const bool success = markObject(obj: &obj1, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts); |
1722 | if (unlikely(!success)) { |
1723 | return false; |
1724 | } |
1725 | } |
1726 | break; |
1727 | case objDict: { |
1728 | const bool success = markDictionary(dict: obj->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts); |
1729 | if (unlikely(!success)) { |
1730 | return false; |
1731 | } |
1732 | } break; |
1733 | case objStream: { |
1734 | Stream *stream = obj->getStream(); |
1735 | const bool success = markDictionary(dict: stream->getDict(), xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts); |
1736 | if (unlikely(!success)) { |
1737 | return false; |
1738 | } |
1739 | } break; |
1740 | case objRef: { |
1741 | if (obj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(i: obj->getRef().num + numOffset)->type == xrefEntryFree) { |
1742 | if (getXRef()->getEntry(i: obj->getRef().num)->type == xrefEntryFree) { |
1743 | return true; // already marked as free => should be replaced |
1744 | } |
1745 | const bool success = xRef->add(num: obj->getRef().num + numOffset, gen: obj->getRef().gen, offs: 0, used: true); |
1746 | if (unlikely(!success)) { |
1747 | return false; |
1748 | } |
1749 | if (getXRef()->getEntry(i: obj->getRef().num)->type == xrefEntryCompressed) { |
1750 | xRef->getEntry(i: obj->getRef().num + numOffset)->type = xrefEntryCompressed; |
1751 | } |
1752 | } |
1753 | if (obj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(i: obj->getRef().num + numOffset)->type == xrefEntryFree) { |
1754 | countRef->add(num: obj->getRef().num + numOffset, gen: 1, offs: 0, used: true); |
1755 | } else { |
1756 | XRefEntry *entry = countRef->getEntry(i: obj->getRef().num + numOffset); |
1757 | entry->gen++; |
1758 | if (entry->gen > 9) { |
1759 | break; |
1760 | } |
1761 | } |
1762 | Object obj1 = getXRef()->fetch(ref: obj->getRef()); |
1763 | const bool success = markObject(obj: &obj1, xRef, countRef, numOffset, oldRefNum, newRefNum); |
1764 | if (unlikely(!success)) { |
1765 | return false; |
1766 | } |
1767 | } break; |
1768 | default: |
1769 | break; |
1770 | } |
1771 | |
1772 | return true; |
1773 | } |
1774 | |
1775 | bool PDFDoc::replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox) |
1776 | { |
1777 | Ref *refPage = getCatalog()->getPageRef(i: pageNo); |
1778 | Object page = getXRef()->fetch(ref: *refPage); |
1779 | if (!page.isDict()) { |
1780 | return false; |
1781 | } |
1782 | Dict *pageDict = page.getDict(); |
1783 | pageDict->remove(key: "MediaBoxssdf" ); |
1784 | pageDict->remove(key: "MediaBox" ); |
1785 | pageDict->remove(key: "CropBox" ); |
1786 | pageDict->remove(key: "ArtBox" ); |
1787 | pageDict->remove(key: "BleedBox" ); |
1788 | pageDict->remove(key: "TrimBox" ); |
1789 | pageDict->remove(key: "Rotate" ); |
1790 | Array *mediaBoxArray = new Array(getXRef()); |
1791 | mediaBoxArray->add(elem: Object(mediaBox->x1)); |
1792 | mediaBoxArray->add(elem: Object(mediaBox->y1)); |
1793 | mediaBoxArray->add(elem: Object(mediaBox->x2)); |
1794 | mediaBoxArray->add(elem: Object(mediaBox->y2)); |
1795 | Object mediaBoxObject(mediaBoxArray); |
1796 | Object trimBoxObject = mediaBoxObject.copy(); |
1797 | pageDict->add(key: "MediaBox" , val: std::move(mediaBoxObject)); |
1798 | if (cropBox != nullptr) { |
1799 | Array *cropBoxArray = new Array(getXRef()); |
1800 | cropBoxArray->add(elem: Object(cropBox->x1)); |
1801 | cropBoxArray->add(elem: Object(cropBox->y1)); |
1802 | cropBoxArray->add(elem: Object(cropBox->x2)); |
1803 | cropBoxArray->add(elem: Object(cropBox->y2)); |
1804 | Object cropBoxObject(cropBoxArray); |
1805 | trimBoxObject = cropBoxObject.copy(); |
1806 | pageDict->add(key: "CropBox" , val: std::move(cropBoxObject)); |
1807 | } |
1808 | pageDict->add(key: "TrimBox" , val: std::move(trimBoxObject)); |
1809 | pageDict->add(key: "Rotate" , val: Object(rotate)); |
1810 | getXRef()->setModifiedObject(o: &page, r: *refPage); |
1811 | return true; |
1812 | } |
1813 | |
1814 | bool PDFDoc::markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts) |
1815 | { |
1816 | pageDict->remove(key: "OpenAction" ); |
1817 | pageDict->remove(key: "Outlines" ); |
1818 | pageDict->remove(key: "StructTreeRoot" ); |
1819 | |
1820 | for (int n = 0; n < pageDict->getLength(); n++) { |
1821 | const char *key = pageDict->getKey(i: n); |
1822 | Object value = pageDict->getValNF(i: n).copy(); |
1823 | if (strcmp(s1: key, s2: "Parent" ) != 0 && strcmp(s1: key, s2: "Pages" ) != 0 && strcmp(s1: key, s2: "AcroForm" ) != 0 && strcmp(s1: key, s2: "Annots" ) != 0 && strcmp(s1: key, s2: "P" ) != 0 && strcmp(s1: key, s2: "Root" ) != 0) { |
1824 | const bool success = markObject(obj: &value, xRef, countRef, numOffset, oldRefNum, newRefNum, alreadyMarkedDicts); |
1825 | if (unlikely(!success)) { |
1826 | return false; |
1827 | } |
1828 | } |
1829 | } |
1830 | return true; |
1831 | } |
1832 | |
1833 | bool PDFDoc::markAnnotations(Object *annotsObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts) |
1834 | { |
1835 | bool modified = false; |
1836 | Object annots = annotsObj->fetch(xref: getXRef()); |
1837 | if (annots.isArray()) { |
1838 | Array *array = annots.getArray(); |
1839 | for (int i = array->getLength() - 1; i >= 0; i--) { |
1840 | Object obj1 = array->get(i); |
1841 | if (obj1.isDict()) { |
1842 | Dict *dict = obj1.getDict(); |
1843 | Object type = dict->lookup(key: "Type" ); |
1844 | if (type.isName() && strcmp(s1: type.getName(), s2: "Annot" ) == 0) { |
1845 | const Object &obj2 = dict->lookupNF(key: "P" ); |
1846 | if (obj2.isRef()) { |
1847 | if (obj2.getRef().num == oldPageNum) { |
1848 | const Object &obj3 = array->getNF(i); |
1849 | if (obj3.isRef()) { |
1850 | Ref r; |
1851 | r.num = newPageNum; |
1852 | r.gen = 0; |
1853 | dict->set(key: "P" , val: Object(r)); |
1854 | getXRef()->setModifiedObject(o: &obj1, r: obj3.getRef()); |
1855 | } |
1856 | } else if (obj2.getRef().num == newPageNum) { |
1857 | continue; |
1858 | } else { |
1859 | Object page = getXRef()->fetch(ref: obj2.getRef()); |
1860 | if (page.isDict()) { |
1861 | Dict *pageDict = page.getDict(); |
1862 | Object pagetype = pageDict->lookup(key: "Type" ); |
1863 | if (!pagetype.isName() || strcmp(s1: pagetype.getName(), s2: "Page" ) != 0) { |
1864 | continue; |
1865 | } |
1866 | } |
1867 | array->remove(i); |
1868 | modified = true; |
1869 | continue; |
1870 | } |
1871 | } |
1872 | } |
1873 | markPageObjects(pageDict: dict, xRef, countRef, numOffset, oldRefNum: oldPageNum, newRefNum: newPageNum, alreadyMarkedDicts); |
1874 | } |
1875 | obj1 = array->getNF(i).copy(); |
1876 | if (obj1.isRef()) { |
1877 | if (obj1.getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(i: obj1.getRef().num + numOffset)->type == xrefEntryFree) { |
1878 | if (getXRef()->getEntry(i: obj1.getRef().num)->type == xrefEntryFree) { |
1879 | continue; // already marked as free => should be replaced |
1880 | } |
1881 | xRef->add(num: obj1.getRef().num + numOffset, gen: obj1.getRef().gen, offs: 0, used: true); |
1882 | if (getXRef()->getEntry(i: obj1.getRef().num)->type == xrefEntryCompressed) { |
1883 | xRef->getEntry(i: obj1.getRef().num + numOffset)->type = xrefEntryCompressed; |
1884 | } |
1885 | } |
1886 | if (obj1.getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(i: obj1.getRef().num + numOffset)->type == xrefEntryFree) { |
1887 | countRef->add(num: obj1.getRef().num + numOffset, gen: 1, offs: 0, used: true); |
1888 | } else { |
1889 | XRefEntry *entry = countRef->getEntry(i: obj1.getRef().num + numOffset); |
1890 | entry->gen++; |
1891 | } |
1892 | } |
1893 | } |
1894 | } |
1895 | if (annotsObj->isRef()) { |
1896 | if (annotsObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(i: annotsObj->getRef().num + numOffset)->type == xrefEntryFree) { |
1897 | if (getXRef()->getEntry(i: annotsObj->getRef().num)->type == xrefEntryFree) { |
1898 | return modified; // already marked as free => should be replaced |
1899 | } |
1900 | xRef->add(num: annotsObj->getRef().num + numOffset, gen: annotsObj->getRef().gen, offs: 0, used: true); |
1901 | if (getXRef()->getEntry(i: annotsObj->getRef().num)->type == xrefEntryCompressed) { |
1902 | xRef->getEntry(i: annotsObj->getRef().num + numOffset)->type = xrefEntryCompressed; |
1903 | } |
1904 | } |
1905 | if (annotsObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(i: annotsObj->getRef().num + numOffset)->type == xrefEntryFree) { |
1906 | countRef->add(num: annotsObj->getRef().num + numOffset, gen: 1, offs: 0, used: true); |
1907 | } else { |
1908 | XRefEntry *entry = countRef->getEntry(i: annotsObj->getRef().num + numOffset); |
1909 | entry->gen++; |
1910 | } |
1911 | getXRef()->setModifiedObject(o: &annots, r: annotsObj->getRef()); |
1912 | } |
1913 | return modified; |
1914 | } |
1915 | |
1916 | void PDFDoc::markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum) |
1917 | { |
1918 | bool modified = false; |
1919 | Object acroform = afObj->fetch(xref: getXRef()); |
1920 | if (acroform.isDict()) { |
1921 | Dict *dict = acroform.getDict(); |
1922 | for (int i = 0; i < dict->getLength(); i++) { |
1923 | if (strcmp(s1: dict->getKey(i), s2: "Fields" ) == 0) { |
1924 | Object fields = dict->getValNF(i).copy(); |
1925 | modified = markAnnotations(annotsObj: &fields, xRef, countRef, numOffset, oldPageNum: oldRefNum, newPageNum: newRefNum); |
1926 | } else { |
1927 | Object obj = dict->getValNF(i).copy(); |
1928 | markObject(obj: &obj, xRef, countRef, numOffset, oldRefNum, newRefNum); |
1929 | } |
1930 | } |
1931 | } |
1932 | if (afObj->isRef()) { |
1933 | if (afObj->getRef().num + (int)numOffset >= xRef->getNumObjects() || xRef->getEntry(i: afObj->getRef().num + numOffset)->type == xrefEntryFree) { |
1934 | if (getXRef()->getEntry(i: afObj->getRef().num)->type == xrefEntryFree) { |
1935 | return; // already marked as free => should be replaced |
1936 | } |
1937 | xRef->add(num: afObj->getRef().num + numOffset, gen: afObj->getRef().gen, offs: 0, used: true); |
1938 | if (getXRef()->getEntry(i: afObj->getRef().num)->type == xrefEntryCompressed) { |
1939 | xRef->getEntry(i: afObj->getRef().num + numOffset)->type = xrefEntryCompressed; |
1940 | } |
1941 | } |
1942 | if (afObj->getRef().num + (int)numOffset >= countRef->getNumObjects() || countRef->getEntry(i: afObj->getRef().num + numOffset)->type == xrefEntryFree) { |
1943 | countRef->add(num: afObj->getRef().num + numOffset, gen: 1, offs: 0, used: true); |
1944 | } else { |
1945 | XRefEntry *entry = countRef->getEntry(i: afObj->getRef().num + numOffset); |
1946 | entry->gen++; |
1947 | } |
1948 | if (modified) { |
1949 | getXRef()->setModifiedObject(o: &acroform, r: afObj->getRef()); |
1950 | } |
1951 | } |
1952 | return; |
1953 | } |
1954 | |
1955 | unsigned int PDFDoc::writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine) |
1956 | { |
1957 | unsigned int objectsCount = 0; // count the number of objects in the XRef(s) |
1958 | unsigned char *fileKey; |
1959 | CryptAlgorithm encAlgorithm; |
1960 | int keyLength; |
1961 | xRef->getEncryptionParameters(fileKeyA: &fileKey, encAlgorithmA: &encAlgorithm, keyLengthA: &keyLength); |
1962 | |
1963 | for (int n = numOffset; n < xRef->getNumObjects(); n++) { |
1964 | if (xRef->getEntry(i: n)->type != xrefEntryFree) { |
1965 | Ref ref; |
1966 | ref.num = n; |
1967 | ref.gen = xRef->getEntry(i: n)->gen; |
1968 | objectsCount++; |
1969 | Object obj = getXRef()->fetch(num: ref.num - numOffset, gen: ref.gen); |
1970 | Goffset offset = writeObjectHeader(ref: &ref, outStr); |
1971 | if (combine) { |
1972 | writeObject(obj: &obj, outStr, xRef: getXRef(), numOffset, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
1973 | } else if (xRef->getEntry(i: n)->getFlag(flag: XRefEntry::Unencrypted)) { |
1974 | writeObject(obj: &obj, outStr, fileKey: nullptr, encAlgorithm: cryptRC4, keyLength: 0, objNum: 0, objGen: 0); |
1975 | } else { |
1976 | writeObject(obj: &obj, outStr, fileKey, encAlgorithm, keyLength, ref); |
1977 | } |
1978 | writeObjectFooter(outStr); |
1979 | xRef->add(ref, offs: offset, used: true); |
1980 | } |
1981 | } |
1982 | return objectsCount; |
1983 | } |
1984 | |
1985 | Outline *PDFDoc::getOutline() |
1986 | { |
1987 | if (!outline) { |
1988 | pdfdocLocker(); |
1989 | // read outline |
1990 | outline = new Outline(catalog->getOutline(), xref, this); |
1991 | } |
1992 | |
1993 | return outline; |
1994 | } |
1995 | |
1996 | std::unique_ptr<PDFDoc> PDFDoc::ErrorPDFDoc(int errorCode, std::unique_ptr<GooString> &&fileNameA) |
1997 | { |
1998 | // We cannot call std::make_unique here because the PDFDoc constructor is private |
1999 | PDFDoc *doc = new PDFDoc(); |
2000 | doc->errCode = errorCode; |
2001 | doc->fileName = std::move(fileNameA); |
2002 | |
2003 | return std::unique_ptr<PDFDoc>(doc); |
2004 | } |
2005 | |
2006 | long long PDFDoc::strToLongLong(const char *s) |
2007 | { |
2008 | long long x, d; |
2009 | const char *p; |
2010 | |
2011 | x = 0; |
2012 | for (p = s; *p && isdigit(*p & 0xff); ++p) { |
2013 | d = *p - '0'; |
2014 | if (x > (LLONG_MAX - d) / 10) { |
2015 | break; |
2016 | } |
2017 | x = 10 * x + d; |
2018 | } |
2019 | return x; |
2020 | } |
2021 | |
2022 | // Read the 'startxref' position. |
2023 | Goffset PDFDoc::getStartXRef(bool tryingToReconstruct) |
2024 | { |
2025 | if (startXRefPos == -1) { |
2026 | |
2027 | if (isLinearized(tryingToReconstruct)) { |
2028 | char buf[linearizationSearchSize + 1]; |
2029 | int c, n, i; |
2030 | |
2031 | str->setPos(pos: 0); |
2032 | for (n = 0; n < linearizationSearchSize; ++n) { |
2033 | if ((c = str->getChar()) == EOF) { |
2034 | break; |
2035 | } |
2036 | buf[n] = c; |
2037 | } |
2038 | buf[n] = '\0'; |
2039 | |
2040 | // find end of first obj (linearization dictionary) |
2041 | startXRefPos = 0; |
2042 | for (i = 0; i < n; i++) { |
2043 | if (!strncmp(s1: "endobj" , s2: &buf[i], n: 6)) { |
2044 | i += 6; |
2045 | // skip whitespace |
2046 | while (buf[i] && Lexer::isSpace(c: buf[i])) { |
2047 | ++i; |
2048 | } |
2049 | startXRefPos = i; |
2050 | break; |
2051 | } |
2052 | } |
2053 | } else { |
2054 | char buf[xrefSearchSize + 1]; |
2055 | const char *p; |
2056 | int c, n, i; |
2057 | |
2058 | // read last xrefSearchSize bytes |
2059 | int segnum = 0; |
2060 | int maxXRefSearch = 24576; |
2061 | if (str->getLength() < maxXRefSearch) { |
2062 | maxXRefSearch = static_cast<int>(str->getLength()); |
2063 | } |
2064 | for (; (xrefSearchSize - 16) * segnum < maxXRefSearch; segnum++) { |
2065 | str->setPos(pos: (xrefSearchSize - 16) * segnum + xrefSearchSize, dir: -1); |
2066 | for (n = 0; n < xrefSearchSize; ++n) { |
2067 | if ((c = str->getChar()) == EOF) { |
2068 | break; |
2069 | } |
2070 | buf[n] = c; |
2071 | } |
2072 | buf[n] = '\0'; |
2073 | |
2074 | // find startxref |
2075 | for (i = n - 9; i >= 0; --i) { |
2076 | if (!strncmp(s1: &buf[i], s2: "startxref" , n: 9)) { |
2077 | break; |
2078 | } |
2079 | } |
2080 | if (i < 0) { |
2081 | startXRefPos = 0; |
2082 | } else { |
2083 | for (p = &buf[i + 9]; isspace(*p); ++p) { |
2084 | ; |
2085 | } |
2086 | startXRefPos = strToLongLong(s: p); |
2087 | break; |
2088 | } |
2089 | } |
2090 | } |
2091 | } |
2092 | |
2093 | return startXRefPos; |
2094 | } |
2095 | |
2096 | Goffset PDFDoc::getMainXRefEntriesOffset(bool tryingToReconstruct) |
2097 | { |
2098 | unsigned int mainXRefEntriesOffset = 0; |
2099 | |
2100 | if (isLinearized(tryingToReconstruct)) { |
2101 | mainXRefEntriesOffset = getLinearization()->getMainXRefEntriesOffset(); |
2102 | } |
2103 | |
2104 | return mainXRefEntriesOffset; |
2105 | } |
2106 | |
2107 | int PDFDoc::getNumPages() |
2108 | { |
2109 | if (isLinearized()) { |
2110 | int n; |
2111 | if ((n = getLinearization()->getNumPages())) { |
2112 | return n; |
2113 | } |
2114 | } |
2115 | |
2116 | return catalog->getNumPages(); |
2117 | } |
2118 | |
2119 | Page *PDFDoc::parsePage(int page) |
2120 | { |
2121 | Ref ; |
2122 | |
2123 | pageRef.num = getHints()->getPageObjectNum(page); |
2124 | if (!pageRef.num) { |
2125 | error(category: errSyntaxWarning, pos: -1, msg: "Failed to get object num from hint tables for page {0:d}" , page); |
2126 | return nullptr; |
2127 | } |
2128 | |
2129 | // check for bogus ref - this can happen in corrupted PDF files |
2130 | if (pageRef.num < 0 || pageRef.num >= xref->getNumObjects()) { |
2131 | error(category: errSyntaxWarning, pos: -1, msg: "Invalid object num ({0:d}) for page {1:d}" , pageRef.num, page); |
2132 | return nullptr; |
2133 | } |
2134 | |
2135 | pageRef.gen = xref->getEntry(i: pageRef.num)->gen; |
2136 | Object obj = xref->fetch(ref: pageRef); |
2137 | if (!obj.isDict(dictType: "Page" )) { |
2138 | error(category: errSyntaxWarning, pos: -1, msg: "Object ({0:d} {1:d}) is not a pageDict" , pageRef.num, pageRef.gen); |
2139 | return nullptr; |
2140 | } |
2141 | Dict *pageDict = obj.getDict(); |
2142 | |
2143 | return new Page(this, page, std::move(obj), pageRef, new PageAttrs(nullptr, pageDict), catalog->getForm()); |
2144 | } |
2145 | |
2146 | Page *PDFDoc::getPage(int page) |
2147 | { |
2148 | if ((page < 1) || page > getNumPages()) { |
2149 | return nullptr; |
2150 | } |
2151 | |
2152 | if (isLinearized() && checkLinearization()) { |
2153 | pdfdocLocker(); |
2154 | if (!pageCache) { |
2155 | pageCache = (Page **)gmallocn(count: getNumPages(), size: sizeof(Page *)); |
2156 | for (int i = 0; i < getNumPages(); i++) { |
2157 | pageCache[i] = nullptr; |
2158 | } |
2159 | } |
2160 | if (!pageCache[page - 1]) { |
2161 | pageCache[page - 1] = parsePage(page); |
2162 | } |
2163 | if (pageCache[page - 1]) { |
2164 | return pageCache[page - 1]; |
2165 | } else { |
2166 | error(category: errSyntaxWarning, pos: -1, msg: "Failed parsing page {0:d} using hint tables" , page); |
2167 | } |
2168 | } |
2169 | |
2170 | return catalog->getPage(i: page); |
2171 | } |
2172 | |
2173 | bool PDFDoc::hasJavascript() |
2174 | { |
2175 | JSInfo jsInfo(this); |
2176 | jsInfo.scanJS(nPages: getNumPages(), stopOnFirstJS: true); |
2177 | return jsInfo.containsJS(); |
2178 | } |
2179 | |
2180 | bool PDFDoc::sign(const std::string &saveFilename, const std::string &certNickname, const std::string &password, GooString *partialFieldName, int page, const PDFRectangle &rect, const GooString &signatureText, |
2181 | const GooString &signatureTextLeft, double fontSize, double leftFontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, |
2182 | std::unique_ptr<AnnotColor> &&backgroundColor, const GooString *reason, const GooString *location, const std::string &imagePath, const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword) |
2183 | { |
2184 | ::Page *destPage = getPage(page); |
2185 | if (destPage == nullptr) { |
2186 | return false; |
2187 | } |
2188 | Ref imageResourceRef = Ref::INVALID(); |
2189 | if (!imagePath.empty()) { |
2190 | imageResourceRef = ImageEmbeddingUtils::embed(xref, imagePath); |
2191 | if (imageResourceRef == Ref::INVALID()) { |
2192 | return false; |
2193 | } |
2194 | } |
2195 | |
2196 | Form *form = catalog->getCreateForm(); |
2197 | const std::string pdfFontName = form->findPdfFontNameToUseForSigning(); |
2198 | if (pdfFontName.empty()) { |
2199 | return false; |
2200 | } |
2201 | |
2202 | const DefaultAppearance da { { objName, pdfFontName.c_str() }, fontSize, std::move(fontColor) }; |
2203 | |
2204 | Object annotObj = Object(new Dict(getXRef())); |
2205 | annotObj.dictSet(key: "Type" , val: Object(objName, "Annot" )); |
2206 | annotObj.dictSet(key: "Subtype" , val: Object(objName, "Widget" )); |
2207 | annotObj.dictSet(key: "FT" , val: Object(objName, "Sig" )); |
2208 | annotObj.dictSet(key: "T" , val: Object(partialFieldName)); |
2209 | Array *rectArray = new Array(getXRef()); |
2210 | rectArray->add(elem: Object(rect.x1)); |
2211 | rectArray->add(elem: Object(rect.y1)); |
2212 | rectArray->add(elem: Object(rect.x2)); |
2213 | rectArray->add(elem: Object(rect.y2)); |
2214 | annotObj.dictSet(key: "Rect" , val: Object(rectArray)); |
2215 | |
2216 | const std::string daStr = da.toAppearanceString(); |
2217 | annotObj.dictSet(key: "DA" , val: Object(new GooString(daStr))); |
2218 | |
2219 | const Ref ref = getXRef()->addIndirectObject(o: annotObj); |
2220 | catalog->addFormToAcroForm(formRef: ref); |
2221 | // say that there a now signatures and that we should append only |
2222 | catalog->getAcroForm()->dictSet(key: "SigFlags" , val: Object(3)); |
2223 | catalog->setAcroFormModified(); |
2224 | |
2225 | form->ensureFontsForAllCharacters(unicodeText: &signatureText, pdfFontNameToEmulate: pdfFontName); |
2226 | form->ensureFontsForAllCharacters(unicodeText: &signatureTextLeft, pdfFontNameToEmulate: pdfFontName); |
2227 | |
2228 | std::unique_ptr<::FormFieldSignature> field = std::make_unique<::FormFieldSignature>(args: this, args: std::move(annotObj), args: ref, args: nullptr, args: nullptr); |
2229 | field->setCustomAppearanceContent(signatureText); |
2230 | field->setCustomAppearanceLeftContent(signatureTextLeft); |
2231 | field->setCustomAppearanceLeftFontSize(leftFontSize); |
2232 | field->setImageResource(imageResourceRef); |
2233 | |
2234 | Object refObj(ref); |
2235 | AnnotWidget *signatureAnnot = new AnnotWidget(this, field->getObj(), &refObj, field.get()); |
2236 | signatureAnnot->setFlags(signatureAnnot->getFlags() | Annot::flagPrint | Annot::flagLocked | Annot::flagNoRotate); |
2237 | Dict dummy(getXRef()); |
2238 | auto appearCharacs = std::make_unique<AnnotAppearanceCharacs>(args: &dummy); |
2239 | appearCharacs->setBorderColor(std::move(borderColor)); |
2240 | appearCharacs->setBackColor(std::move(backgroundColor)); |
2241 | signatureAnnot->setAppearCharacs(std::move(appearCharacs)); |
2242 | |
2243 | signatureAnnot->generateFieldAppearance(); |
2244 | signatureAnnot->updateAppearanceStream(); |
2245 | |
2246 | FormWidget *formWidget = field->getWidget(i: field->getNumWidgets() - 1); |
2247 | formWidget->setWidgetAnnotation(signatureAnnot); |
2248 | |
2249 | destPage->addAnnot(annot: signatureAnnot); |
2250 | |
2251 | std::unique_ptr<AnnotBorder> border(new AnnotBorderArray()); |
2252 | border->setWidth(borderWidth); |
2253 | signatureAnnot->setBorder(std::move(border)); |
2254 | |
2255 | FormWidgetSignature *fws = dynamic_cast<FormWidgetSignature *>(formWidget); |
2256 | if (fws) { |
2257 | const bool res = fws->signDocument(filename: saveFilename, certNickname, password, reason, location, ownerPassword, userPassword); |
2258 | |
2259 | // Now remove the signature stuff in case the user wants to continue editing stuff |
2260 | // So the document object is clean |
2261 | const Object &vRefObj = field->getObj()->dictLookupNF(key: "V" ); |
2262 | if (vRefObj.isRef()) { |
2263 | getXRef()->removeIndirectObject(r: vRefObj.getRef()); |
2264 | } |
2265 | destPage->removeAnnot(annot: signatureAnnot); |
2266 | catalog->removeFormFromAcroForm(formRef: ref); |
2267 | getXRef()->removeIndirectObject(r: ref); |
2268 | |
2269 | return res; |
2270 | } |
2271 | |
2272 | return false; |
2273 | } |
2274 | |