//========================================================================
//
// PDFDoc.h
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2005, 2009, 2014, 2015, 2017-2022 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008 Pino Toscano <pino@kde.org>
// Copyright (C) 2008 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2009 Eric Toombs <ewtoombs@uwaterloo.ca>
// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
// Copyright (C) 2010, 2014 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2010 Srinivas Adicherla <srinivas.adicherla@geodesic.com>
// Copyright (C) 2011, 2013, 2014, 2016 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013, 2018 Adam Reichold <adamreichold@myopera.com>
// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
// Copyright (C) 2015 André Guerreiro <aguerreiro1985@gmail.com>
// Copyright (C) 2015 André Esser <bepandre@hotmail.com>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
// Copyright (C) 2020-2023 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2020 Nelson Benítez León <nbenitezl@gmail.com>
// Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
// Copyright (C) 2021 Marek Kasik <mkasik@redhat.com>
// Copyright (C) 2022 Felix Jung <fxjung@posteo.de>
// Copyright (C) 2022 crt <chluo@cse.cuhk.edu.hk>
// Copyright 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
48
49#ifndef PDFDOC_H
50#define PDFDOC_H
51
52#include <algorithm>
53#include <cstdio>
54#include <mutex>
55
56#include "poppler-config.h"
57
58#include "poppler_private_export.h"
59
60#include "XRef.h"
61#include "Catalog.h"
62#include "Page.h"
63#include "Annot.h"
64#include "ErrorCodes.h"
65#include "Form.h"
66#include "OptionalContent.h"
67#include "Stream.h"
68
// Forward declarations (alphabetical) for types this header only names in
// pointer, reference or smart-pointer positions.
class BaseStream;
class GooFile;
class GooString;
class Hints;
class Linearization;
class LinkAction;
class LinkDest;
class Links;
class OutputDev;
class Outline;
class SecurityHandler;
class StructTreeRoot;
81
// Write-mode selector taken by PDFDoc::saveAs(), which defaults to
// writeStandard. Judging by the names, the two "force" values override the
// default choice between a complete rewrite and an incremental update
// (NOTE(review): confirm against the saveAs() implementation).
enum PDFWriteMode
{
    writeStandard = 0,
    writeForceRewrite = 1,
    writeForceIncremental = 2
};
88
// Subtype family of a document, as returned by PDFDoc::getPDFSubtype().
// The PDFA/PDFE/PDFUA/PDFVT/PDFX enumerators presumably correspond to the
// PDF/A, PDF/E, PDF/UA, PDF/VT and PDF/X standards; how subtypeNull differs
// from subtypeNone is not visible in this header (NOTE(review): confirm in
// PDFDoc.cpp).
enum PDFSubtype
{
    subtypeNull = 0,
    subtypePDFA = 1,
    subtypePDFE = 2,
    subtypePDFUA = 3,
    subtypePDFVT = 4,
    subtypePDFX = 5,
    subtypeNone = 6
};
99
// "Part" component of a document's subtype, as returned by
// PDFDoc::getPDFSubtypePart(). subtypePart1..subtypePart8 have the numeric
// values 1..8; the Null/None sentinels sit at either end of the range.
enum PDFSubtypePart
{
    subtypePartNull = 0,
    subtypePart1 = 1,
    subtypePart2 = 2,
    subtypePart3 = 3,
    subtypePart4 = 4,
    subtypePart5 = 5,
    subtypePart6 = 6,
    subtypePart7 = 7,
    subtypePart8 = 8,
    subtypePartNone = 9
};
113
// Conformance-level component of a document's subtype, as returned by
// PDFDoc::getPDFSubtypeConformance(). The letter suffixes presumably mirror
// the conformance letters used by the respective PDF standards
// (NOTE(review): confirm against the subtype extraction code).
enum PDFSubtypeConformance
{
    subtypeConfNull = 0,
    subtypeConfA = 1,
    subtypeConfB = 2,
    subtypeConfG = 3,
    subtypeConfN = 4,
    subtypeConfP = 5,
    subtypeConfPG = 6,
    subtypeConfU = 7,
    subtypeConfNone = 8
};
126
127//------------------------------------------------------------------------
128// PDFDoc
129//------------------------------------------------------------------------
130
131class POPPLER_PRIVATE_EXPORT PDFDoc
132{
133public:
134 explicit PDFDoc(std::unique_ptr<GooString> &&fileNameA, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr,
135 const std::function<void()> &xrefReconstructedCallback = {});
136
137#ifdef _WIN32
138 PDFDoc(wchar_t *fileNameA, int fileNameLen, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr, const std::function<void()> &xrefReconstructedCallback = {});
139#endif
140
141 explicit PDFDoc(BaseStream *strA, const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {}, void *guiDataA = nullptr, const std::function<void()> &xrefReconstructedCallback = {});
142 ~PDFDoc();
143
144 PDFDoc(const PDFDoc &) = delete;
145 PDFDoc &operator=(const PDFDoc &) = delete;
146
147 static std::unique_ptr<PDFDoc> ErrorPDFDoc(int errorCode, std::unique_ptr<GooString> &&fileNameA);
148
149 // Was PDF document successfully opened?
150 bool isOk() const { return ok; }
151
152 // Get the error code (if isOk() returns false).
153 int getErrorCode() const { return errCode; }
154
155 // Get the error code returned by fopen() (if getErrorCode() ==
156 // errOpenFile).
157 int getFopenErrno() const { return fopenErrno; }
158
159 // Get file name.
160 const GooString *getFileName() const { return fileName.get(); }
161#ifdef _WIN32
162 wchar_t *getFileNameU() { return fileNameU; }
163#endif
164
165 // Get the linearization table.
166 Linearization *getLinearization();
167 bool checkLinearization();
168
169 // Get the xref table.
170 XRef *getXRef() const { return xref; }
171
172 // Get catalog.
173 Catalog *getCatalog() const { return catalog; }
174
175 // Get optional content configuration
176 OCGs *getOptContentConfig() const { return catalog->getOptContentConfig(); }
177
178 // Get base stream.
179 BaseStream *getBaseStream() const { return str; }
180
181 // Get page parameters.
182 double getPageMediaWidth(int page) { return getPage(page) ? getPage(page)->getMediaWidth() : 0.0; }
183 double getPageMediaHeight(int page) { return getPage(page) ? getPage(page)->getMediaHeight() : 0.0; }
184 double getPageCropWidth(int page) { return getPage(page) ? getPage(page)->getCropWidth() : 0.0; }
185 double getPageCropHeight(int page) { return getPage(page) ? getPage(page)->getCropHeight() : 0.0; }
186 int getPageRotate(int page) { return getPage(page) ? getPage(page)->getRotate() : 0; }
187
188 // Get number of pages.
189 int getNumPages();
190
191 // Return the contents of the metadata stream, or nullptr if there is
192 // no metadata.
193 std::unique_ptr<GooString> readMetadata() const { return catalog->readMetadata(); }
194
195 // Return the structure tree root object.
196 const StructTreeRoot *getStructTreeRoot() const { return catalog->getStructTreeRoot(); }
197
198 // Get page.
199 Page *getPage(int page);
200
201 // Display a page.
202 void displayPage(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr,
203 bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
204
205 // Display a range of pages.
206 void displayPages(OutputDev *out, int firstPage, int lastPage, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, void *abortCheckCbkData = nullptr,
207 bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr);
208
209 // Display part of a page.
210 void displayPageSlice(OutputDev *out, int page, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, bool printing, int sliceX, int sliceY, int sliceW, int sliceH, bool (*abortCheckCbk)(void *data) = nullptr,
211 void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr, bool copyXRef = false);
212
213 // Find a page, given its object ID. Returns page number, or 0 if
214 // not found.
215 int findPage(const Ref ref) { return catalog->findPage(pageRef: ref); }
216
217 // Returns the links for the current page, transferring ownership to
218 // the caller.
219 std::unique_ptr<Links> getLinks(int page);
220
221 // Find a named destination. Returns the link destination, or
222 // nullptr if <name> is not a destination.
223 std::unique_ptr<LinkDest> findDest(const GooString *name) { return catalog->findDest(name); }
224
225 // Process the links for a page.
226 void processLinks(OutputDev *out, int page);
227
228 // Return the outline object.
229 Outline *getOutline();
230
231 // Is the file encrypted?
232 bool isEncrypted() { return xref->isEncrypted(); }
233
234 std::vector<FormFieldSignature *> getSignatureFields();
235
236 // Check various permissions.
237 bool okToPrint(bool ignoreOwnerPW = false) { return xref->okToPrint(ignoreOwnerPW); }
238 bool okToPrintHighRes(bool ignoreOwnerPW = false) { return xref->okToPrintHighRes(ignoreOwnerPW); }
239 bool okToChange(bool ignoreOwnerPW = false) { return xref->okToChange(ignoreOwnerPW); }
240 bool okToCopy(bool ignoreOwnerPW = false) { return xref->okToCopy(ignoreOwnerPW); }
241 bool okToAddNotes(bool ignoreOwnerPW = false) { return xref->okToAddNotes(ignoreOwnerPW); }
242 bool okToFillForm(bool ignoreOwnerPW = false) { return xref->okToFillForm(ignoreOwnerPW); }
243 bool okToAccessibility(bool ignoreOwnerPW = false) { return xref->okToAccessibility(ignoreOwnerPW); }
244 bool okToAssemble(bool ignoreOwnerPW = false) { return xref->okToAssemble(ignoreOwnerPW); }
245
246 // Is this document linearized?
247 bool isLinearized(bool tryingToReconstruct = false);
248
249 // Return the document's Info dictionary (if any).
250 Object getDocInfo() { return xref->getDocInfo(); }
251 Object getDocInfoNF() { return xref->getDocInfoNF(); }
252
253 // Remove the document's Info dictionary and update the trailer dictionary.
254 void removeDocInfo() { xref->removeDocInfo(); }
255
256 // Set doc info string entry. nullptr or empty value will cause a removal.
257 // Takes ownership of value.
258 void setDocInfoStringEntry(const char *key, GooString *value);
259
260 // Set document's properties in document's Info dictionary.
261 // nullptr or empty value will cause a removal.
262 // Takes ownership of value.
263 void setDocInfoTitle(GooString *title) { setDocInfoStringEntry(key: "Title", value: title); }
264 void setDocInfoAuthor(GooString *author) { setDocInfoStringEntry(key: "Author", value: author); }
265 void setDocInfoSubject(GooString *subject) { setDocInfoStringEntry(key: "Subject", value: subject); }
266 void setDocInfoKeywords(GooString *keywords) { setDocInfoStringEntry(key: "Keywords", value: keywords); }
267 void setDocInfoCreator(GooString *creator) { setDocInfoStringEntry(key: "Creator", value: creator); }
268 void setDocInfoProducer(GooString *producer) { setDocInfoStringEntry(key: "Producer", value: producer); }
269 void setDocInfoCreatDate(GooString *creatDate) { setDocInfoStringEntry(key: "CreationDate", value: creatDate); }
270 void setDocInfoModDate(GooString *modDate) { setDocInfoStringEntry(key: "ModDate", value: modDate); }
271
272 // Get document's properties from document's Info dictionary.
273 // Returns nullptr on fail.
274 std::unique_ptr<GooString> getDocInfoStringEntry(const char *key);
275
276 std::unique_ptr<GooString> getDocInfoTitle() { return getDocInfoStringEntry(key: "Title"); }
277 std::unique_ptr<GooString> getDocInfoAuthor() { return getDocInfoStringEntry(key: "Author"); }
278 std::unique_ptr<GooString> getDocInfoSubject() { return getDocInfoStringEntry(key: "Subject"); }
279 std::unique_ptr<GooString> getDocInfoKeywords() { return getDocInfoStringEntry(key: "Keywords"); }
280 std::unique_ptr<GooString> getDocInfoCreator() { return getDocInfoStringEntry(key: "Creator"); }
281 std::unique_ptr<GooString> getDocInfoProducer() { return getDocInfoStringEntry(key: "Producer"); }
282 std::unique_ptr<GooString> getDocInfoCreatDate() { return getDocInfoStringEntry(key: "CreationDate"); }
283 std::unique_ptr<GooString> getDocInfoModDate() { return getDocInfoStringEntry(key: "ModDate"); }
284
285 // Return the PDF subtype, part, and conformance
286 PDFSubtype getPDFSubtype() const { return pdfSubtype; }
287 PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
288 PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
289
290 // Return the PDF version specified by the file (either header or catalog).
291 int getPDFMajorVersion() const { return std::max(a: headerPdfMajorVersion, b: catalog->getPDFMajorVersion()); }
292 int getPDFMinorVersion() const
293 {
294 const int catalogMajorVersion = catalog->getPDFMajorVersion();
295 if (catalogMajorVersion > headerPdfMajorVersion) {
296 return catalog->getPDFMinorVersion();
297 } else if (headerPdfMajorVersion > catalogMajorVersion) {
298 return headerPdfMinorVersion;
299 } else {
300 return std::max(a: headerPdfMinorVersion, b: catalog->getPDFMinorVersion());
301 }
302 }
303
304 // Return the PDF ID in the trailer dictionary (if any).
305 bool getID(GooString *permanent_id, GooString *update_id) const;
306
307 // Save one page with another name.
308 int savePageAs(const GooString &name, int pageNo);
309 // Save this file with another name.
310 int saveAs(const GooString &name, PDFWriteMode mode = writeStandard);
311 // Save this file in the given output stream.
312 int saveAs(OutStream *outStr, PDFWriteMode mode = writeStandard);
313 // Save this file with another name without saving changes
314 int saveWithoutChangesAs(const GooString &name);
315 // Save this file in the given output stream without saving changes
316 int saveWithoutChangesAs(OutStream *outStr);
317
318 // Return a pointer to the GUI (XPDFCore or WinPDFCore object).
319 void *getGUIData() { return guiData; }
320
321 // rewrite pageDict with MediaBox, CropBox and new page CTM
322 bool replacePageDict(int pageNo, int rotate, const PDFRectangle *mediaBox, const PDFRectangle *cropBox);
323 bool markPageObjects(Dict *pageDict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
324 bool markAnnotations(Object *annots, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldPageNum, int newPageNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
325 void markAcroForm(Object *afObj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum);
326 // write all objects used by pageDict to outStr
327 unsigned int writePageObjects(OutStream *outStr, XRef *xRef, unsigned int numOffset, bool combine = false);
328 static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set<Dict *> *alreadyWrittenDicts = nullptr);
329 static void writeObject(Object *obj, OutStream *outStr, XRef *xref, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts = nullptr);
330 static void writeHeader(OutStream *outStr, int major, int minor);
331
332 static Object createTrailerDict(int uxrefSize, bool incrUpdate, Goffset startxRef, Ref *root, XRef *xRef, const char *fileName, Goffset fileSize);
333 static void writeXRefTableTrailer(Object &&trailerDict, XRef *uxref, bool writeAllEntries, Goffset uxrefOffset, OutStream *outStr, XRef *xRef);
334 static void writeXRefStreamTrailer(Object &&trailerDict, XRef *uxref, Ref *uxrefStreamRef, Goffset uxrefOffset, OutStream *outStr, XRef *xRef);
335 // scans the PDF and returns whether it contains any javascript
336 bool hasJavascript();
337
338 // Arguments signatureText and signatureTextLeft are UTF-16 big endian strings with BOM.
339 // Arguments reason and location are UTF-16 big endian strings with BOM. An empty string and nullptr are acceptable too.
340 // Argument imagePath is a background image (a path to a file).
341 // sign() takes ownership of partialFieldName.
342 bool sign(const std::string &saveFilename, const std::string &certNickname, const std::string &password, GooString *partialFieldName, int page, const PDFRectangle &rect, const GooString &signatureText,
343 const GooString &signatureTextLeft, double fontSize, double leftFontSize, std::unique_ptr<AnnotColor> &&fontColor, double borderWidth, std::unique_ptr<AnnotColor> &&borderColor, std::unique_ptr<AnnotColor> &&backgroundColor,
344 const GooString *reason = nullptr, const GooString *location = nullptr, const std::string &imagePath = "", const std::optional<GooString> &ownerPassword = {}, const std::optional<GooString> &userPassword = {});
345
346private:
347 // insert referenced objects in XRef
348 bool markDictionary(Dict *dict, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts);
349 bool markObject(Object *obj, XRef *xRef, XRef *countRef, unsigned int numOffset, int oldRefNum, int newRefNum, std::set<Dict *> *alreadyMarkedDicts = nullptr);
350
351 // Sanitizes the string so that it does
352 // not contain any ( ) < > [ ] { } / %
353 static std::string sanitizedName(const std::string &name);
354
355 static void writeDictionary(Dict *dict, OutStream *outStr, XRef *xRef, unsigned int numOffset, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref, std::set<Dict *> *alreadyWrittenDicts);
356
357 // Write object header to current file stream and return its offset
358 static Goffset writeObjectHeader(Ref *ref, OutStream *outStr);
359 static void writeObjectFooter(OutStream *outStr);
360
361 inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen)
362 {
363 writeObject(obj, outStr, xref: getXRef(), numOffset: 0, fileKey, encAlgorithm, keyLength, ref: { .num: objNum, .gen: objGen });
364 }
365 inline void writeObject(Object *obj, OutStream *outStr, unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref) { writeObject(obj, outStr, xref: getXRef(), numOffset: 0, fileKey, encAlgorithm, keyLength, ref); }
366 static void writeStream(Stream *str, OutStream *outStr);
367 static void writeRawStream(Stream *str, OutStream *outStr);
368 void writeXRefTableTrailer(Goffset uxrefOffset, XRef *uxref, bool writeAllEntries, int uxrefSize, OutStream *outStr, bool incrUpdate);
369 static void writeString(const GooString *s, OutStream *outStr, const unsigned char *fileKey, CryptAlgorithm encAlgorithm, int keyLength, Ref ref);
370 void saveIncrementalUpdate(OutStream *outStr);
371 void saveCompleteRewrite(OutStream *outStr);
372
373 Page *parsePage(int page);
374
375 // Get hints.
376 Hints *getHints();
377
378 PDFDoc();
379 bool setup(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword, const std::function<void()> &xrefReconstructedCallback);
380 bool checkFooter();
381 void checkHeader();
382 bool checkEncryption(const std::optional<GooString> &ownerPassword, const std::optional<GooString> &userPassword);
383 void extractPDFSubtype();
384
385 // Get the offset of the start xref table.
386 Goffset getStartXRef(bool tryingToReconstruct = false);
387 // Get the offset of the entries in the main XRef table of a
388 // linearized document (0 for non linearized documents).
389 Goffset getMainXRefEntriesOffset(bool tryingToReconstruct = false);
390 long long strToLongLong(const char *s);
391
392 std::unique_ptr<GooString> fileName;
393#ifdef _WIN32
394 wchar_t *fileNameU = nullptr;
395#endif
396 std::unique_ptr<GooFile> file;
397 BaseStream *str = nullptr;
398 void *guiData = nullptr;
399 int headerPdfMajorVersion;
400 int headerPdfMinorVersion;
401 PDFSubtype pdfSubtype;
402 PDFSubtypePart pdfPart;
403 PDFSubtypeConformance pdfConformance;
404 Linearization *linearization = nullptr;
405 // linearizationState = 0: unchecked
406 // linearizationState = 1: checked and valid
407 // linearizationState = 2: checked and invalid
408 int linearizationState;
409 XRef *xref = nullptr;
410 SecurityHandler *secHdlr = nullptr;
411 Catalog *catalog = nullptr;
412 Hints *hints = nullptr;
413 Outline *outline = nullptr;
414 Page **pageCache = nullptr;
415
416 bool ok = false;
417 int errCode = errNone;
418 // If there is an error opening the PDF file with fopen() in the constructor,
419 // then the POSIX errno will be here.
420 int fopenErrno;
421
422 Goffset startXRefPos = -1; // offset of last xref table
423 mutable std::recursive_mutex mutex;
424};
425
426#endif
427

// Source: poppler/poppler/PDFDoc.h