1 | //======================================================================== |
2 | // |
3 | // XRef.h |
4 | // |
5 | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2005 Brad Hards <bradh@frogmouth.net> |
17 | // Copyright (C) 2006, 2008, 2010-2013, 2017-2022, 2024 Albert Astals Cid <aacid@kde.org> |
18 | // Copyright (C) 2007-2008 Julien Rebetez <julienr@svn.gnome.org> |
19 | // Copyright (C) 2007 Carlos Garcia Campos <carlosgc@gnome.org> |
20 | // Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com> |
21 | // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> |
22 | // Copyright (C) 2012, 2013, 2016 Thomas Freitag <Thomas.Freitag@kabelmail.de> |
23 | // Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it> |
24 | // Copyright (C) 2013, 2017, 2019 Adrian Johnson <ajohnson@redneon.com> |
25 | // Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com> |
26 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
27 | // Copyright (C) 2018 Marek Kasik <mkasik@redhat.com> |
28 | // Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com> |
29 | // Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net. |
30 | // Copyright (C) 2023, 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
31 | // |
32 | // To see a description of the changes please see the Changelog file that |
33 | // came with your tarball or type make ChangeLog if you are building from git |
34 | // |
35 | //======================================================================== |
36 | |
37 | #ifndef XREF_H |
38 | #define XREF_H |
39 | |
40 | #include <functional> |
41 | |
42 | #include "poppler-config.h" |
43 | #include "poppler_private_export.h" |
44 | #include "Object.h" |
45 | #include "Stream.h" |
46 | #include "PopplerCache.h" |
47 | |
48 | class Dict; |
49 | class Stream; |
50 | class Parser; |
51 | class ObjectStream; |
52 | |
53 | //------------------------------------------------------------------------ |
54 | // XRef |
55 | //------------------------------------------------------------------------ |
56 | |
// Kind of a cross-reference entry; stored in XRefEntry::type.
// The numeric values are the ones the compiler would assign implicitly.
enum XRefEntryType
{
    xrefEntryFree = 0,
    xrefEntryUncompressed = 1,
    xrefEntryCompressed = 2,
    xrefEntryNone = 3
};
64 | |
65 | struct XRefEntry |
66 | { |
67 | Goffset offset; |
68 | int gen; |
69 | XRefEntryType type; |
70 | int flags; |
71 | Object obj; // if this entry was updated, obj will contains the updated object |
72 | |
73 | enum Flag |
74 | { |
75 | // Regular flags |
76 | Updated, // Entry was modified |
77 | Parsing, // Entry is currently being parsed |
78 | |
79 | // Special flags -- available only after xref->scanSpecialFlags() is run |
80 | Unencrypted, // Entry is stored in unencrypted form (meaningless in unencrypted documents) |
81 | DontRewrite // Entry must not be written back in case of full rewrite |
82 | }; |
83 | |
84 | inline bool getFlag(Flag flag) const |
85 | { |
86 | const int mask = (1 << (int)flag); |
87 | return (flags & mask) != 0; |
88 | } |
89 | |
90 | inline void setFlag(Flag flag, bool value) |
91 | { |
92 | const int mask = (1 << (int)flag); |
93 | if (value) { |
94 | flags |= mask; |
95 | } else { |
96 | flags &= ~mask; |
97 | } |
98 | } |
99 | }; |
100 | |
// How to compress an added stream
enum class StreamCompression
{
    // No compression
    None,
    // Compresses the stream
    Compress,
};
107 | |
108 | class POPPLER_PRIVATE_EXPORT XRef |
109 | { |
110 | public: |
111 | // Constructor, create an empty XRef, used for PDF writing |
112 | XRef(); |
113 | // Constructor, create an empty XRef but with info dict, used for PDF writing |
114 | explicit XRef(const Object *trailerDictA); |
115 | // Constructor. Read xref table from stream. |
116 | XRef(BaseStream *strA, Goffset pos, Goffset mainXRefEntriesOffsetA = 0, bool *wasReconstructed = nullptr, bool reconstruct = false, const std::function<void()> &xrefReconstructedCallback = {}); |
117 | |
118 | // Destructor. |
119 | ~XRef(); |
120 | |
121 | XRef(const XRef &) = delete; |
122 | XRef &operator=(const XRef &) = delete; |
123 | |
124 | // Copy xref but with new base stream! |
125 | XRef *copy() const; |
126 | |
127 | // Is xref table valid? |
128 | bool isOk() const { return ok; } |
129 | |
130 | // Is the last XRef section a stream or a table? |
131 | bool isXRefStream() const { return xRefStream; } |
132 | |
133 | // Get the error code (if isOk() returns false). |
134 | int getErrorCode() const { return errCode; } |
135 | |
136 | // Set the encryption parameters. |
137 | void setEncryption(int permFlagsA, bool ownerPasswordOkA, const unsigned char *fileKeyA, int keyLengthA, int encVersionA, int encRevisionA, CryptAlgorithm encAlgorithmA); |
138 | // Mark Encrypt entry as Unencrypted |
139 | void markUnencrypted(); |
140 | |
141 | void getEncryptionParameters(unsigned char **fileKeyA, CryptAlgorithm *encAlgorithmA, int *keyLengthA); |
142 | |
143 | // Is the file encrypted? |
144 | bool isEncrypted() const { return encrypted; } |
145 | |
146 | // Is the given Ref encrypted? |
147 | bool isRefEncrypted(Ref r); |
148 | |
149 | // Check various permissions. |
150 | bool okToPrint(bool ignoreOwnerPW = false) const; |
151 | bool okToPrintHighRes(bool ignoreOwnerPW = false) const; |
152 | bool okToChange(bool ignoreOwnerPW = false) const; |
153 | bool okToCopy(bool ignoreOwnerPW = false) const; |
154 | bool okToAddNotes(bool ignoreOwnerPW = false) const; |
155 | bool okToFillForm(bool ignoreOwnerPW = false) const; |
156 | bool okToAccessibility(bool ignoreOwnerPW = false) const; |
157 | bool okToAssemble(bool ignoreOwnerPW = false) const; |
158 | int getPermFlags() const { return permFlags; } |
159 | |
160 | // Get catalog object. |
161 | Object getCatalog(); |
162 | |
163 | // Fetch an indirect reference. |
164 | Object fetch(const Ref ref, int recursion = 0); |
165 | // If endPos is not null, returns file position after parsing the object. This will |
166 | // be a few bytes after the end of the object due to the parser reading ahead. |
167 | // Returns -1 if object is in compressed stream. |
168 | Object fetch(int num, int gen, int recursion = 0, Goffset *endPos = nullptr); |
169 | |
170 | // Return the document's Info dictionary (if any). |
171 | Object getDocInfo(); |
172 | Object getDocInfoNF(); |
173 | |
174 | // Create and return the document's Info dictionary if needed. |
175 | // Otherwise return the existing one. |
176 | // Returns in the given parameter the Ref the Info is in |
177 | Object createDocInfoIfNeeded(Ref *ref); |
178 | |
179 | // Remove the document's Info dictionary and update the trailer dictionary. |
180 | void removeDocInfo(); |
181 | |
182 | // Return the number of objects in the xref table. |
183 | int getNumObjects() const { return size; } |
184 | |
185 | // Return the catalog object reference. |
186 | int getRootNum() const { return rootNum; } |
187 | int getRootGen() const { return rootGen; } |
188 | Ref getRoot() const { return { .num: rootNum, .gen: rootGen }; } |
189 | |
190 | // Get end position for a stream in a damaged file. |
191 | // Returns false if unknown or file is not damaged. |
192 | bool getStreamEnd(Goffset streamStart, Goffset *streamEnd); |
193 | |
194 | // Retuns the entry that belongs to the offset |
195 | int getNumEntry(Goffset offset); |
196 | |
197 | // Scans the document and sets special flags in all xref entries. One of those |
198 | // flags is Unencrypted, which affects how the object is fetched. Therefore, |
199 | // this function must be called before fetching unencrypted objects (e.g. |
200 | // Encrypt dictionary, XRef streams). Note that the code that initializes |
201 | // decryption doesn't need to call this function, because it runs before |
202 | // decryption is enabled, and therefore the Unencrypted flag is ignored. |
203 | void scanSpecialFlags(); |
204 | |
205 | // Direct access. |
206 | XRefEntry *getEntry(int i, bool complainIfMissing = true); |
207 | Object *getTrailerDict() { return &trailerDict; } |
208 | |
209 | // Was the XRef modified? |
210 | bool isModified() const { return modified; } |
211 | // Set the modification flag for XRef to true. |
212 | void setModified() { modified = true; } |
213 | |
214 | // Write access |
215 | void setModifiedObject(const Object *o, Ref r); |
216 | Ref addIndirectObject(const Object &o); |
217 | void removeIndirectObject(Ref r); |
218 | bool add(int num, int gen, Goffset offs, bool used); |
219 | void add(Ref ref, Goffset offs, bool used); |
220 | // Adds a stream object using AutoFreeMemStream. |
221 | // The function takes ownership over dict and buffer. |
222 | // The buffer should be created using gmalloc(). |
223 | // For stream compression, if the data is already compressed |
224 | // don't compress again. If it is not compressed, use compress (Flate / zlib) |
225 | // Returns ref to a new object. |
226 | Ref addStreamObject(Dict *dict, char *buffer, const Goffset bufferSize, StreamCompression compression); |
227 | Ref addStreamObject(Dict *dict, uint8_t *buffer, const Goffset bufferSize, StreamCompression compression); |
228 | |
229 | // Output XRef table to stream |
230 | void writeTableToFile(OutStream *outStr, bool writeAllEntries); |
231 | // Output XRef stream contents to GooString and fill trailerDict fields accordingly |
232 | void writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref); |
233 | |
234 | // to be thread safe during write where changes are not allowed |
235 | void lock(); |
236 | void unlock(); |
237 | |
238 | private: |
239 | BaseStream *str; // input stream |
240 | Goffset start; // offset in file (to allow for garbage |
241 | // at beginning of file) |
242 | XRefEntry *entries; // xref entries |
243 | int capacity; // size of <entries> array |
244 | int size; // number of entries |
245 | int rootNum, rootGen; // catalog dict |
246 | bool ok; // true if xref table is valid |
247 | int errCode; // error code (if <ok> is false) |
248 | bool xrefReconstructed; // marker, true if xref was already reconstructed |
249 | Object trailerDict; // trailer dictionary |
250 | bool modified; |
251 | Goffset *streamEnds; // 'endstream' positions - only used in |
252 | // damaged files |
253 | int streamEndsLen; // number of valid entries in streamEnds |
254 | PopplerCache<Goffset, ObjectStream> objStrs; // cached object streams |
255 | bool encrypted; // true if file is encrypted |
256 | int encRevision; |
257 | int encVersion; // encryption algorithm |
258 | CryptAlgorithm encAlgorithm; // encryption algorithm |
259 | int keyLength; // length of key, in bytes |
260 | int permFlags; // permission bits |
261 | unsigned char fileKey[32]; // file decryption key |
262 | bool ownerPasswordOk; // true if owner password is correct |
263 | Goffset prevXRefOffset; // position of prev XRef section (= next to read) |
264 | Goffset mainXRefEntriesOffset; // offset of entries in main XRef table |
265 | bool xRefStream; // true if last XRef section is a stream |
266 | Goffset mainXRefOffset; // position of the main XRef table/stream |
267 | bool scannedSpecialFlags; // true if scanSpecialFlags has been called |
268 | bool strOwner; // true if str is owned by the instance |
269 | mutable std::recursive_mutex mutex; |
270 | std::function<void()> xrefReconstructedCb; |
271 | |
272 | RefRecursionChecker refsBeingFetched; |
273 | |
274 | int reserve(int newSize); |
275 | int resize(int newSize); |
276 | bool readXRef(Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum); |
277 | bool readXRefTable(Parser *parser, Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum); |
278 | bool readXRefStreamSection(Stream *xrefStr, const int *w, int first, int n); |
279 | bool readXRefStream(Stream *xrefStr, Goffset *pos); |
280 | bool constructXRef(bool *wasReconstructed, bool needCatalogDict = false); |
281 | bool parseEntry(Goffset offset, XRefEntry *entry); |
282 | void readXRefUntil(int untilEntryNum, std::vector<int> *xrefStreamObjsNum = nullptr); |
283 | void markUnencrypted(Object *obj); |
284 | |
285 | class XRefWriter |
286 | { |
287 | public: |
288 | XRefWriter() = default; |
289 | virtual void startSection(int first, int count) = 0; |
290 | virtual void writeEntry(Goffset offset, int gen, XRefEntryType type) = 0; |
291 | virtual ~XRefWriter(); |
292 | |
293 | XRefWriter(const XRefWriter &) = delete; |
294 | XRefWriter &operator=(const XRefWriter &other) = delete; |
295 | }; |
296 | |
297 | // XRefWriter subclass that writes a XRef table |
298 | class XRefTableWriter : public XRefWriter |
299 | { |
300 | public: |
301 | explicit XRefTableWriter(OutStream *outStrA); |
302 | void startSection(int first, int count) override; |
303 | void writeEntry(Goffset offset, int gen, XRefEntryType type) override; |
304 | |
305 | private: |
306 | OutStream *outStr; |
307 | }; |
308 | |
309 | // XRefWriter subclass that writes a XRef stream |
310 | class XRefStreamWriter : public XRefWriter |
311 | { |
312 | public: |
313 | XRefStreamWriter(Array *index, GooString *stmBuf, int offsetSize); |
314 | void startSection(int first, int count) override; |
315 | void writeEntry(Goffset offset, int gen, XRefEntryType type) override; |
316 | |
317 | private: |
318 | Array *index; |
319 | GooString *stmBuf; |
320 | int offsetSize; |
321 | }; |
322 | |
323 | // Dummy XRefWriter subclass that only checks if all offsets fit in 4 bytes |
324 | class XRefPreScanWriter : public XRefWriter |
325 | { |
326 | public: |
327 | XRefPreScanWriter(); |
328 | void startSection(int first, int count) override; |
329 | void writeEntry(Goffset offset, int gen, XRefEntryType type) override; |
330 | |
331 | bool hasOffsetsBeyond4GB; |
332 | }; |
333 | |
334 | void writeXRef(XRefWriter *writer, bool writeAllEntries); |
335 | }; |
336 | |
337 | #endif |
338 | |