//========================================================================
//
// XRef.h
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================
8
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2006, 2008, 2010-2013, 2017-2022, 2024 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2007-2008 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2007 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2010 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2012, 2013, 2016 Thomas Freitag <Thomas.Freitag@kabelmail.de>
// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013, 2017, 2019 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Marek Kasik <mkasik@redhat.com>
// Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
// Copyright (C) 2023, 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
36
37#ifndef XREF_H
38#define XREF_H
39
40#include <functional>
41
42#include "poppler-config.h"
43#include "poppler_private_export.h"
44#include "Object.h"
45#include "Stream.h"
46#include "PopplerCache.h"
47
// Forward declarations of class types that are named by the declarations
// further down in this header.
class Dict;
class Stream;
class Parser;
class ObjectStream;
52
//------------------------------------------------------------------------
// XRef
//------------------------------------------------------------------------
56
// Type tag stored in XRefEntry::type.
// The enumerators have no explicit values, so they are numbered 0..3 in
// declaration order; keep the order stable.
enum XRefEntryType
{
    xrefEntryFree,
    xrefEntryUncompressed,
    xrefEntryCompressed,
    xrefEntryNone
};
64
65struct XRefEntry
66{
67 Goffset offset;
68 int gen;
69 XRefEntryType type;
70 int flags;
71 Object obj; // if this entry was updated, obj will contains the updated object
72
73 enum Flag
74 {
75 // Regular flags
76 Updated, // Entry was modified
77 Parsing, // Entry is currently being parsed
78
79 // Special flags -- available only after xref->scanSpecialFlags() is run
80 Unencrypted, // Entry is stored in unencrypted form (meaningless in unencrypted documents)
81 DontRewrite // Entry must not be written back in case of full rewrite
82 };
83
84 inline bool getFlag(Flag flag) const
85 {
86 const int mask = (1 << (int)flag);
87 return (flags & mask) != 0;
88 }
89
90 inline void setFlag(Flag flag, bool value)
91 {
92 const int mask = (1 << (int)flag);
93 if (value) {
94 flags |= mask;
95 } else {
96 flags &= ~mask;
97 }
98 }
99};
100
// How to compress an added stream
enum class StreamCompression
{
    None, /* No compression */
    Compress, /* Compresses the stream */
};
107
108class POPPLER_PRIVATE_EXPORT XRef
109{
110public:
111 // Constructor, create an empty XRef, used for PDF writing
112 XRef();
113 // Constructor, create an empty XRef but with info dict, used for PDF writing
114 explicit XRef(const Object *trailerDictA);
115 // Constructor. Read xref table from stream.
116 XRef(BaseStream *strA, Goffset pos, Goffset mainXRefEntriesOffsetA = 0, bool *wasReconstructed = nullptr, bool reconstruct = false, const std::function<void()> &xrefReconstructedCallback = {});
117
118 // Destructor.
119 ~XRef();
120
121 XRef(const XRef &) = delete;
122 XRef &operator=(const XRef &) = delete;
123
124 // Copy xref but with new base stream!
125 XRef *copy() const;
126
127 // Is xref table valid?
128 bool isOk() const { return ok; }
129
130 // Is the last XRef section a stream or a table?
131 bool isXRefStream() const { return xRefStream; }
132
133 // Get the error code (if isOk() returns false).
134 int getErrorCode() const { return errCode; }
135
136 // Set the encryption parameters.
137 void setEncryption(int permFlagsA, bool ownerPasswordOkA, const unsigned char *fileKeyA, int keyLengthA, int encVersionA, int encRevisionA, CryptAlgorithm encAlgorithmA);
138 // Mark Encrypt entry as Unencrypted
139 void markUnencrypted();
140
141 void getEncryptionParameters(unsigned char **fileKeyA, CryptAlgorithm *encAlgorithmA, int *keyLengthA);
142
143 // Is the file encrypted?
144 bool isEncrypted() const { return encrypted; }
145
146 // Is the given Ref encrypted?
147 bool isRefEncrypted(Ref r);
148
149 // Check various permissions.
150 bool okToPrint(bool ignoreOwnerPW = false) const;
151 bool okToPrintHighRes(bool ignoreOwnerPW = false) const;
152 bool okToChange(bool ignoreOwnerPW = false) const;
153 bool okToCopy(bool ignoreOwnerPW = false) const;
154 bool okToAddNotes(bool ignoreOwnerPW = false) const;
155 bool okToFillForm(bool ignoreOwnerPW = false) const;
156 bool okToAccessibility(bool ignoreOwnerPW = false) const;
157 bool okToAssemble(bool ignoreOwnerPW = false) const;
158 int getPermFlags() const { return permFlags; }
159
160 // Get catalog object.
161 Object getCatalog();
162
163 // Fetch an indirect reference.
164 Object fetch(const Ref ref, int recursion = 0);
165 // If endPos is not null, returns file position after parsing the object. This will
166 // be a few bytes after the end of the object due to the parser reading ahead.
167 // Returns -1 if object is in compressed stream.
168 Object fetch(int num, int gen, int recursion = 0, Goffset *endPos = nullptr);
169
170 // Return the document's Info dictionary (if any).
171 Object getDocInfo();
172 Object getDocInfoNF();
173
174 // Create and return the document's Info dictionary if needed.
175 // Otherwise return the existing one.
176 // Returns in the given parameter the Ref the Info is in
177 Object createDocInfoIfNeeded(Ref *ref);
178
179 // Remove the document's Info dictionary and update the trailer dictionary.
180 void removeDocInfo();
181
182 // Return the number of objects in the xref table.
183 int getNumObjects() const { return size; }
184
185 // Return the catalog object reference.
186 int getRootNum() const { return rootNum; }
187 int getRootGen() const { return rootGen; }
188 Ref getRoot() const { return { .num: rootNum, .gen: rootGen }; }
189
190 // Get end position for a stream in a damaged file.
191 // Returns false if unknown or file is not damaged.
192 bool getStreamEnd(Goffset streamStart, Goffset *streamEnd);
193
194 // Retuns the entry that belongs to the offset
195 int getNumEntry(Goffset offset);
196
197 // Scans the document and sets special flags in all xref entries. One of those
198 // flags is Unencrypted, which affects how the object is fetched. Therefore,
199 // this function must be called before fetching unencrypted objects (e.g.
200 // Encrypt dictionary, XRef streams). Note that the code that initializes
201 // decryption doesn't need to call this function, because it runs before
202 // decryption is enabled, and therefore the Unencrypted flag is ignored.
203 void scanSpecialFlags();
204
205 // Direct access.
206 XRefEntry *getEntry(int i, bool complainIfMissing = true);
207 Object *getTrailerDict() { return &trailerDict; }
208
209 // Was the XRef modified?
210 bool isModified() const { return modified; }
211 // Set the modification flag for XRef to true.
212 void setModified() { modified = true; }
213
214 // Write access
215 void setModifiedObject(const Object *o, Ref r);
216 Ref addIndirectObject(const Object &o);
217 void removeIndirectObject(Ref r);
218 bool add(int num, int gen, Goffset offs, bool used);
219 void add(Ref ref, Goffset offs, bool used);
220 // Adds a stream object using AutoFreeMemStream.
221 // The function takes ownership over dict and buffer.
222 // The buffer should be created using gmalloc().
223 // For stream compression, if the data is already compressed
224 // don't compress again. If it is not compressed, use compress (Flate / zlib)
225 // Returns ref to a new object.
226 Ref addStreamObject(Dict *dict, char *buffer, const Goffset bufferSize, StreamCompression compression);
227 Ref addStreamObject(Dict *dict, uint8_t *buffer, const Goffset bufferSize, StreamCompression compression);
228
229 // Output XRef table to stream
230 void writeTableToFile(OutStream *outStr, bool writeAllEntries);
231 // Output XRef stream contents to GooString and fill trailerDict fields accordingly
232 void writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref);
233
234 // to be thread safe during write where changes are not allowed
235 void lock();
236 void unlock();
237
238private:
239 BaseStream *str; // input stream
240 Goffset start; // offset in file (to allow for garbage
241 // at beginning of file)
242 XRefEntry *entries; // xref entries
243 int capacity; // size of <entries> array
244 int size; // number of entries
245 int rootNum, rootGen; // catalog dict
246 bool ok; // true if xref table is valid
247 int errCode; // error code (if <ok> is false)
248 bool xrefReconstructed; // marker, true if xref was already reconstructed
249 Object trailerDict; // trailer dictionary
250 bool modified;
251 Goffset *streamEnds; // 'endstream' positions - only used in
252 // damaged files
253 int streamEndsLen; // number of valid entries in streamEnds
254 PopplerCache<Goffset, ObjectStream> objStrs; // cached object streams
255 bool encrypted; // true if file is encrypted
256 int encRevision;
257 int encVersion; // encryption algorithm
258 CryptAlgorithm encAlgorithm; // encryption algorithm
259 int keyLength; // length of key, in bytes
260 int permFlags; // permission bits
261 unsigned char fileKey[32]; // file decryption key
262 bool ownerPasswordOk; // true if owner password is correct
263 Goffset prevXRefOffset; // position of prev XRef section (= next to read)
264 Goffset mainXRefEntriesOffset; // offset of entries in main XRef table
265 bool xRefStream; // true if last XRef section is a stream
266 Goffset mainXRefOffset; // position of the main XRef table/stream
267 bool scannedSpecialFlags; // true if scanSpecialFlags has been called
268 bool strOwner; // true if str is owned by the instance
269 mutable std::recursive_mutex mutex;
270 std::function<void()> xrefReconstructedCb;
271
272 RefRecursionChecker refsBeingFetched;
273
274 int reserve(int newSize);
275 int resize(int newSize);
276 bool readXRef(Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum);
277 bool readXRefTable(Parser *parser, Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum);
278 bool readXRefStreamSection(Stream *xrefStr, const int *w, int first, int n);
279 bool readXRefStream(Stream *xrefStr, Goffset *pos);
280 bool constructXRef(bool *wasReconstructed, bool needCatalogDict = false);
281 bool parseEntry(Goffset offset, XRefEntry *entry);
282 void readXRefUntil(int untilEntryNum, std::vector<int> *xrefStreamObjsNum = nullptr);
283 void markUnencrypted(Object *obj);
284
285 class XRefWriter
286 {
287 public:
288 XRefWriter() = default;
289 virtual void startSection(int first, int count) = 0;
290 virtual void writeEntry(Goffset offset, int gen, XRefEntryType type) = 0;
291 virtual ~XRefWriter();
292
293 XRefWriter(const XRefWriter &) = delete;
294 XRefWriter &operator=(const XRefWriter &other) = delete;
295 };
296
297 // XRefWriter subclass that writes a XRef table
298 class XRefTableWriter : public XRefWriter
299 {
300 public:
301 explicit XRefTableWriter(OutStream *outStrA);
302 void startSection(int first, int count) override;
303 void writeEntry(Goffset offset, int gen, XRefEntryType type) override;
304
305 private:
306 OutStream *outStr;
307 };
308
309 // XRefWriter subclass that writes a XRef stream
310 class XRefStreamWriter : public XRefWriter
311 {
312 public:
313 XRefStreamWriter(Array *index, GooString *stmBuf, int offsetSize);
314 void startSection(int first, int count) override;
315 void writeEntry(Goffset offset, int gen, XRefEntryType type) override;
316
317 private:
318 Array *index;
319 GooString *stmBuf;
320 int offsetSize;
321 };
322
323 // Dummy XRefWriter subclass that only checks if all offsets fit in 4 bytes
324 class XRefPreScanWriter : public XRefWriter
325 {
326 public:
327 XRefPreScanWriter();
328 void startSection(int first, int count) override;
329 void writeEntry(Goffset offset, int gen, XRefEntryType type) override;
330
331 bool hasOffsetsBeyond4GB;
332 };
333
334 void writeXRef(XRefWriter *writer, bool writeAllEntries);
335};
336
337#endif
338

source code of poppler/poppler/XRef.h