1//========================================================================
2//
3// XRef.cc
4//
5// Copyright 1996-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2005 Dan Sheridan <dan.sheridan@postman.org.uk>
17// Copyright (C) 2005 Brad Hards <bradh@frogmouth.net>
18// Copyright (C) 2006, 2008, 2010, 2012-2014, 2016-2024 Albert Astals Cid <aacid@kde.org>
19// Copyright (C) 2007-2008 Julien Rebetez <julienr@svn.gnome.org>
20// Copyright (C) 2007 Carlos Garcia Campos <carlosgc@gnome.org>
21// Copyright (C) 2009, 2010 Ilya Gorenbein <igorenbein@finjan.com>
22// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
23// Copyright (C) 2012, 2013, 2016 Thomas Freitag <Thomas.Freitag@kabelmail.de>
24// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
25// Copyright (C) 2013, 2014, 2017, 2019 Adrian Johnson <ajohnson@redneon.com>
26// Copyright (C) 2013 Pino Toscano <pino@kde.org>
27// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
28// Copyright (C) 2018, 2019 Adam Reichold <adam.reichold@t-online.de>
29// Copyright (C) 2018 Tobias Deiminger <haxtibal@posteo.de>
30// Copyright (C) 2019 LE GARREC Vincent <legarrec.vincent@gmail.com>
31// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
32// Copyright (C) 2010 William Bader <william@newspapersystems.com>
33// Copyright (C) 2021 Mahmoud Khalil <mahmoudkhalil11@gmail.com>
34// Copyright (C) 2021 Georgiy Sgibnev <georgiy@sgibnev.com>. Work sponsored by lab50.net.
35// Copyright (C) 2023 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
36// Copyright (C) 2023 Ilaï Deutel <idtl@google.com>
37// Copyright (C) 2023 Even Rouault <even.rouault@spatialys.com>
38// Copyright (C) 2024 Nelson Benítez León <nbenitezl@gmail.com>
39//
40// To see a description of the changes please see the Changelog file that
41// came with your tarball or type make ChangeLog if you are building from git
42//
43//========================================================================
44
45#include <config.h>
46#include "poppler-config.h"
47
48#include <cstdlib>
49#include <cstddef>
50#include <cstring>
51#include <cmath>
52#include <cctype>
53#include <climits>
54#include <cfloat>
55#include <limits>
56#include "goo/gfile.h"
57#include "goo/gmem.h"
58#include "Object.h"
59#include "Stream.h"
60#include "Lexer.h"
61#include "Parser.h"
62#include "Dict.h"
63#include "Error.h"
64#include "ErrorCodes.h"
65#include "XRef.h"
66
67//------------------------------------------------------------------------
68// Permission bits
// Note that the PDF spec uses 1-based bit numbering (e.g. bit 3 is 1 << 2)
70//------------------------------------------------------------------------
71
#define permPrint (1 << 2) // bit 3: tested by XRef::okToPrint()
#define permChange (1 << 3) // bit 4: tested by XRef::okToChange()
#define permCopy (1 << 4) // bit 5: tested by XRef::okToCopy()
#define permNotes (1 << 5) // bit 6: tested by XRef::okToAddNotes()
#define permFillForm (1 << 8) // bit 9: tested by XRef::okToFillForm()
#define permAccessibility (1 << 9) // bit 10: tested by XRef::okToAccessibility()
#define permAssemble (1 << 10) // bit 11: tested by XRef::okToAssemble()
#define permHighResPrint (1 << 11) // bit 12: tested by XRef::okToPrintHighRes()
// Initial value of XRef::permFlags (set in XRef::XRef()): the low 16 bits
// with the two lowest bits (bits 1 and 2) cleared.
#define defPermFlags 0xfffc
81
82//------------------------------------------------------------------------
83// ObjectStream
84//------------------------------------------------------------------------
85
86class ObjectStream
87{
88public:
89 // Create an object stream, using object number <objStrNum>,
90 // generation 0.
91 ObjectStream(XRef *xref, int objStrNumA, int recursion = 0);
92
93 bool isOk() { return ok; }
94
95 ~ObjectStream();
96
97 ObjectStream(const ObjectStream &) = delete;
98 ObjectStream &operator=(const ObjectStream &) = delete;
99
100 // Return the object number of this object stream.
101 int getObjStrNum() { return objStrNum; }
102
103 // Get the <objIdx>th object from this stream, which should be
104 // object number <objNum>, generation 0.
105 Object getObject(int objIdx, int objNum);
106
107private:
108 int objStrNum; // object number of the object stream
109 int nObjects; // number of objects in the stream
110 Object *objs; // the objects (length = nObjects)
111 int *objNums; // the object numbers (length = nObjects)
112 bool ok;
113};
114
115ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion)
116{
117 Stream *str;
118 Parser *parser;
119 Goffset *offsets;
120 Object objStr, obj1;
121 Goffset first;
122 int i;
123
124 objStrNum = objStrNumA;
125 nObjects = 0;
126 objs = nullptr;
127 objNums = nullptr;
128 ok = false;
129
130 objStr = xref->fetch(num: objStrNum, gen: 0, recursion);
131 if (!objStr.isStream()) {
132 return;
133 }
134
135 obj1 = objStr.streamGetDict()->lookup(key: "N", recursion);
136 if (!obj1.isInt()) {
137 return;
138 }
139 nObjects = obj1.getInt();
140 if (nObjects <= 0) {
141 return;
142 }
143
144 obj1 = objStr.streamGetDict()->lookup(key: "First", recursion);
145 if (!obj1.isInt() && !obj1.isInt64()) {
146 return;
147 }
148 if (obj1.isInt()) {
149 first = obj1.getInt();
150 } else {
151 first = obj1.getInt64();
152 }
153 if (first < 0) {
154 return;
155 }
156
157 // this is an arbitrary limit to avoid integer overflow problems
158 // in the 'new Object[nObjects]' call (Acrobat apparently limits
159 // object streams to 100-200 objects)
160 if (nObjects > 1000000) {
161 error(category: errSyntaxError, pos: -1, msg: "Too many objects in an object stream");
162 return;
163 }
164 objs = new Object[nObjects];
165 objNums = (int *)gmallocn(count: nObjects, size: sizeof(int));
166 offsets = (Goffset *)gmallocn(count: nObjects, size: sizeof(Goffset));
167
168 // parse the header: object numbers and offsets
169 objStr.streamReset();
170 str = new EmbedStream(objStr.getStream(), Object(objNull), true, first);
171 parser = new Parser(xref, str, false);
172 for (i = 0; i < nObjects; ++i) {
173 obj1 = parser->getObj();
174 Object obj2 = parser->getObj();
175 if (!obj1.isInt() || !(obj2.isInt() || obj2.isInt64())) {
176 delete parser;
177 gfree(p: offsets);
178 return;
179 }
180 objNums[i] = obj1.getInt();
181 if (obj2.isInt()) {
182 offsets[i] = obj2.getInt();
183 } else {
184 offsets[i] = obj2.getInt64();
185 }
186 if (objNums[i] < 0 || offsets[i] < 0 || (i > 0 && offsets[i] < offsets[i - 1])) {
187 delete parser;
188 gfree(p: offsets);
189 return;
190 }
191 }
192 while (str->getChar() != EOF) {
193 ;
194 }
195 delete parser;
196
197 // skip to the first object - this shouldn't be necessary because
198 // the First key is supposed to be equal to offsets[0], but just in
199 // case...
200 for (Goffset pos = first; pos < offsets[0]; ++pos) {
201 objStr.getStream()->getChar();
202 }
203
204 // parse the objects
205 for (i = 0; i < nObjects; ++i) {
206 if (i == nObjects - 1) {
207 str = new EmbedStream(objStr.getStream(), Object(objNull), false, 0);
208 } else {
209 str = new EmbedStream(objStr.getStream(), Object(objNull), true, offsets[i + 1] - offsets[i]);
210 }
211 parser = new Parser(xref, str, false);
212 objs[i] = parser->getObj();
213 while (str->getChar() != EOF) {
214 ;
215 }
216 delete parser;
217 }
218
219 gfree(p: offsets);
220 ok = true;
221}
222
223ObjectStream::~ObjectStream()
224{
225 delete[] objs;
226 gfree(p: objNums);
227}
228
229Object ObjectStream::getObject(int objIdx, int objNum)
230{
231 if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
232 return Object(objNull);
233 }
234 return objs[objIdx].copy();
235}
236
237//------------------------------------------------------------------------
238// XRef
239//------------------------------------------------------------------------
240
// Declares a local `locker` that holds the member `mutex` until the end of
// the enclosing scope.  Used as a statement at the top of XRef methods.
#define xrefLocker() const std::scoped_lock locker(mutex)
242
243XRef::XRef() : objStrs { 5 }
244{
245 ok = true;
246 errCode = errNone;
247 entries = nullptr;
248 capacity = 0;
249 size = 0;
250 modified = false;
251 streamEnds = nullptr;
252 streamEndsLen = 0;
253 mainXRefEntriesOffset = 0;
254 xRefStream = false;
255 scannedSpecialFlags = false;
256 encrypted = false;
257 permFlags = defPermFlags;
258 ownerPasswordOk = false;
259 rootNum = -1;
260 strOwner = false;
261 xrefReconstructed = false;
262 encAlgorithm = cryptNone;
263 keyLength = 0;
264}
265
266XRef::XRef(const Object *trailerDictA) : XRef {}
267{
268 if (trailerDictA->isDict()) {
269 trailerDict = trailerDictA->copy();
270 }
271}
272
273XRef::XRef(BaseStream *strA, Goffset pos, Goffset mainXRefEntriesOffsetA, bool *wasReconstructed, bool reconstruct, const std::function<void()> &xrefReconstructedCallback) : XRef {}
274{
275 Object obj;
276
277 mainXRefEntriesOffset = mainXRefEntriesOffsetA;
278
279 xrefReconstructedCb = xrefReconstructedCallback;
280
281 // read the trailer
282 str = strA;
283 start = str->getStart();
284 prevXRefOffset = mainXRefOffset = pos;
285
286 if (reconstruct && !(ok = constructXRef(wasReconstructed))) {
287 errCode = errDamaged;
288 return;
289 } else {
290 // if there was a problem with the 'startxref' position, try to
291 // reconstruct the xref table
292 if (prevXRefOffset == 0) {
293 if (!(ok = constructXRef(wasReconstructed))) {
294 errCode = errDamaged;
295 return;
296 }
297
298 // read the xref table
299 } else {
300 std::vector<Goffset> followedXRefStm;
301 readXRef(pos: &prevXRefOffset, followedXRefStm: &followedXRefStm, xrefStreamObjsNum: nullptr);
302
303 // if there was a problem with the xref table,
304 // try to reconstruct it
305 if (!ok) {
306 if (!(ok = constructXRef(wasReconstructed))) {
307 errCode = errDamaged;
308 return;
309 }
310 }
311 }
312
313 // set size to (at least) the size specified in trailer dict
314 obj = trailerDict.dictLookupNF(key: "Size").copy();
315 if (!obj.isInt()) {
316 error(category: errSyntaxWarning, pos: -1, msg: "No valid XRef size in trailer");
317 } else {
318 if (obj.getInt() > size) {
319 if (resize(newSize: obj.getInt()) != obj.getInt()) {
320 if (!(ok = constructXRef(wasReconstructed))) {
321 errCode = errDamaged;
322 return;
323 }
324 }
325 }
326 }
327
328 // get the root dictionary (catalog) object
329 obj = trailerDict.dictLookupNF(key: "Root").copy();
330 if (obj.isRef()) {
331 rootNum = obj.getRefNum();
332 rootGen = obj.getRefGen();
333 } else {
334 if (!(ok = constructXRef(wasReconstructed))) {
335 errCode = errDamaged;
336 return;
337 }
338 }
339 }
340 // now set the trailer dictionary's xref pointer so we can fetch
341 // indirect objects from it
342 trailerDict.getDict()->setXRef(this);
343}
344
345XRef::~XRef()
346{
347 for (int i = 0; i < size; i++) {
348 if (entries[i].type == xrefEntryFree) {
349 continue;
350 }
351
352 entries[i].obj.~Object();
353 }
354 gfree(p: entries);
355
356 if (streamEnds) {
357 gfree(p: streamEnds);
358 }
359 if (strOwner) {
360 delete str;
361 }
362}
363
364XRef *XRef::copy() const
365{
366 XRef *xref = new XRef();
367 xref->str = str->copy();
368 xref->strOwner = true;
369 xref->encrypted = encrypted;
370 xref->permFlags = permFlags;
371 xref->ownerPasswordOk = ownerPasswordOk;
372 xref->rootGen = rootGen;
373 xref->rootNum = rootNum;
374
375 xref->start = start;
376 xref->prevXRefOffset = prevXRefOffset;
377 xref->mainXRefEntriesOffset = mainXRefEntriesOffset;
378 xref->xRefStream = xRefStream;
379 xref->trailerDict = trailerDict.copy();
380 xref->encAlgorithm = encAlgorithm;
381 xref->encRevision = encRevision;
382 xref->encVersion = encVersion;
383 xref->permFlags = permFlags;
384 xref->keyLength = keyLength;
385 xref->permFlags = permFlags;
386 for (int i = 0; i < 32; i++) {
387 xref->fileKey[i] = fileKey[i];
388 }
389
390 if (xref->reserve(newSize: size) == 0) {
391 error(category: errSyntaxError, pos: -1, msg: "unable to allocate {0:d} entries", size);
392 delete xref;
393 return nullptr;
394 }
395 xref->size = size;
396 for (int i = 0; i < size; ++i) {
397 xref->entries[i].offset = entries[i].offset;
398 xref->entries[i].type = entries[i].type;
399 // set the object to null, it will be fetched from the stream when needed
400 new (&xref->entries[i].obj) Object(objNull);
401 xref->entries[i].flags = entries[i].flags;
402 xref->entries[i].gen = entries[i].gen;
403
404 // If entry has been changed from the stream value we need to copy it
405 // otherwise it's lost
406 if (entries[i].getFlag(flag: XRefEntry::Updated)) {
407 xref->entries[i].obj = entries[i].obj.copy();
408 }
409 }
410 xref->streamEndsLen = streamEndsLen;
411 if (streamEndsLen != 0) {
412 xref->streamEnds = (Goffset *)gmalloc(size: streamEndsLen * sizeof(Goffset));
413 for (int i = 0; i < streamEndsLen; i++) {
414 xref->streamEnds[i] = streamEnds[i];
415 }
416 }
417 return xref;
418}
419
420int XRef::reserve(int newSize)
421{
422 if (newSize > capacity) {
423 int newCapacity = 1024;
424 if (capacity) {
425 if (capacity <= INT_MAX / 2) {
426 newCapacity = capacity * 2;
427 } else {
428 newCapacity = newSize;
429 }
430 }
431 while (newSize > newCapacity) {
432 if (newCapacity > INT_MAX / 2) {
433 std::fputs(s: "Too large XRef size\n", stderr);
434 return 0;
435 }
436 newCapacity *= 2;
437 }
438 if (newCapacity >= INT_MAX / (int)sizeof(XRefEntry)) {
439 std::fputs(s: "Too large XRef size\n", stderr);
440 return 0;
441 }
442
443 void *p = grealloc(p: entries, size: newCapacity * sizeof(XRefEntry),
444 /* checkoverflow=*/true);
445 if (p == nullptr) {
446 return 0;
447 }
448
449 entries = (XRefEntry *)p;
450 capacity = newCapacity;
451 }
452
453 return capacity;
454}
455
456int XRef::resize(int newSize)
457{
458 if (newSize > size) {
459
460 if (reserve(newSize) < newSize) {
461 return size;
462 }
463
464 for (int i = size; i < newSize; ++i) {
465 entries[i].offset = -1;
466 entries[i].type = xrefEntryNone;
467 new (&entries[i].obj) Object(objNull);
468 entries[i].flags = 0;
469 entries[i].gen = 0;
470 }
471 } else {
472 for (int i = newSize; i < size; i++) {
473 entries[i].obj.~Object();
474 }
475 }
476
477 size = newSize;
478
479 return size;
480}
481
482/* Read one xref table section. Also reads the associated trailer
483 * dictionary, and returns the prev pointer (if any).
484 * Arguments:
485 * pos Points to a Goffset containing the offset of the XRef
486 * section to be read. If a prev pointer is found, *pos is
487 * updated with its value
488 * followedXRefStm Used in case of nested readXRef calls to spot circular
489 * references in XRefStm pointers
490 * xrefStreamObjsNum If not NULL, every time a XRef stream is encountered,
491 * its object number is appended
492 * Return value:
493 * true if a prev pointer is found, otherwise false
494 */
495bool XRef::readXRef(Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum)
496{
497 Parser *parser;
498 Object obj;
499 bool more;
500
501 Goffset parsePos;
502 if (unlikely(checkedAdd(start, *pos, &parsePos))) {
503 ok = false;
504 return false;
505 }
506 if (parsePos < 0) {
507 ok = false;
508 return false;
509 }
510
511 // start up a parser, parse one token
512 parser = new Parser(nullptr, str->makeSubStream(start: parsePos, limited: false, length: 0, dict: Object(objNull)), true);
513 obj = parser->getObj(simpleOnly: true);
514
515 // parse an old-style xref table
516 if (obj.isCmd(cmdA: "xref")) {
517 more = readXRefTable(parser, pos, followedXRefStm, xrefStreamObjsNum);
518
519 // parse an xref stream
520 } else if (obj.isInt()) {
521 const int objNum = obj.getInt();
522 if (obj = parser->getObj(simpleOnly: true), !obj.isInt()) {
523 goto err1;
524 }
525 if (obj = parser->getObj(simpleOnly: true), !obj.isCmd(cmdA: "obj")) {
526 goto err1;
527 }
528 if (obj = parser->getObj(), !obj.isStream()) {
529 goto err1;
530 }
531 if (trailerDict.isNone()) {
532 xRefStream = true;
533 }
534 if (xrefStreamObjsNum) {
535 xrefStreamObjsNum->push_back(x: objNum);
536 }
537 more = readXRefStream(xrefStr: obj.getStream(), pos);
538
539 } else {
540 goto err1;
541 }
542
543 delete parser;
544 return more;
545
546err1:
547 delete parser;
548 ok = false;
549 return false;
550}
551
552bool XRef::readXRefTable(Parser *parser, Goffset *pos, std::vector<Goffset> *followedXRefStm, std::vector<int> *xrefStreamObjsNum)
553{
554 XRefEntry entry;
555 bool more;
556 Object obj, obj2;
557 Goffset pos2;
558 int first, n;
559
560 while (true) {
561 obj = parser->getObj(simpleOnly: true);
562 if (obj.isCmd(cmdA: "trailer")) {
563 break;
564 }
565 if (!obj.isInt()) {
566 goto err0;
567 }
568 first = obj.getInt();
569 obj = parser->getObj(simpleOnly: true);
570 if (!obj.isInt()) {
571 goto err0;
572 }
573 n = obj.getInt();
574 if (first < 0 || n < 0 || first > INT_MAX - n) {
575 goto err0;
576 }
577 if (first + n > size) {
578 if (resize(newSize: first + n) != first + n) {
579 error(category: errSyntaxError, pos: -1, msg: "Invalid 'obj' parameters'");
580 goto err0;
581 }
582 }
583 for (int i = first; i < first + n; ++i) {
584 obj = parser->getObj(simpleOnly: true);
585 if (obj.isInt()) {
586 entry.offset = obj.getInt();
587 } else if (obj.isInt64()) {
588 entry.offset = obj.getInt64();
589 } else {
590 goto err0;
591 }
592 obj = parser->getObj(simpleOnly: true);
593 if (!obj.isInt()) {
594 goto err0;
595 }
596 entry.gen = obj.getInt();
597 entry.flags = 0;
598 obj = parser->getObj(simpleOnly: true);
599 if (obj.isCmd(cmdA: "n")) {
600 entry.type = xrefEntryUncompressed;
601 } else if (obj.isCmd(cmdA: "f")) {
602 entry.type = xrefEntryFree;
603 } else {
604 goto err0;
605 }
606 if (entries[i].offset == -1) {
607 entries[i].offset = entry.offset;
608 entries[i].gen = entry.gen;
609 entries[i].type = entry.type;
610 entries[i].flags = entry.flags;
611 entries[i].obj.setToNull();
612
613 // PDF files of patents from the IBM Intellectual Property
614 // Network have a bug: the xref table claims to start at 1
615 // instead of 0.
616 if (i == 1 && first == 1 && entries[1].offset == 0 && entries[1].gen == 65535 && entries[1].type == xrefEntryFree) {
617 i = first = 0;
618 entries[0].offset = 0;
619 entries[0].gen = 65535;
620 entries[0].type = xrefEntryFree;
621 entries[0].flags = entries[1].flags;
622 entries[0].obj = std::move(entries[1].obj);
623
624 entries[1].offset = -1;
625 entries[1].obj.setToNull();
626 }
627 }
628 }
629 }
630
631 // read the trailer dictionary
632 obj = parser->getObj();
633 if (!obj.isDict()) {
634 goto err0;
635 }
636
637 // get the 'Prev' pointer
638 obj2 = obj.getDict()->lookupNF(key: "Prev").copy();
639 if (obj2.isInt() || obj2.isInt64()) {
640 if (obj2.isInt()) {
641 pos2 = obj2.getInt();
642 } else {
643 pos2 = obj2.getInt64();
644 }
645 if (pos2 != *pos) {
646 *pos = pos2;
647 more = true;
648 } else {
649 error(category: errSyntaxWarning, pos: -1, msg: "Infinite loop in xref table");
650 more = false;
651 }
652 } else if (obj2.isRef()) {
653 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
654 // of "/Prev NNN"
655 pos2 = (unsigned int)obj2.getRefNum();
656 if (pos2 != *pos) {
657 *pos = pos2;
658 more = true;
659 } else {
660 error(category: errSyntaxWarning, pos: -1, msg: "Infinite loop in xref table");
661 more = false;
662 }
663 } else {
664 more = false;
665 }
666
667 // save the first trailer dictionary
668 if (trailerDict.isNone()) {
669 trailerDict = obj.copy();
670 }
671
672 // check for an 'XRefStm' key
673 obj2 = obj.getDict()->lookup(key: "XRefStm");
674 if (obj2.isInt() || obj2.isInt64()) {
675 if (obj2.isInt()) {
676 pos2 = obj2.getInt();
677 } else {
678 pos2 = obj2.getInt64();
679 }
680 for (size_t i = 0; ok == true && i < followedXRefStm->size(); ++i) {
681 if (followedXRefStm->at(n: i) == pos2) {
682 ok = false;
683 }
684 }
685 // Arbitrary limit because otherwise we exhaust the stack
686 // calling readXRef + readXRefTable
687 if (followedXRefStm->size() > 4096) {
688 error(category: errSyntaxError, pos: -1, msg: "File has more than 4096 XRefStm, aborting");
689 ok = false;
690 }
691 if (ok) {
692 followedXRefStm->push_back(x: pos2);
693 readXRef(pos: &pos2, followedXRefStm, xrefStreamObjsNum);
694 }
695 if (!ok) {
696 goto err0;
697 }
698 }
699
700 return more;
701
702err0:
703 ok = false;
704 return false;
705}
706
707bool XRef::readXRefStream(Stream *xrefStr, Goffset *pos)
708{
709 int w[3];
710 bool more;
711 Object obj;
712
713 ok = false;
714
715 Dict *dict = xrefStr->getDict();
716 obj = dict->lookupNF(key: "Size").copy();
717 if (!obj.isInt()) {
718 return false;
719 }
720 int newSize = obj.getInt();
721 if (newSize < 0) {
722 return false;
723 }
724 if (newSize > size) {
725 if (resize(newSize) != newSize) {
726 error(category: errSyntaxError, pos: -1, msg: "Invalid 'size' parameter");
727 return false;
728 }
729 }
730
731 obj = dict->lookupNF(key: "W").copy();
732 if (!obj.isArray() || obj.arrayGetLength() < 3) {
733 return false;
734 }
735 for (int i = 0; i < 3; ++i) {
736 Object obj2 = obj.arrayGet(i);
737 if (!obj2.isInt()) {
738 return false;
739 }
740 w[i] = obj2.getInt();
741 if (w[i] < 0) {
742 return false;
743 }
744 }
745 if (w[0] > (int)sizeof(int) || w[1] > (int)sizeof(long long) || w[2] > (int)sizeof(long long)) {
746 return false;
747 }
748
749 xrefStr->reset();
750 const Object &idx = dict->lookupNF(key: "Index");
751 if (idx.isArray()) {
752 for (int i = 0; i + 1 < idx.arrayGetLength(); i += 2) {
753 obj = idx.arrayGet(i);
754 if (!obj.isInt()) {
755 return false;
756 }
757 int first = obj.getInt();
758 obj = idx.arrayGet(i: i + 1);
759 if (!obj.isInt()) {
760 return false;
761 }
762 int n = obj.getInt();
763 if (first < 0 || n < 0 || !readXRefStreamSection(xrefStr, w, first, n)) {
764 return false;
765 }
766 }
767 } else {
768 if (!readXRefStreamSection(xrefStr, w, first: 0, n: newSize)) {
769 return false;
770 }
771 }
772
773 obj = dict->lookupNF(key: "Prev").copy();
774 if (obj.isInt() && obj.getInt() >= 0) {
775 *pos = obj.getInt();
776 more = true;
777 } else if (obj.isInt64() && obj.getInt64() >= 0) {
778 *pos = obj.getInt64();
779 more = true;
780 } else {
781 more = false;
782 }
783 if (trailerDict.isNone()) {
784 trailerDict = xrefStr->getDictObject()->copy();
785 }
786
787 ok = true;
788 return more;
789}
790
791bool XRef::readXRefStreamSection(Stream *xrefStr, const int *w, int first, int n)
792{
793 unsigned long long offset, gen;
794 int type, c, i, j;
795
796 if (first > INT_MAX - n) {
797 return false;
798 }
799 if (first + n < 0) {
800 return false;
801 }
802 if (first + n > size) {
803 if (resize(newSize: first + n) != size) {
804 error(category: errSyntaxError, pos: -1, msg: "Invalid 'size' inside xref table");
805 return false;
806 }
807 if (first + n > size) {
808 error(category: errSyntaxError, pos: -1, msg: "Invalid 'first' or 'n' inside xref table");
809 return false;
810 }
811 }
812 for (i = first; i < first + n; ++i) {
813 if (w[0] == 0) {
814 type = 1;
815 } else {
816 for (type = 0, j = 0; j < w[0]; ++j) {
817 if ((c = xrefStr->getChar()) == EOF) {
818 return false;
819 }
820 type = (type << 8) + c;
821 }
822 }
823 for (offset = 0, j = 0; j < w[1]; ++j) {
824 if ((c = xrefStr->getChar()) == EOF) {
825 return false;
826 }
827 offset = (offset << 8) + c;
828 }
829 if (offset > (unsigned long long)GoffsetMax()) {
830 error(category: errSyntaxError, pos: -1, msg: "Offset inside xref table too large for fseek");
831 return false;
832 }
833 for (gen = 0, j = 0; j < w[2]; ++j) {
834 if ((c = xrefStr->getChar()) == EOF) {
835 return false;
836 }
837 gen = (gen << 8) + c;
838 }
839 if (gen > INT_MAX) {
840 if (i == 0 && gen == std::numeric_limits<uint32_t>::max()) {
841 // workaround broken generators
842 gen = 65535;
843 } else {
844 error(category: errSyntaxError, pos: -1, msg: "Gen inside xref table too large (bigger than INT_MAX)");
845 return false;
846 }
847 }
848 if (entries[i].offset == -1) {
849 switch (type) {
850 case 0:
851 entries[i].offset = offset;
852 entries[i].gen = static_cast<int>(gen);
853 entries[i].type = xrefEntryFree;
854 break;
855 case 1:
856 entries[i].offset = offset;
857 entries[i].gen = static_cast<int>(gen);
858 entries[i].type = xrefEntryUncompressed;
859 break;
860 case 2:
861 entries[i].offset = offset;
862 entries[i].gen = static_cast<int>(gen);
863 entries[i].type = xrefEntryCompressed;
864 break;
865 default:
866 return false;
867 }
868 }
869 }
870
871 return true;
872}
873
874// Attempt to construct an xref table for a damaged file.
875// Warning: Reconstruction of files where last XRef section is a stream
876// or where some objects are defined inside an object stream is not yet supported.
877// Existing data in XRef::entries may get corrupted if applied anyway.
878bool XRef::constructXRef(bool *wasReconstructed, bool needCatalogDict)
879{
880 Parser *parser;
881 char buf[256];
882 Goffset pos;
883 int num, gen;
884 int streamEndsSize;
885 char *p;
886 bool gotRoot;
887 char *token = nullptr;
888 bool oneCycle = true;
889 Goffset offset = 0;
890
891 resize(newSize: 0); // free entries properly
892 gfree(p: entries);
893 capacity = 0;
894 size = 0;
895 entries = nullptr;
896
897 gotRoot = false;
898 streamEndsLen = streamEndsSize = 0;
899
900 if (wasReconstructed) {
901 *wasReconstructed = true;
902 }
903
904 if (xrefReconstructedCb) {
905 xrefReconstructedCb();
906 }
907
908 str->reset();
909 while (true) {
910 pos = str->getPos();
911 if (!str->getLine(buf, size: 256)) {
912 break;
913 }
914 p = buf;
915
916 // skip whitespace
917 while (*p && Lexer::isSpace(c: *p & 0xff)) {
918 ++p;
919 }
920
921 oneCycle = true;
922 offset = 0;
923
924 while ((token = strstr(haystack: p, needle: "endobj")) || oneCycle) {
925 oneCycle = false;
926
927 if (token) {
928 oneCycle = true;
929 token[0] = '\0';
930 offset = token - p;
931 }
932
933 // got trailer dictionary
934 if (!strncmp(s1: p, s2: "trailer", n: 7)) {
935 parser = new Parser(nullptr, str->makeSubStream(start: pos + 7, limited: false, length: 0, dict: Object(objNull)), false);
936 Object newTrailerDict = parser->getObj();
937 if (newTrailerDict.isDict()) {
938 const Object &obj = newTrailerDict.dictLookupNF(key: "Root");
939 if (obj.isRef() && (!gotRoot || !needCatalogDict)) {
940 rootNum = obj.getRefNum();
941 rootGen = obj.getRefGen();
942 trailerDict = newTrailerDict.copy();
943 gotRoot = true;
944 }
945 }
946 delete parser;
947
948 // look for object
949 } else if (isdigit(*p & 0xff)) {
950 num = atoi(nptr: p);
951 if (num > 0) {
952 do {
953 ++p;
954 } while (*p && isdigit(*p & 0xff));
955 if ((*p & 0xff) == 0 || isspace(*p & 0xff)) {
956 if ((*p & 0xff) == 0) {
957 // new line, continue with next line!
958 str->getLine(buf, size: 256);
959 p = buf;
960 } else {
961 ++p;
962 }
963 while (*p && isspace(*p & 0xff)) {
964 ++p;
965 }
966 if (isdigit(*p & 0xff)) {
967 gen = atoi(nptr: p);
968 do {
969 ++p;
970 } while (*p && isdigit(*p & 0xff));
971 if ((*p & 0xff) == 0 || isspace(*p & 0xff)) {
972 if ((*p & 0xff) == 0) {
973 // new line, continue with next line!
974 str->getLine(buf, size: 256);
975 p = buf;
976 } else {
977 ++p;
978 }
979 while (*p && isspace(*p & 0xff)) {
980 ++p;
981 }
982 if (!strncmp(s1: p, s2: "obj", n: 3)) {
983 if (num >= size) {
984 if (unlikely(num >= INT_MAX - 1 - 255)) {
985 error(category: errSyntaxError, pos: -1, msg: "Bad object number");
986 return false;
987 }
988 const int newSize = (num + 1 + 255) & ~255;
989 if (newSize < 0) {
990 error(category: errSyntaxError, pos: -1, msg: "Bad object number");
991 return false;
992 }
993 if (resize(newSize) != newSize) {
994 error(category: errSyntaxError, pos: -1, msg: "Invalid 'obj' parameters");
995 return false;
996 }
997 }
998 if (entries[num].type == xrefEntryFree || gen >= entries[num].gen) {
999 entries[num].offset = pos - start;
1000 entries[num].gen = gen;
1001 entries[num].type = xrefEntryUncompressed;
1002 }
1003 }
1004 }
1005 }
1006 }
1007 }
1008
1009 } else {
1010 char *endstream = strstr(haystack: p, needle: "endstream");
1011 if (endstream) {
1012 intptr_t endstreamPos = endstream - p;
1013 if ((endstreamPos == 0 || Lexer::isSpace(c: p[endstreamPos - 1] & 0xff)) // endstream is either at beginning or preceeded by space
1014 && (endstreamPos + 9 >= 256 || Lexer::isSpace(c: p[endstreamPos + 9] & 0xff))) // endstream is either at end or followed by space
1015 {
1016 if (streamEndsLen == streamEndsSize) {
1017 streamEndsSize += 64;
1018 if (streamEndsSize >= INT_MAX / (int)sizeof(int)) {
1019 error(category: errSyntaxError, pos: -1, msg: "Invalid 'endstream' parameter.");
1020 return false;
1021 }
1022 streamEnds = (Goffset *)greallocn(p: streamEnds, count: streamEndsSize, size: sizeof(Goffset));
1023 }
1024 streamEnds[streamEndsLen++] = pos + endstreamPos;
1025 }
1026 }
1027 }
1028 if (token) {
1029 p = token + 6; // strlen( "endobj" ) = 6
1030 pos += offset + 6; // strlen( "endobj" ) = 6
1031 while (*p && Lexer::isSpace(c: *p & 0xff)) {
1032 ++p;
1033 ++pos;
1034 }
1035 }
1036 }
1037 }
1038
1039 if (gotRoot) {
1040 return true;
1041 }
1042
1043 error(category: errSyntaxError, pos: -1, msg: "Couldn't find trailer dictionary");
1044 return false;
1045}
1046
1047void XRef::setEncryption(int permFlagsA, bool ownerPasswordOkA, const unsigned char *fileKeyA, int keyLengthA, int encVersionA, int encRevisionA, CryptAlgorithm encAlgorithmA)
1048{
1049 int i;
1050
1051 encrypted = true;
1052 permFlags = permFlagsA;
1053 ownerPasswordOk = ownerPasswordOkA;
1054 if (keyLengthA <= 32) {
1055 keyLength = keyLengthA;
1056 } else {
1057 keyLength = 32;
1058 }
1059 for (i = 0; i < keyLength; ++i) {
1060 fileKey[i] = fileKeyA[i];
1061 }
1062 encVersion = encVersionA;
1063 encRevision = encRevisionA;
1064 encAlgorithm = encAlgorithmA;
1065}
1066
1067void XRef::getEncryptionParameters(unsigned char **fileKeyA, CryptAlgorithm *encAlgorithmA, int *keyLengthA)
1068{
1069 if (encrypted) {
1070 *fileKeyA = fileKey;
1071 *encAlgorithmA = encAlgorithm;
1072 *keyLengthA = keyLength;
1073 } else {
1074 // null encryption parameters
1075 *fileKeyA = nullptr;
1076 *encAlgorithmA = cryptRC4;
1077 *keyLengthA = 0;
1078 }
1079}
1080
1081bool XRef::isRefEncrypted(Ref r)
1082{
1083 xrefLocker();
1084
1085 const XRefEntry *e = getEntry(i: r.num);
1086 if (!e->obj.isNull()) { // check for updated object
1087 return false;
1088 }
1089
1090 switch (e->type) {
1091 case xrefEntryUncompressed: {
1092 return encrypted && !e->getFlag(flag: XRefEntry::Unencrypted);
1093 }
1094
1095 case xrefEntryCompressed: {
1096 const Goffset objStrNum = e->offset;
1097 if (unlikely(objStrNum < 0 || objStrNum >= size)) {
1098 error(category: errSyntaxError, pos: -1, msg: "XRef::isRefEncrypted - Compressed object offset out of xref bounds");
1099 return false;
1100 }
1101 const Object objStr = fetch(num: static_cast<int>(e->offset), gen: 0);
1102 return objStr.getStream()->isEncrypted();
1103 }
1104
1105 default: {
1106 }
1107 }
1108
1109 return false;
1110}
1111
1112bool XRef::okToPrint(bool ignoreOwnerPW) const
1113{
1114 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
1115}
1116
1117// we can print at high res if we are only doing security handler revision
1118// 2 (and we are allowed to print at all), or with security handler rev
1119// 3 and we are allowed to print, and bit 12 is set.
1120bool XRef::okToPrintHighRes(bool ignoreOwnerPW) const
1121{
1122 if (encrypted) {
1123 if (2 == encRevision) {
1124 return (okToPrint(ignoreOwnerPW));
1125 } else if (encRevision >= 3) {
1126 return (okToPrint(ignoreOwnerPW) && (permFlags & permHighResPrint));
1127 } else {
1128 // something weird - unknown security handler version
1129 return false;
1130 }
1131 } else {
1132 return true;
1133 }
1134}
1135
1136bool XRef::okToChange(bool ignoreOwnerPW) const
1137{
1138 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
1139}
1140
1141bool XRef::okToCopy(bool ignoreOwnerPW) const
1142{
1143 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
1144}
1145
1146bool XRef::okToAddNotes(bool ignoreOwnerPW) const
1147{
1148 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
1149}
1150
1151bool XRef::okToFillForm(bool ignoreOwnerPW) const
1152{
1153 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permFillForm);
1154}
1155
1156bool XRef::okToAccessibility(bool ignoreOwnerPW) const
1157{
1158 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permAccessibility);
1159}
1160
1161bool XRef::okToAssemble(bool ignoreOwnerPW) const
1162{
1163 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permAssemble);
1164}
1165
1166Object XRef::getCatalog()
1167{
1168 Object catalog = fetch(num: rootNum, gen: rootGen);
1169 if (catalog.isDict()) {
1170 return catalog;
1171 }
1172 bool wasReconstructed = false;
1173 if (constructXRef(wasReconstructed: &wasReconstructed, needCatalogDict: true)) {
1174 catalog = fetch(num: rootNum, gen: rootGen);
1175 }
1176 return catalog;
1177}
1178
1179Object XRef::fetch(const Ref ref, int recursion)
1180{
1181 return fetch(num: ref.num, gen: ref.gen, recursion);
1182}
1183
1184Object XRef::fetch(int num, int gen, int recursion, Goffset *endPos)
1185{
1186 XRefEntry *e;
1187 Object obj1, obj2, obj3;
1188
1189 xrefLocker();
1190
1191 const Ref ref = { .num: num, .gen: gen };
1192
1193 if (!refsBeingFetched.insert(ref)) {
1194 return Object(objNull);
1195 }
1196
1197 // Will remove ref from refsBeingFetched once it's destroyed, i.e. the function returns
1198 RefRecursionCheckerRemover remover(refsBeingFetched, ref);
1199
1200 // check for bogus ref - this can happen in corrupted PDF files
1201 if (num < 0 || num >= size) {
1202 goto err;
1203 }
1204
1205 e = getEntry(i: num);
1206 if (!e->obj.isNull()) { // check for updated object
1207 return e->obj.copy();
1208 }
1209
1210 switch (e->type) {
1211
1212 case xrefEntryUncompressed: {
1213 if (e->gen != gen || e->offset < 0) {
1214 goto err;
1215 }
1216 Parser parser { this, str->makeSubStream(start: start + e->offset, limited: false, length: 0, dict: Object(objNull)), true };
1217 obj1 = parser.getObj(recursion);
1218 obj2 = parser.getObj(recursion);
1219 obj3 = parser.getObj(recursion);
1220 if (!obj1.isInt() || obj1.getInt() != num || !obj2.isInt() || obj2.getInt() != gen || !obj3.isCmd(cmdA: "obj")) {
1221 // some buggy pdf have obj1234 for ints that represent 1234
1222 // try to recover here
1223 if (obj1.isInt() && obj1.getInt() == num && obj2.isInt() && obj2.getInt() == gen && obj3.isCmd()) {
1224 const char *cmd = obj3.getCmd();
1225 if (strlen(s: cmd) > 3 && cmd[0] == 'o' && cmd[1] == 'b' && cmd[2] == 'j') {
1226 char *end_ptr;
1227 long longNumber = strtol(nptr: cmd + 3, endptr: &end_ptr, base: 0);
1228 if (longNumber <= INT_MAX && longNumber >= INT_MIN && *end_ptr == '\0') {
1229 int number = longNumber;
1230 error(category: errSyntaxWarning, pos: -1, msg: "Cmd was not obj but {0:s}, assuming the creator meant obj {1:d}", cmd, number);
1231 if (endPos) {
1232 *endPos = parser.getPos();
1233 }
1234 return Object(number);
1235 }
1236 }
1237 }
1238 goto err;
1239 }
1240 Object obj = parser.getObj(simpleOnly: false, fileKey: (encrypted && !e->getFlag(flag: XRefEntry::Unencrypted)) ? fileKey : nullptr, encAlgorithm, keyLength, objNum: num, objGen: gen, recursion);
1241 if (endPos) {
1242 *endPos = parser.getPos();
1243 }
1244 return obj;
1245 }
1246
1247 case xrefEntryCompressed: {
1248#if 0 // Adobe apparently ignores the generation number on compressed objects
1249 if (gen != 0) {
1250 goto err;
1251 }
1252#endif
1253 if (e->offset >= (unsigned int)size || (entries[e->offset].type != xrefEntryUncompressed && entries[e->offset].type != xrefEntryNone)) {
1254 error(category: errSyntaxError, pos: -1, msg: "Invalid object stream");
1255 goto err;
1256 }
1257
1258 ObjectStream *objStr = objStrs.lookup(key: e->offset);
1259 if (!objStr) {
1260 objStr = new ObjectStream(this, static_cast<int>(e->offset), recursion + 1);
1261 if (!objStr->isOk()) {
1262 delete objStr;
1263 objStr = nullptr;
1264 goto err;
1265 } else {
1266 // XRef could be reconstructed in constructor of ObjectStream:
1267 e = getEntry(i: num);
1268 objStrs.put(key: e->offset, item: objStr);
1269 }
1270 }
1271 if (endPos) {
1272 *endPos = -1;
1273 }
1274 return objStr->getObject(objIdx: e->gen, objNum: num);
1275 }
1276
1277 default:
1278 goto err;
1279 }
1280
1281err:
1282 if (!xRefStream && !xrefReconstructed) {
1283 // Check if there has been any updated object, if there has been we can't reconstruct because that would mean losing the changes
1284 bool xrefHasChanges = false;
1285 for (int i = 0; i < size; i++) {
1286 if (entries[i].getFlag(flag: XRefEntry::Updated)) {
1287 xrefHasChanges = true;
1288 break;
1289 }
1290 }
1291 if (xrefHasChanges) {
1292 error(category: errInternal, pos: -1, msg: "xref num {0:d} not found but needed, document has changes, reconstruct aborted\n", num);
1293 // pretend we constructed the xref, otherwise we will do this check again and again
1294 xrefReconstructed = true;
1295 return Object(objNull);
1296 }
1297
1298 error(category: errInternal, pos: -1, msg: "xref num {0:d} not found but needed, try to reconstruct\n", num);
1299 rootNum = -1;
1300 constructXRef(wasReconstructed: &xrefReconstructed);
1301 return fetch(num, gen, recursion: ++recursion, endPos);
1302 }
1303 if (endPos) {
1304 *endPos = -1;
1305 }
1306 return Object(objNull);
1307}
1308
1309void XRef::lock()
1310{
1311 mutex.lock();
1312}
1313
1314void XRef::unlock()
1315{
1316 mutex.unlock();
1317}
1318
1319Object XRef::getDocInfo()
1320{
1321 return trailerDict.dictLookup(key: "Info");
1322}
1323
1324// Added for the pdftex project.
1325Object XRef::getDocInfoNF()
1326{
1327 return trailerDict.dictLookupNF(key: "Info").copy();
1328}
1329
1330Object XRef::createDocInfoIfNeeded(Ref *ref)
1331{
1332 Object obj = trailerDict.getDict()->lookup(key: "Info", returnRef: ref);
1333 getDocInfo();
1334
1335 if (obj.isDict() && *ref != Ref::INVALID()) {
1336 // Info is valid if it's a dict and to pointed by an indirect reference
1337 return obj;
1338 }
1339
1340 removeDocInfo();
1341
1342 obj = Object(new Dict(this));
1343 *ref = addIndirectObject(o: obj);
1344 trailerDict.dictSet(key: "Info", val: Object(*ref));
1345
1346 return obj;
1347}
1348
1349void XRef::removeDocInfo()
1350{
1351 Object infoObjRef = getDocInfoNF();
1352 if (infoObjRef.isNull()) {
1353 return;
1354 }
1355
1356 trailerDict.dictRemove(key: "Info");
1357
1358 if (likely(infoObjRef.isRef())) {
1359 removeIndirectObject(r: infoObjRef.getRef());
1360 }
1361}
1362
1363bool XRef::getStreamEnd(Goffset streamStart, Goffset *streamEnd)
1364{
1365 int a, b, m;
1366
1367 if (streamEndsLen == 0 || streamStart > streamEnds[streamEndsLen - 1]) {
1368 return false;
1369 }
1370
1371 a = -1;
1372 b = streamEndsLen - 1;
1373 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
1374 while (b - a > 1) {
1375 m = (a + b) / 2;
1376 if (streamStart <= streamEnds[m]) {
1377 b = m;
1378 } else {
1379 a = m;
1380 }
1381 }
1382 *streamEnd = streamEnds[b];
1383 return true;
1384}
1385
1386int XRef::getNumEntry(Goffset offset)
1387{
1388 if (size > 0) {
1389 int res = 0;
1390 Goffset resOffset = getEntry(i: 0)->offset;
1391 XRefEntry *e;
1392 for (int i = 1; i < size; ++i) {
1393 e = getEntry(i, complainIfMissing: false);
1394 if (e->type != xrefEntryFree && e->offset < offset && e->offset >= resOffset) {
1395 res = i;
1396 resOffset = e->offset;
1397 }
1398 }
1399 return res;
1400 } else {
1401 return -1;
1402 }
1403}
1404
1405void XRef::add(Ref ref, Goffset offs, bool used)
1406{
1407 add(num: ref.num, gen: ref.gen, offs, used);
1408}
1409
1410bool XRef::add(int num, int gen, Goffset offs, bool used)
1411{
1412 xrefLocker();
1413 if (num >= size) {
1414 if (num >= capacity) {
1415 entries = (XRefEntry *)greallocn_checkoverflow(p: entries, count: num + 1, size: sizeof(XRefEntry));
1416 if (unlikely(entries == nullptr)) {
1417 size = 0;
1418 capacity = 0;
1419 return false;
1420 }
1421
1422 capacity = num + 1;
1423 }
1424 for (int i = size; i < num + 1; ++i) {
1425 entries[i].offset = -1;
1426 entries[i].type = xrefEntryFree;
1427 new (&entries[i].obj) Object(objNull);
1428 entries[i].flags = 0;
1429 entries[i].gen = 0;
1430 }
1431 size = num + 1;
1432 }
1433 XRefEntry *e = getEntry(i: num);
1434 e->gen = gen;
1435 e->obj.setToNull();
1436 e->flags = 0;
1437 if (used) {
1438 e->type = xrefEntryUncompressed;
1439 e->offset = offs;
1440 } else {
1441 e->type = xrefEntryFree;
1442 e->offset = 0;
1443 }
1444 return true;
1445}
1446
1447void XRef::setModifiedObject(const Object *o, Ref r)
1448{
1449 xrefLocker();
1450 if (r.num < 0 || r.num >= size) {
1451 error(category: errInternal, pos: -1, msg: "XRef::setModifiedObject on unknown ref: {0:d}, {1:d}\n", r.num, r.gen);
1452 return;
1453 }
1454 XRefEntry *e = getEntry(i: r.num);
1455 if (unlikely(e->type == xrefEntryFree)) {
1456 error(category: errInternal, pos: -1, msg: "XRef::setModifiedObject on ref: {0:d}, {1:d} that is marked as free. This will cause a memory leak\n", r.num, r.gen);
1457 }
1458 e->obj = o->copy();
1459 e->setFlag(flag: XRefEntry::Updated, value: true);
1460 setModified();
1461}
1462
1463Ref XRef::addIndirectObject(const Object &o)
1464{
1465 int entryIndexToUse = -1;
1466 for (int i = 1; entryIndexToUse == -1 && i < size; ++i) {
1467 XRefEntry *e = getEntry(i, complainIfMissing: false /* complainIfMissing */);
1468 if (e->type == xrefEntryFree && e->gen < 65535) {
1469 entryIndexToUse = i;
1470 }
1471 }
1472
1473 XRefEntry *e;
1474 if (entryIndexToUse == -1) {
1475 entryIndexToUse = size;
1476 add(num: entryIndexToUse, gen: 0, offs: 0, used: false);
1477 e = getEntry(i: entryIndexToUse);
1478 } else {
1479 // reuse a free entry
1480 e = getEntry(i: entryIndexToUse);
1481 // we don't touch gen number, because it should have been
1482 // incremented when the object was deleted
1483 }
1484 e->type = xrefEntryUncompressed;
1485 e->obj = o.copy();
1486 e->setFlag(flag: XRefEntry::Updated, value: true);
1487 setModified();
1488
1489 Ref r;
1490 r.num = entryIndexToUse;
1491 r.gen = e->gen;
1492 return r;
1493}
1494
1495void XRef::removeIndirectObject(Ref r)
1496{
1497 xrefLocker();
1498 if (r.num < 0 || r.num >= size) {
1499 error(category: errInternal, pos: -1, msg: "XRef::removeIndirectObject on unknown ref: {0:d}, {1:d}\n", r.num, r.gen);
1500 return;
1501 }
1502 XRefEntry *e = getEntry(i: r.num);
1503 if (e->type == xrefEntryFree) {
1504 return;
1505 }
1506 e->obj.~Object();
1507 e->type = xrefEntryFree;
1508 if (likely(e->gen < 65535)) {
1509 e->gen++;
1510 }
1511 e->setFlag(flag: XRefEntry::Updated, value: true);
1512 setModified();
1513}
1514
1515Ref XRef::addStreamObject(Dict *dict, char *buffer, const Goffset bufferSize, StreamCompression compression)
1516{
1517 dict->add(key: "Length", val: Object((int)bufferSize));
1518 AutoFreeMemStream *stream = new AutoFreeMemStream(buffer, 0, bufferSize, Object(dict));
1519 stream->setFilterRemovalForbidden(true);
1520 switch (compression) {
1521 case StreamCompression::None:;
1522 break;
1523 case StreamCompression::Compress:
1524 stream->getDict()->add(key: "Filter", val: Object(objName, "FlateDecode"));
1525 break;
1526 }
1527 return addIndirectObject(o: Object((Stream *)stream));
1528}
1529
1530Ref XRef::addStreamObject(Dict *dict, uint8_t *buffer, const Goffset bufferSize, StreamCompression compression)
1531{
1532 return addStreamObject(dict, buffer: (char *)buffer, bufferSize, compression);
1533}
1534
1535void XRef::writeXRef(XRef::XRefWriter *writer, bool writeAllEntries)
1536{
1537 // create free entries linked-list
1538 if (getEntry(i: 0)->gen != 65535) {
1539 error(category: errInternal, pos: -1, msg: "XRef::writeXRef, entry 0 of the XRef is invalid (gen != 65535)\n");
1540 }
1541 int lastFreeEntry = 0;
1542 for (int i = 0; i < size; i++) {
1543 if (getEntry(i)->type == xrefEntryFree) {
1544 getEntry(i: lastFreeEntry)->offset = i;
1545 lastFreeEntry = i;
1546 }
1547 }
1548 getEntry(i: lastFreeEntry)->offset = 0;
1549
1550 if (writeAllEntries) {
1551 writer->startSection(first: 0, count: size);
1552 for (int i = 0; i < size; i++) {
1553 XRefEntry *e = getEntry(i);
1554 if (e->gen > 65535) {
1555 e->gen = 65535; // cap generation number to 65535 (required by PDFReference)
1556 }
1557 writer->writeEntry(offset: e->offset, gen: e->gen, type: e->type);
1558 }
1559 } else {
1560 int i = 0;
1561 while (i < size) {
1562 int j;
1563 for (j = i; j < size; j++) { // look for consecutive entries
1564 if ((getEntry(i: j)->type == xrefEntryFree) && (getEntry(i: j)->gen == 0)) {
1565 break;
1566 }
1567 }
1568 if (j - i != 0) {
1569 writer->startSection(first: i, count: j - i);
1570 for (int k = i; k < j; k++) {
1571 XRefEntry *e = getEntry(i: k);
1572 if (e->gen > 65535) {
1573 e->gen = 65535; // cap generation number to 65535 (required by PDFReference)
1574 }
1575 writer->writeEntry(offset: e->offset, gen: e->gen, type: e->type);
1576 }
1577 i = j;
1578 } else {
1579 ++i;
1580 }
1581 }
1582 }
1583}
1584
1585XRef::XRefTableWriter::XRefTableWriter(OutStream *outStrA)
1586{
1587 outStr = outStrA;
1588}
1589
1590void XRef::XRefTableWriter::startSection(int first, int count)
1591{
1592 outStr->printf(format: "%i %i\r\n", first, count);
1593}
1594
1595void XRef::XRefTableWriter::writeEntry(Goffset offset, int gen, XRefEntryType type)
1596{
1597 outStr->printf(format: "%010lli %05i %c\r\n", (long long)offset, gen, (type == xrefEntryFree) ? 'f' : 'n');
1598}
1599
1600void XRef::writeTableToFile(OutStream *outStr, bool writeAllEntries)
1601{
1602 XRefTableWriter writer(outStr);
1603 outStr->printf(format: "xref\r\n");
1604 writeXRef(writer: &writer, writeAllEntries);
1605}
1606
1607XRef::XRefStreamWriter::XRefStreamWriter(Array *indexA, GooString *stmBufA, int offsetSizeA)
1608{
1609 index = indexA;
1610 stmBuf = stmBufA;
1611 offsetSize = offsetSizeA;
1612}
1613
1614void XRef::XRefStreamWriter::startSection(int first, int count)
1615{
1616 index->add(elem: Object(first));
1617 index->add(elem: Object(count));
1618}
1619
1620void XRef::XRefStreamWriter::writeEntry(Goffset offset, int gen, XRefEntryType type)
1621{
1622 const int entryTotalSize = 1 + offsetSize + 2; /* type + offset + gen */
1623 char data[16];
1624 data[0] = (type == xrefEntryFree) ? 0 : 1;
1625 for (int i = offsetSize; i > 0; i--) {
1626 data[i] = offset & 0xff;
1627 offset >>= 8;
1628 }
1629 data[offsetSize + 1] = (gen >> 8) & 0xff;
1630 data[offsetSize + 2] = gen & 0xff;
1631 stmBuf->append(str: data, lengthA: entryTotalSize);
1632}
1633
1634XRef::XRefPreScanWriter::XRefPreScanWriter()
1635{
1636 hasOffsetsBeyond4GB = false;
1637}
1638
1639void XRef::XRefPreScanWriter::startSection(int first, int count) { }
1640
1641void XRef::XRefPreScanWriter::writeEntry(Goffset offset, int gen, XRefEntryType type)
1642{
1643 if (offset >= 0x100000000ll) {
1644 hasOffsetsBeyond4GB = true;
1645 }
1646}
1647
1648void XRef::writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref)
1649{
1650 Array *index = new Array(xref);
1651 stmBuf->clear();
1652
1653 // First pass: determine whether all offsets fit in 4 bytes or not
1654 XRefPreScanWriter prescan;
1655 writeXRef(writer: &prescan, writeAllEntries: false);
1656 const int offsetSize = prescan.hasOffsetsBeyond4GB ? sizeof(Goffset) : 4;
1657
1658 // Second pass: actually write the xref stream
1659 XRefStreamWriter writer(index, stmBuf, offsetSize);
1660 writeXRef(writer: &writer, writeAllEntries: false);
1661
1662 xrefDict->set(key: "Type", val: Object(objName, "XRef"));
1663 xrefDict->set(key: "Index", val: Object(index));
1664 Array *wArray = new Array(xref);
1665 wArray->add(elem: Object(1));
1666 wArray->add(elem: Object(offsetSize));
1667 wArray->add(elem: Object(2));
1668 xrefDict->set(key: "W", val: Object(wArray));
1669}
1670
1671bool XRef::parseEntry(Goffset offset, XRefEntry *entry)
1672{
1673 bool r;
1674
1675 if (unlikely(entry == nullptr)) {
1676 return false;
1677 }
1678
1679 Parser parser(nullptr, str->makeSubStream(start: offset, limited: false, length: 20, dict: Object(objNull)), true);
1680
1681 Object obj1, obj2, obj3;
1682 if (((obj1 = parser.getObj(), obj1.isInt()) || obj1.isInt64()) && (obj2 = parser.getObj(), obj2.isInt()) && (obj3 = parser.getObj(), obj3.isCmd(cmdA: "n") || obj3.isCmd(cmdA: "f"))) {
1683 if (obj1.isInt64()) {
1684 entry->offset = obj1.getInt64();
1685 } else {
1686 entry->offset = obj1.getInt();
1687 }
1688 entry->gen = obj2.getInt();
1689 entry->type = obj3.isCmd(cmdA: "n") ? xrefEntryUncompressed : xrefEntryFree;
1690 entry->obj.setToNull();
1691 entry->flags = 0;
1692 r = true;
1693 } else {
1694 r = false;
1695 }
1696
1697 return r;
1698}
1699
1700/* Traverse all XRef tables and, if untilEntryNum != -1, stop as soon as
1701 * untilEntryNum is found, or try to reconstruct the xref table if it's not
1702 * present in any xref.
1703 * If xrefStreamObjsNum is not NULL, it is filled with the list of the object
1704 * numbers of the XRef streams that have been traversed */
1705void XRef::readXRefUntil(int untilEntryNum, std::vector<int> *xrefStreamObjsNum)
1706{
1707 std::vector<Goffset> followedPrev;
1708 while (prevXRefOffset && (untilEntryNum == -1 || (untilEntryNum < size && entries[untilEntryNum].type == xrefEntryNone))) {
1709 bool followed = false;
1710 for (long long j : followedPrev) {
1711 if (j == prevXRefOffset) {
1712 followed = true;
1713 break;
1714 }
1715 }
1716 if (followed) {
1717 error(category: errSyntaxError, pos: -1, msg: "Circular XRef");
1718 if (!xRefStream && !(ok = constructXRef(wasReconstructed: nullptr))) {
1719 errCode = errDamaged;
1720 }
1721 break;
1722 }
1723
1724 followedPrev.push_back(x: prevXRefOffset);
1725
1726 std::vector<Goffset> followedXRefStm;
1727 if (!readXRef(pos: &prevXRefOffset, followedXRefStm: &followedXRefStm, xrefStreamObjsNum)) {
1728 prevXRefOffset = 0;
1729 }
1730
1731 // if there was a problem with the xref table, or we haven't found the entry
1732 // we were looking for, try to reconstruct the xref
1733 if (!ok || (!prevXRefOffset && untilEntryNum != -1 && entries[untilEntryNum].type == xrefEntryNone)) {
1734 if (!xRefStream && !(ok = constructXRef(wasReconstructed: nullptr))) {
1735 errCode = errDamaged;
1736 break;
1737 }
1738 break;
1739 }
1740 }
1741}
1742
1743namespace {
1744
1745struct DummyXRefEntry : XRefEntry
1746{
1747 DummyXRefEntry()
1748 {
1749 offset = -1;
1750 gen = 0;
1751 type = xrefEntryNone;
1752 flags = 0;
1753 obj = Object(objNull);
1754 }
1755};
1756
1757DummyXRefEntry dummyXRefEntry;
1758
1759}
1760
1761XRefEntry *XRef::getEntry(int i, bool complainIfMissing)
1762{
1763 if (unlikely(i < 0)) {
1764 error(category: errInternal, pos: -1, msg: "Request for invalid XRef entry [{0:d}]", i);
1765 return &dummyXRefEntry;
1766 }
1767
1768 if (i >= size || entries[i].type == xrefEntryNone) {
1769
1770 if ((!xRefStream) && mainXRefEntriesOffset) {
1771 if (unlikely(i >= capacity)) {
1772 error(category: errInternal, pos: -1, msg: "Request for out-of-bounds XRef entry [{0:d}]", i);
1773 return &dummyXRefEntry;
1774 }
1775
1776 if (!parseEntry(offset: mainXRefEntriesOffset + 20 * i, entry: &entries[i])) {
1777 error(category: errSyntaxError, pos: -1, msg: "Failed to parse XRef entry [{0:d}].", i);
1778 return &dummyXRefEntry;
1779 }
1780 } else {
1781 // Read XRef tables until the entry we're looking for is found
1782 readXRefUntil(untilEntryNum: i);
1783
1784 // We might have reconstructed the xref
1785 // Check again i is in bounds
1786 if (unlikely(i >= size)) {
1787 return &dummyXRefEntry;
1788 }
1789
1790 if (entries[i].type == xrefEntryNone) {
1791 if (complainIfMissing) {
1792 error(category: errSyntaxError, pos: -1, msg: "Invalid XRef entry {0:d}", i);
1793 }
1794 entries[i].type = xrefEntryFree;
1795 }
1796 }
1797 }
1798
1799 return &entries[i];
1800}
1801
1802// Recursively sets the Unencrypted flag in all referenced xref entries
1803void XRef::markUnencrypted(Object *obj)
1804{
1805 Object obj1;
1806
1807 switch (obj->getType()) {
1808 case objArray: {
1809 Array *array = obj->getArray();
1810 for (int i = 0; i < array->getLength(); i++) {
1811 obj1 = array->getNF(i).copy();
1812 markUnencrypted(obj: &obj1);
1813 }
1814 break;
1815 }
1816 case objStream:
1817 case objDict: {
1818 Dict *dict;
1819 if (obj->getType() == objStream) {
1820 Stream *stream = obj->getStream();
1821 dict = stream->getDict();
1822 } else {
1823 dict = obj->getDict();
1824 }
1825 for (int i = 0; i < dict->getLength(); i++) {
1826 obj1 = dict->getValNF(i).copy();
1827 markUnencrypted(obj: &obj1);
1828 }
1829 break;
1830 }
1831 case objRef: {
1832 const Ref ref = obj->getRef();
1833 XRefEntry *e = getEntry(i: ref.num);
1834 if (e->getFlag(flag: XRefEntry::Unencrypted)) {
1835 return; // We've already been here: prevent infinite recursion
1836 }
1837 e->setFlag(flag: XRefEntry::Unencrypted, value: true);
1838 obj1 = fetch(ref);
1839 markUnencrypted(obj: &obj1);
1840 break;
1841 }
1842 default:
1843 break;
1844 }
1845}
1846
1847void XRef::scanSpecialFlags()
1848{
1849 if (scannedSpecialFlags) {
1850 return;
1851 }
1852 scannedSpecialFlags = true;
1853
1854 // "Rewind" the XRef linked list, so that readXRefUntil re-reads all XRef
1855 // tables/streams, even those that had already been parsed
1856 prevXRefOffset = mainXRefOffset;
1857
1858 std::vector<int> xrefStreamObjNums;
1859 if (!streamEndsLen) { // don't do it for already reconstructed xref
1860 readXRefUntil(untilEntryNum: -1 /* read all xref sections */, xrefStreamObjsNum: &xrefStreamObjNums);
1861 }
1862
1863 // Mark object streams as DontRewrite, because we write each object
1864 // individually in full rewrite mode.
1865 for (int i = 0; i < size; ++i) {
1866 if (entries[i].type == xrefEntryCompressed) {
1867 const Goffset objStmNum = entries[i].offset;
1868 if (unlikely(objStmNum < 0 || objStmNum >= size)) {
1869 error(category: errSyntaxError, pos: -1, msg: "Compressed object offset out of xref bounds");
1870 } else {
1871 getEntry(i: static_cast<int>(objStmNum))->setFlag(flag: XRefEntry::DontRewrite, value: true);
1872 }
1873 }
1874 }
1875
1876 // Mark XRef streams objects as Unencrypted and DontRewrite
1877 for (const int objNum : xrefStreamObjNums) {
1878 getEntry(i: objNum)->setFlag(flag: XRefEntry::Unencrypted, value: true);
1879 getEntry(i: objNum)->setFlag(flag: XRefEntry::DontRewrite, value: true);
1880 }
1881
1882 // Mark objects referred from the Encrypt dict as Unencrypted
1883 markUnencrypted();
1884}
1885
1886void XRef::markUnencrypted()
1887{
1888 // Mark objects referred from the Encrypt dict as Unencrypted
1889 const Object &obj = trailerDict.dictLookupNF(key: "Encrypt");
1890 if (obj.isRef()) {
1891 XRefEntry *e = getEntry(i: obj.getRefNum());
1892 e->setFlag(flag: XRefEntry::Unencrypted, value: true);
1893 }
1894}
1895
1896XRef::XRefWriter::~XRefWriter() = default;
1897

source code of poppler/poppler/XRef.cc