//========================================================================
//
// Catalog.cc
//
// Copyright 1996-2007 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005-2013, 2015, 2017-2024 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2005 Jeff Muizelaar <jrmuizel@nit.ca>
// Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
// Copyright (C) 2005 Marco Pesenti Gritti <mpg@redhat.com>
// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
// Copyright (C) 2006, 2008, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
// Copyright (C) 2009 Ilya Gorenbein <igorenbein@finjan.com>
// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
// Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
// Copyright (C) 2014 Ed Porras <ed@moto-research.com>
// Copyright (C) 2015 Even Rouault <even.rouault@spatialys.com>
// Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
// Copyright (C) 2020 Thorsten Behrens <Thorsten.Behrens@CIB.de>
// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
// Copyright (C) 2021 RM <rm+git@arcsin.org>
// Copyright (C) 2023 Ilaï Deutel <idtl@google.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
49
50#include <config.h>
51
52#include <cstddef>
53#include <cstdlib>
54#include "goo/gmem.h"
55#include "Object.h"
56#include "PDFDoc.h"
57#include "XRef.h"
58#include "Array.h"
59#include "Dict.h"
60#include "Page.h"
61#include "Error.h"
62#include "Link.h"
63#include "PageLabelInfo.h"
64#include "Catalog.h"
65#include "Form.h"
66#include "OptionalContent.h"
67#include "ViewerPreferences.h"
68#include "FileSpec.h"
69#include "StructTreeRoot.h"
70
//------------------------------------------------------------------------
// Catalog
//------------------------------------------------------------------------
74
// Locks `mutex` until the end of the enclosing scope. The expansion declares
// a local named `locker`, so it can be used at most once per scope.
#define catalogLocker() const std::scoped_lock locker { mutex }
76
77Catalog::Catalog(PDFDoc *docA)
78{
79 ok = true;
80 doc = docA;
81 xref = doc->getXRef();
82 numPages = -1;
83 pageLabelInfo = nullptr;
84 form = nullptr;
85 optContent = nullptr;
86 pageMode = pageModeNull;
87 pageLayout = pageLayoutNull;
88 destNameTree = nullptr;
89 embeddedFileNameTree = nullptr;
90 jsNameTree = nullptr;
91 viewerPrefs = nullptr;
92 structTreeRoot = nullptr;
93
94 pagesList = nullptr;
95 pagesRefList = nullptr;
96 attrsList = nullptr;
97 kidsIdxList = nullptr;
98 markInfo = markInfoNull;
99
100 Object catDict = xref->getCatalog();
101 if (!catDict.isDict()) {
102 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
103 ok = false;
104 return;
105 }
106 // get the AcroForm dictionary
107 acroForm = catDict.getDict()->lookup(key: "AcroForm");
108
109 // read base URI
110 Object obj = catDict.getDict()->lookupEnsureEncryptedIfNeeded(key: "URI");
111 if (obj.isDict()) {
112 Object obj2 = obj.getDict()->lookupEnsureEncryptedIfNeeded(key: "Base");
113 if (obj2.isString()) {
114 baseURI = obj2.getString()->toStr();
115 }
116 }
117
118 // get the Optional Content dictionary
119 Object optContentProps = catDict.dictLookup(key: "OCProperties");
120 if (optContentProps.isDict()) {
121 optContent = new OCGs(&optContentProps, xref);
122 if (!optContent->isOk()) {
123 delete optContent;
124 optContent = nullptr;
125 }
126 }
127
128 // actions
129 additionalActions = catDict.dictLookupNF(key: "AA").copy();
130
131 // get the ViewerPreferences dictionary
132 viewerPreferences = catDict.dictLookup(key: "ViewerPreferences");
133
134 const Object version = catDict.dictLookup(key: "Version");
135 if (version.isName()) {
136 const int res = sscanf(s: version.getName(), format: "%d.%d", &catalogPdfMajorVersion, &catalogPdfMinorVersion);
137 if (res != 2) {
138 catalogPdfMajorVersion = -1;
139 catalogPdfMinorVersion = -1;
140 }
141 }
142}
143
144Catalog::~Catalog()
145{
146 delete kidsIdxList;
147 if (attrsList) {
148 std::vector<PageAttrs *>::iterator it;
149 for (it = attrsList->begin(); it != attrsList->end(); ++it) {
150 delete *it;
151 }
152 delete attrsList;
153 }
154 delete pagesRefList;
155 delete pagesList;
156 delete destNameTree;
157 delete embeddedFileNameTree;
158 delete jsNameTree;
159 delete pageLabelInfo;
160 delete form;
161 delete optContent;
162 delete viewerPrefs;
163 delete structTreeRoot;
164}
165
166std::unique_ptr<GooString> Catalog::readMetadata()
167{
168 catalogLocker();
169 if (metadata.isNone()) {
170 Object catDict = xref->getCatalog();
171 if (catDict.isDict()) {
172 metadata = catDict.dictLookup(key: "Metadata");
173 } else {
174 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
175 metadata.setToNull();
176 }
177 }
178
179 if (!metadata.isStream()) {
180 return {};
181 }
182 Object obj = metadata.streamGetDict()->lookup(key: "Subtype");
183 if (!obj.isName(nameA: "XML")) {
184 error(category: errSyntaxWarning, pos: -1, msg: "Unknown Metadata type: '{0:s}'", obj.isName() ? obj.getName() : "???");
185 }
186 std::unique_ptr<GooString> s = std::make_unique<GooString>();
187 metadata.getStream()->fillGooString(s: s.get());
188 metadata.streamClose();
189 return s;
190}
191
192Page *Catalog::getPage(int i)
193{
194 if (i < 1) {
195 return nullptr;
196 }
197
198 catalogLocker();
199 if (std::size_t(i) > pages.size()) {
200 bool cached = cachePageTree(page: i);
201 if (cached == false) {
202 return nullptr;
203 }
204 }
205 return pages[i - 1].first.get();
206}
207
208Ref *Catalog::getPageRef(int i)
209{
210 if (i < 1) {
211 return nullptr;
212 }
213
214 catalogLocker();
215 if (std::size_t(i) > pages.size()) {
216 bool cached = cachePageTree(page: i);
217 if (cached == false) {
218 return nullptr;
219 }
220 }
221 return &pages[i - 1].second;
222}
223
224bool Catalog::cachePageTree(int page)
225{
226 if (pagesList == nullptr) {
227
228 Ref pagesRef;
229
230 Object catDict = xref->getCatalog();
231
232 if (catDict.isDict()) {
233 const Object &pagesDictRef = catDict.dictLookupNF(key: "Pages");
234 if (pagesDictRef.isRef() && pagesDictRef.getRefNum() >= 0 && pagesDictRef.getRefNum() < xref->getNumObjects()) {
235 pagesRef = pagesDictRef.getRef();
236 } else {
237 error(category: errSyntaxError, pos: -1, msg: "Catalog dictionary does not contain a valid \"Pages\" entry");
238 return false;
239 }
240 } else {
241 error(category: errSyntaxError, pos: -1, msg: "Could not find catalog dictionary");
242 return false;
243 }
244
245 Object obj = catDict.dictLookup(key: "Pages");
246 // This should really be isDict("Pages"), but I've seen at least one
247 // PDF file where the /Type entry is missing.
248 if (!obj.isDict()) {
249 error(category: errSyntaxError, pos: -1, msg: "Top-level pages object is wrong type ({0:s})", obj.getTypeName());
250 return false;
251 }
252
253 pages.clear();
254 attrsList = new std::vector<PageAttrs *>();
255 attrsList->push_back(x: new PageAttrs(nullptr, obj.getDict()));
256 pagesList = new std::vector<Object>();
257 pagesList->push_back(x: std::move(obj));
258 pagesRefList = new std::vector<Ref>();
259 pagesRefList->push_back(x: pagesRef);
260 kidsIdxList = new std::vector<int>();
261 kidsIdxList->push_back(x: 0);
262 }
263
264 while (true) {
265
266 if (std::size_t(page) <= pages.size()) {
267 return true;
268 }
269
270 if (pagesList->empty()) {
271 return false;
272 }
273
274 Object kids = pagesList->back().dictLookup(key: "Kids");
275 if (!kids.isArray()) {
276 error(category: errSyntaxError, pos: -1, msg: "Kids object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kids.getTypeName());
277 return false;
278 }
279
280 int kidsIdx = kidsIdxList->back();
281 if (kidsIdx >= kids.arrayGetLength()) {
282 pagesList->pop_back();
283 pagesRefList->pop_back();
284 delete attrsList->back();
285 attrsList->pop_back();
286 kidsIdxList->pop_back();
287 if (!kidsIdxList->empty()) {
288 kidsIdxList->back()++;
289 }
290 continue;
291 }
292
293 const Object &kidRef = kids.arrayGetNF(i: kidsIdx);
294 if (!kidRef.isRef()) {
295 error(category: errSyntaxError, pos: -1, msg: "Kid object (page {0:uld}) is not an indirect reference ({1:s})", pages.size() + 1, kidRef.getTypeName());
296 return false;
297 }
298
299 bool loop = false;
300 ;
301 for (const Ref &pageRef : *pagesRefList) {
302 if (pageRef.num == kidRef.getRefNum()) {
303 loop = true;
304 break;
305 }
306 }
307 if (loop) {
308 error(category: errSyntaxError, pos: -1, msg: "Loop in Pages tree");
309 kidsIdxList->back()++;
310 continue;
311 }
312
313 Object kid = kids.arrayGet(i: kidsIdx);
314 if (kid.isDict(dictType: "Page") || (kid.isDict() && !kid.getDict()->hasKey(key: "Kids"))) {
315 PageAttrs *attrs = new PageAttrs(attrsList->back(), kid.getDict());
316 auto p = std::make_unique<Page>(args&: doc, args: pages.size() + 1, args: std::move(kid), args: kidRef.getRef(), args&: attrs, args&: form);
317 if (!p->isOk()) {
318 error(category: errSyntaxError, pos: -1, msg: "Failed to create page (page {0:uld})", pages.size() + 1);
319 return false;
320 }
321
322 if (pages.size() >= std::size_t(numPages)) {
323 error(category: errSyntaxError, pos: -1, msg: "Page count in top-level pages object is incorrect");
324 return false;
325 }
326
327 pages.emplace_back(args: std::move(p), args: kidRef.getRef());
328
329 kidsIdxList->back()++;
330
331 // This should really be isDict("Pages"), but I've seen at least one
332 // PDF file where the /Type entry is missing.
333 } else if (kid.isDict()) {
334 attrsList->push_back(x: new PageAttrs(attrsList->back(), kid.getDict()));
335 pagesRefList->push_back(x: kidRef.getRef());
336 pagesList->push_back(x: std::move(kid));
337 kidsIdxList->push_back(x: 0);
338 } else {
339 error(category: errSyntaxError, pos: -1, msg: "Kid object (page {0:uld}) is wrong type ({1:s})", pages.size() + 1, kid.getTypeName());
340 kidsIdxList->back()++;
341 }
342 }
343
344 return false;
345}
346
347int Catalog::findPage(const Ref pageRef)
348{
349 int i;
350
351 for (i = 0; i < getNumPages(); ++i) {
352 Ref *ref = getPageRef(i: i + 1);
353 if (ref != nullptr && *ref == pageRef) {
354 return i + 1;
355 }
356 }
357 return 0;
358}
359
360std::unique_ptr<LinkDest> Catalog::findDest(const GooString *name)
361{
362 // try named destination dictionary then name tree
363 if (getDests()->isDict()) {
364 Object obj1 = getDests()->dictLookup(key: name->c_str());
365 return createLinkDest(obj: &obj1);
366 }
367
368 catalogLocker();
369 Object obj2 = getDestNameTree()->lookup(name);
370 return createLinkDest(obj: &obj2);
371}
372
373std::unique_ptr<LinkDest> Catalog::createLinkDest(Object *obj)
374{
375 std::unique_ptr<LinkDest> dest;
376 if (obj->isArray()) {
377 dest = std::make_unique<LinkDest>(args: obj->getArray());
378 } else if (obj->isDict()) {
379 Object obj2 = obj->dictLookup(key: "D");
380 if (obj2.isArray()) {
381 dest = std::make_unique<LinkDest>(args: obj2.getArray());
382 } else {
383 error(category: errSyntaxWarning, pos: -1, msg: "Bad named destination value");
384 }
385 } else {
386 error(category: errSyntaxWarning, pos: -1, msg: "Bad named destination value");
387 }
388 if (dest && !dest->isOk()) {
389 dest.reset();
390 }
391
392 return dest;
393}
394
395int Catalog::numDests()
396{
397 Object *obj;
398
399 obj = getDests();
400 if (!obj->isDict()) {
401 return 0;
402 }
403 return obj->dictGetLength();
404}
405
406const char *Catalog::getDestsName(int i)
407{
408 Object *obj;
409
410 obj = getDests();
411 if (!obj->isDict()) {
412 return nullptr;
413 }
414 return obj->dictGetKey(i);
415}
416
417std::unique_ptr<LinkDest> Catalog::getDestsDest(int i)
418{
419 Object *obj = getDests();
420 if (!obj->isDict()) {
421 return nullptr;
422 }
423 Object obj1 = obj->dictGetVal(i);
424 return createLinkDest(obj: &obj1);
425}
426
427std::unique_ptr<LinkDest> Catalog::getDestNameTreeDest(int i)
428{
429 Object obj;
430
431 catalogLocker();
432 Object *aux = getDestNameTree()->getValue(i);
433 if (aux) {
434 obj = aux->fetch(xref);
435 }
436 return createLinkDest(obj: &obj);
437}
438
439std::unique_ptr<FileSpec> Catalog::embeddedFile(int i)
440{
441 catalogLocker();
442 Object *obj = getEmbeddedFileNameTree()->getValue(i);
443 if (obj->isRef()) {
444 Object fsDict = obj->fetch(xref);
445 return std::make_unique<FileSpec>(args: &fsDict);
446 } else if (obj->isDict()) {
447 return std::make_unique<FileSpec>(args&: obj);
448 } else {
449 Object null;
450 return std::make_unique<FileSpec>(args: &null);
451 }
452}
453
454bool Catalog::hasEmbeddedFile(const std::string &fileName)
455{
456 NameTree *ef = getEmbeddedFileNameTree();
457 for (int i = 0; i < ef->numEntries(); ++i) {
458 if (fileName == ef->getName(i)->toStr()) {
459 return true;
460 }
461 }
462 return false;
463}
464
465void Catalog::addEmbeddedFile(GooFile *file, const std::string &fileName)
466{
467 catalogLocker();
468
469 const Ref fileSpecRef = xref->addIndirectObject(o: FileSpec::newFileSpecObject(xref, file, fileName));
470
471 Object catDict = xref->getCatalog();
472 Ref namesObjRef;
473 Object namesObj = catDict.getDict()->lookup(key: "Names", returnRef: &namesObjRef);
474 if (!namesObj.isDict()) {
475 // Need to create the names Dict
476 catDict.dictSet(key: "Names", val: Object(new Dict(xref)));
477 namesObj = catDict.getDict()->lookup(key: "Names");
478
479 // Trigger getting the names dict again when needed
480 names = Object();
481 }
482
483 Dict *namesDict = namesObj.getDict();
484
485 // We create a new EmbeddedFiles nametree, this replaces the existing one (if any), but it's not a problem
486 Object embeddedFilesObj = Object(new Dict(xref));
487 const Ref embeddedFilesRef = xref->addIndirectObject(o: embeddedFilesObj);
488
489 Array *embeddedFilesNamesArray = new Array(xref);
490
491 // This flattens out the existing EmbeddedFiles nametree (if any), should not be a problem
492 NameTree *ef = getEmbeddedFileNameTree();
493 bool fileAlreadyAdded = false;
494 for (int i = 0; i < ef->numEntries(); ++i) {
495 const GooString *efNameI = ef->getName(i);
496
497 // we need to add the file if it has not been added yet and the name is smaller or equal lexicographically
498 // than the current item
499 const bool sameFileName = fileName == efNameI->toStr();
500 const bool addFile = !fileAlreadyAdded && (sameFileName || fileName < efNameI->toStr());
501 if (addFile) {
502 // If the new name is smaller lexicographically than an existing file add it in its correct position
503 embeddedFilesNamesArray->add(elem: Object(new GooString(fileName)));
504 embeddedFilesNamesArray->add(elem: Object(fileSpecRef));
505 fileAlreadyAdded = true;
506 }
507 if (sameFileName) {
508 // If the new name is the same lexicographically than an existing file then don't add the existing file (i.e. replace)
509 continue;
510 }
511 embeddedFilesNamesArray->add(elem: Object(efNameI->copy()));
512 embeddedFilesNamesArray->add(elem: ef->getValue(i)->copy());
513 }
514
515 if (!fileAlreadyAdded) {
516 // The new file is bigger lexicographically than the existing ones
517 embeddedFilesNamesArray->add(elem: Object(new GooString(fileName)));
518 embeddedFilesNamesArray->add(elem: Object(fileSpecRef));
519 }
520
521 embeddedFilesObj.dictSet(key: "Names", val: Object(embeddedFilesNamesArray));
522 namesDict->set(key: "EmbeddedFiles", val: Object(embeddedFilesRef));
523
524 if (namesObjRef != Ref::INVALID()) {
525 xref->setModifiedObject(o: &namesObj, r: namesObjRef);
526 } else {
527 xref->setModifiedObject(o: &catDict, r: { .num: xref->getRootNum(), .gen: xref->getRootGen() });
528 }
529
530 // recreate Nametree on next call that uses it
531 delete embeddedFileNameTree;
532 embeddedFileNameTree = nullptr;
533}
534
535GooString *Catalog::getJS(int i)
536{
537 Object obj;
538 // getJSNameTree()->getValue(i) returns a shallow copy of the object so we
539 // do not need to free it
540 catalogLocker();
541 Object *aux = getJSNameTree()->getValue(i);
542 if (aux) {
543 obj = aux->fetch(xref);
544 }
545
546 if (!obj.isDict()) {
547 return nullptr;
548 }
549 Object obj2 = obj.dictLookup(key: "S");
550 if (!obj2.isName()) {
551 return nullptr;
552 }
553 if (strcmp(s1: obj2.getName(), s2: "JavaScript")) {
554 return nullptr;
555 }
556 obj2 = obj.dictLookup(key: "JS");
557 GooString *js = nullptr;
558 if (obj2.isString()) {
559 js = new GooString(obj2.getString());
560 } else if (obj2.isStream()) {
561 Stream *stream = obj2.getStream();
562 js = new GooString();
563 stream->fillGooString(s: js);
564 }
565 return js;
566}
567
568Catalog::PageMode Catalog::getPageMode()
569{
570
571 catalogLocker();
572 if (pageMode == pageModeNull) {
573
574 pageMode = pageModeNone;
575
576 Object catDict = xref->getCatalog();
577 if (!catDict.isDict()) {
578 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
579 return pageMode;
580 }
581
582 Object obj = catDict.dictLookup(key: "PageMode");
583 if (obj.isName()) {
584 if (obj.isName(nameA: "UseNone")) {
585 pageMode = pageModeNone;
586 } else if (obj.isName(nameA: "UseOutlines")) {
587 pageMode = pageModeOutlines;
588 } else if (obj.isName(nameA: "UseThumbs")) {
589 pageMode = pageModeThumbs;
590 } else if (obj.isName(nameA: "FullScreen")) {
591 pageMode = pageModeFullScreen;
592 } else if (obj.isName(nameA: "UseOC")) {
593 pageMode = pageModeOC;
594 } else if (obj.isName(nameA: "UseAttachments")) {
595 pageMode = pageModeAttach;
596 }
597 }
598 }
599 return pageMode;
600}
601
602Catalog::PageLayout Catalog::getPageLayout()
603{
604
605 catalogLocker();
606 if (pageLayout == pageLayoutNull) {
607
608 pageLayout = pageLayoutNone;
609
610 Object catDict = xref->getCatalog();
611 if (!catDict.isDict()) {
612 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
613 return pageLayout;
614 }
615
616 pageLayout = pageLayoutNone;
617 Object obj = catDict.dictLookup(key: "PageLayout");
618 if (obj.isName()) {
619 if (obj.isName(nameA: "SinglePage")) {
620 pageLayout = pageLayoutSinglePage;
621 }
622 if (obj.isName(nameA: "OneColumn")) {
623 pageLayout = pageLayoutOneColumn;
624 }
625 if (obj.isName(nameA: "TwoColumnLeft")) {
626 pageLayout = pageLayoutTwoColumnLeft;
627 }
628 if (obj.isName(nameA: "TwoColumnRight")) {
629 pageLayout = pageLayoutTwoColumnRight;
630 }
631 if (obj.isName(nameA: "TwoPageLeft")) {
632 pageLayout = pageLayoutTwoPageLeft;
633 }
634 if (obj.isName(nameA: "TwoPageRight")) {
635 pageLayout = pageLayoutTwoPageRight;
636 }
637 }
638 }
639 return pageLayout;
640}
641
642NameTree::NameTree()
643{
644 size = 0;
645 length = 0;
646 entries = nullptr;
647}
648
649NameTree::~NameTree()
650{
651 int i;
652
653 for (i = 0; i < length; i++) {
654 delete entries[i];
655 }
656
657 gfree(p: entries);
658}
659
660NameTree::Entry::Entry(Array *array, int index)
661{
662 if (!array->getString(i: index, string: &name)) {
663 Object aux = array->get(i: index);
664 if (aux.isString()) {
665 name.append(str: aux.getString());
666 } else {
667 error(category: errSyntaxError, pos: -1, msg: "Invalid page tree");
668 }
669 }
670 value = array->getNF(i: index + 1).copy();
671}
672
673NameTree::Entry::~Entry() { }
674
675void NameTree::addEntry(Entry *entry)
676{
677 if (length == size) {
678 if (length == 0) {
679 size = 8;
680 } else {
681 size *= 2;
682 }
683 entries = (Entry **)grealloc(p: entries, size: sizeof(Entry *) * size);
684 }
685
686 entries[length] = entry;
687 ++length;
688}
689
690int NameTree::Entry::cmpEntry(const void *voidEntry, const void *voidOtherEntry)
691{
692 Entry *entry = *(NameTree::Entry **)voidEntry;
693 Entry *otherEntry = *(NameTree::Entry **)voidOtherEntry;
694
695 return entry->name.cmp(str: &otherEntry->name);
696}
697
698void NameTree::init(XRef *xrefA, Object *tree)
699{
700 xref = xrefA;
701 RefRecursionChecker seen;
702 parse(tree, seen);
703 if (entries && length > 0) {
704 qsort(base: entries, nmemb: length, size: sizeof(Entry *), compar: Entry::cmpEntry);
705 }
706}
707
708void NameTree::parse(const Object *tree, RefRecursionChecker &seen)
709{
710 if (!tree->isDict()) {
711 return;
712 }
713
714 // leaf node
715 Object names = tree->dictLookup(key: "Names");
716 if (names.isArray()) {
717 for (int i = 0; i < names.arrayGetLength(); i += 2) {
718 NameTree::Entry *entry;
719
720 entry = new Entry(names.getArray(), i);
721 addEntry(entry);
722 }
723 }
724
725 // root or intermediate node
726 Ref ref;
727 const Object kids = tree->getDict()->lookup(key: "Kids", returnRef: &ref);
728 if (!seen.insert(ref)) {
729 error(category: errSyntaxError, pos: -1, msg: "loop in NameTree (numObj: {0:d})", ref.num);
730 return;
731 }
732 if (kids.isArray()) {
733 for (int i = 0; i < kids.arrayGetLength(); ++i) {
734 const Object kid = kids.getArray()->get(i, returnRef: &ref);
735 if (!seen.insert(ref)) {
736 error(category: errSyntaxError, pos: -1, msg: "loop in NameTree (numObj: {0:d})", ref.num);
737 continue;
738 }
739 if (kid.isDict()) {
740 parse(tree: &kid, seen);
741 }
742 }
743 }
744}
745
746int NameTree::Entry::cmp(const void *voidKey, const void *voidEntry)
747{
748 GooString *key = (GooString *)voidKey;
749 Entry *entry = *(NameTree::Entry **)voidEntry;
750
751 return key->cmp(str: &entry->name);
752}
753
754Object NameTree::lookup(const GooString *name)
755{
756 Entry **entry;
757
758 entry = (Entry **)bsearch(key: name, base: entries, nmemb: length, size: sizeof(Entry *), compar: Entry::cmp);
759 if (entry != nullptr) {
760 return (*entry)->value.fetch(xref);
761 } else {
762 error(category: errSyntaxError, pos: -1, msg: "failed to look up ({0:s})", name->c_str());
763 return Object(objNull);
764 }
765}
766
767Object *NameTree::getValue(int index)
768{
769 if (index < length) {
770 return &entries[index]->value;
771 } else {
772 return nullptr;
773 }
774}
775
776const GooString *NameTree::getName(int index) const
777{
778 if (index < length) {
779 return &entries[index]->name;
780 } else {
781 return nullptr;
782 }
783}
784
785bool Catalog::labelToIndex(GooString *label, int *index)
786{
787 char *end;
788
789 PageLabelInfo *pli = getPageLabelInfo();
790 if (pli != nullptr) {
791 if (!pli->labelToIndex(label, index)) {
792 return false;
793 }
794 } else {
795 *index = strtol(nptr: label->c_str(), endptr: &end, base: 10) - 1;
796 if (*end != '\0') {
797 return false;
798 }
799 }
800
801 if (*index < 0 || *index >= getNumPages()) {
802 return false;
803 }
804
805 return true;
806}
807
808bool Catalog::indexToLabel(int index, GooString *label)
809{
810 char buffer[32];
811
812 if (index < 0 || index >= getNumPages()) {
813 return false;
814 }
815
816 PageLabelInfo *pli = getPageLabelInfo();
817 if (pli != nullptr) {
818 return pli->indexToLabel(index, label);
819 } else {
820 snprintf(s: buffer, maxlen: sizeof(buffer), format: "%d", index + 1);
821 label->append(str: buffer);
822 return true;
823 }
824}
825
826int Catalog::getNumPages()
827{
828 catalogLocker();
829 if (numPages == -1) {
830 Object catDict = xref->getCatalog();
831 if (!catDict.isDict()) {
832 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
833 return 0;
834 }
835 Object pagesDict = catDict.dictLookup(key: "Pages");
836
837 // This should really be isDict("Pages"), but I've seen at least one
838 // PDF file where the /Type entry is missing.
839 if (!pagesDict.isDict()) {
840 error(category: errSyntaxError, pos: -1, msg: "Top-level pages object is wrong type ({0:s})", pagesDict.getTypeName());
841 return 0;
842 }
843
844 Object obj = pagesDict.dictLookup(key: "Count");
845 // some PDF files actually use real numbers here ("/Count 9.0")
846 if (!obj.isNum()) {
847 if (pagesDict.dictIs(dictType: "Page")) {
848 const Object &pageRootRef = catDict.dictLookupNF(key: "Pages");
849
850 error(category: errSyntaxError, pos: -1, msg: "Pages top-level is a single Page. The document is malformed, trying to recover...");
851
852 Dict *pageDict = pagesDict.getDict();
853 if (pageRootRef.isRef()) {
854 const Ref pageRef = pageRootRef.getRef();
855 auto p = std::make_unique<Page>(args&: doc, args: 1, args: std::move(pagesDict), args: pageRef, args: new PageAttrs(nullptr, pageDict), args&: form);
856 if (p->isOk()) {
857 pages.emplace_back(args: std::move(p), args: pageRef);
858
859 numPages = 1;
860 } else {
861 numPages = 0;
862 }
863 } else {
864 numPages = 0;
865 }
866 } else {
867 error(category: errSyntaxError, pos: -1, msg: "Page count in top-level pages object is wrong type ({0:s})", obj.getTypeName());
868 numPages = 0;
869 }
870 } else {
871 if (obj.isInt()) {
872 numPages = obj.getInt();
873 } else if (obj.isInt64()) {
874 numPages = obj.getInt64();
875 } else {
876 numPages = obj.getNum();
877 }
878 if (numPages <= 0) {
879 error(category: errSyntaxError, pos: -1, msg: "Invalid page count {0:d}", numPages);
880 numPages = 0;
881 } else if (numPages > xref->getNumObjects()) {
882 error(category: errSyntaxError, pos: -1, msg: "Page count ({0:d}) larger than number of objects ({1:d})", numPages, xref->getNumObjects());
883 numPages = 0;
884 }
885 }
886 }
887
888 return numPages;
889}
890
891PageLabelInfo *Catalog::getPageLabelInfo()
892{
893 catalogLocker();
894 if (!pageLabelInfo) {
895 Object catDict = xref->getCatalog();
896 if (!catDict.isDict()) {
897 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
898 return nullptr;
899 }
900
901 Object obj = catDict.dictLookup(key: "PageLabels");
902 if (obj.isDict()) {
903 pageLabelInfo = new PageLabelInfo(&obj, getNumPages());
904 }
905 }
906
907 return pageLabelInfo;
908}
909
910StructTreeRoot *Catalog::getStructTreeRoot()
911{
912 catalogLocker();
913 if (!structTreeRoot) {
914 Object catalog = xref->getCatalog();
915 if (!catalog.isDict()) {
916 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catalog.getTypeName());
917 return nullptr;
918 }
919
920 Object root = catalog.dictLookup(key: "StructTreeRoot");
921 if (root.isDict(dictType: "StructTreeRoot")) {
922 structTreeRoot = new StructTreeRoot(doc, root.getDict());
923 }
924 }
925 return structTreeRoot;
926}
927
928unsigned int Catalog::getMarkInfo()
929{
930 if (markInfo == markInfoNull) {
931 markInfo = 0;
932
933 catalogLocker();
934 Object catDict = xref->getCatalog();
935
936 if (catDict.isDict()) {
937 Object markInfoDict = catDict.dictLookup(key: "MarkInfo");
938 if (markInfoDict.isDict()) {
939 Object value = markInfoDict.dictLookup(key: "Marked");
940 if (value.isBool()) {
941 if (value.getBool()) {
942 markInfo |= markInfoMarked;
943 }
944 } else if (!value.isNull()) {
945 error(category: errSyntaxError, pos: -1, msg: "Marked object is wrong type ({0:s})", value.getTypeName());
946 }
947
948 value = markInfoDict.dictLookup(key: "Suspects");
949 if (value.isBool() && value.getBool()) {
950 markInfo |= markInfoSuspects;
951 } else if (!value.isNull()) {
952 error(category: errSyntaxError, pos: -1, msg: "Suspects object is wrong type ({0:s})", value.getTypeName());
953 }
954
955 value = markInfoDict.dictLookup(key: "UserProperties");
956 if (value.isBool() && value.getBool()) {
957 markInfo |= markInfoUserProperties;
958 } else if (!value.isNull()) {
959 error(category: errSyntaxError, pos: -1, msg: "UserProperties object is wrong type ({0:s})", value.getTypeName());
960 }
961 } else if (!markInfoDict.isNull()) {
962 error(category: errSyntaxError, pos: -1, msg: "MarkInfo object is wrong type ({0:s})", markInfoDict.getTypeName());
963 }
964 } else {
965 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
966 }
967 }
968 return markInfo;
969}
970
971Object *Catalog::getCreateOutline()
972{
973
974 catalogLocker();
975 Object catDict = xref->getCatalog();
976
977 // If there is no Object in the outline variable,
978 // check if there is an Outline dict in the catalog
979 if (outline.isNone()) {
980 if (catDict.isDict()) {
981 Object outline_obj = catDict.dictLookup(key: "Outlines");
982 if (outline_obj.isDict()) {
983 return &outline;
984 }
985 } else {
986 // catalog is not a dict, give up?
987 return &outline;
988 }
989 }
990
991 // If there is an Object in variable, make sure it's a dict
992 if (outline.isDict()) {
993 return &outline;
994 }
995
996 // setup an empty outline dict
997 outline = Object(new Dict(doc->getXRef()));
998 outline.dictSet(key: "Type", val: Object(objName, "Outlines"));
999 outline.dictSet(key: "Count", val: Object(0));
1000
1001 const Ref outlineRef = doc->getXRef()->addIndirectObject(o: outline);
1002 catDict.dictAdd(key: "Outlines", val: Object(outlineRef));
1003 xref->setModifiedObject(o: &catDict, r: { .num: xref->getRootNum(), .gen: xref->getRootGen() });
1004
1005 return &outline;
1006}
1007
1008Object *Catalog::getOutline()
1009{
1010 catalogLocker();
1011 if (outline.isNone()) {
1012 Object catDict = xref->getCatalog();
1013 if (catDict.isDict()) {
1014 outline = catDict.dictLookup(key: "Outlines");
1015 } else {
1016 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1017 outline.setToNull();
1018 }
1019 }
1020
1021 return &outline;
1022}
1023
1024Object *Catalog::getDests()
1025{
1026 catalogLocker();
1027 if (dests.isNone()) {
1028 Object catDict = xref->getCatalog();
1029 if (catDict.isDict()) {
1030 dests = catDict.dictLookup(key: "Dests");
1031 } else {
1032 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1033 dests.setToNull();
1034 }
1035 }
1036
1037 return &dests;
1038}
1039
1040Catalog::FormType Catalog::getFormType()
1041{
1042 Object xfa;
1043 FormType res = NoForm;
1044
1045 if (acroForm.isDict()) {
1046 xfa = acroForm.dictLookup(key: "XFA");
1047 if (xfa.isStream() || xfa.isArray()) {
1048 res = XfaForm;
1049 } else {
1050 res = AcroForm;
1051 }
1052 }
1053
1054 return res;
1055}
1056
1057Form *Catalog::getCreateForm()
1058{
1059 catalogLocker();
1060 if (!form) {
1061
1062 Object catDict = xref->getCatalog();
1063 if (!catDict.isDict()) {
1064 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1065 return nullptr;
1066 }
1067
1068 if (!acroForm.isDict()) {
1069 acroForm = Object(new Dict(xref));
1070 acroForm.dictSet(key: "Fields", val: Object(new Array(xref)));
1071
1072 const Ref newFormRef = xref->addIndirectObject(o: acroForm);
1073 catDict.dictSet(key: "AcroForm", val: Object(newFormRef));
1074
1075 xref->setModifiedObject(o: &catDict, r: { .num: xref->getRootNum(), .gen: xref->getRootGen() });
1076 }
1077 }
1078
1079 return getForm();
1080}
1081
1082Form *Catalog::getForm()
1083{
1084 catalogLocker();
1085 if (!form) {
1086 if (acroForm.isDict()) {
1087 form = new Form(doc);
1088 // perform form-related loading after all widgets have been loaded
1089 form->postWidgetsLoad();
1090 }
1091 }
1092
1093 return form;
1094}
1095
1096void Catalog::addFormToAcroForm(const Ref formRef)
1097{
1098 catalogLocker();
1099
1100 if (!acroForm.isDict()) {
1101 getCreateForm();
1102 }
1103
1104 // append to field array
1105 Ref fieldRef;
1106 Object fieldArray = acroForm.getDict()->lookup(key: "Fields", returnRef: &fieldRef);
1107 fieldArray.getArray()->add(elem: Object(formRef));
1108
1109 setAcroFormModified();
1110}
1111
1112void Catalog::setAcroFormModified()
1113{
1114 Object catDict = xref->getCatalog();
1115 Ref acroFormRef;
1116 catDict.getDict()->lookup(key: "AcroForm", returnRef: &acroFormRef);
1117
1118 if (acroFormRef != Ref::INVALID()) {
1119 xref->setModifiedObject(o: &acroForm, r: acroFormRef);
1120 } else {
1121 catDict.dictSet(key: "AcroForm", val: acroForm.copy());
1122 xref->setModifiedObject(o: &catDict, r: { .num: xref->getRootNum(), .gen: xref->getRootGen() });
1123 }
1124}
1125
1126void Catalog::removeFormFromAcroForm(const Ref formRef)
1127{
1128 catalogLocker();
1129
1130 Object catDict = xref->getCatalog();
1131 if (acroForm.isDict()) {
1132 // remove from field array
1133 Ref fieldRef;
1134 Object fieldArrayO = acroForm.getDict()->lookup(key: "Fields", returnRef: &fieldRef);
1135 Array *fieldArray = fieldArrayO.getArray();
1136 for (int i = 0; i < fieldArray->getLength(); ++i) {
1137 const Object &o = fieldArray->getNF(i);
1138 if (o.isRef() && o.getRef() == formRef) {
1139 fieldArray->remove(i);
1140 break;
1141 }
1142 }
1143
1144 setAcroFormModified();
1145 }
1146}
1147
1148ViewerPreferences *Catalog::getViewerPreferences()
1149{
1150 catalogLocker();
1151 if (!viewerPrefs) {
1152 if (viewerPreferences.isDict()) {
1153 viewerPrefs = new ViewerPreferences(viewerPreferences.getDict());
1154 }
1155 }
1156
1157 return viewerPrefs;
1158}
1159
1160Object *Catalog::getNames()
1161{
1162 if (names.isNone()) {
1163 Object catDict = xref->getCatalog();
1164 if (catDict.isDict()) {
1165 names = catDict.dictLookup(key: "Names");
1166 } else {
1167 error(category: errSyntaxError, pos: -1, msg: "Catalog object is wrong type ({0:s})", catDict.getTypeName());
1168 names.setToNull();
1169 }
1170 }
1171
1172 return &names;
1173}
1174
1175NameTree *Catalog::getDestNameTree()
1176{
1177 if (!destNameTree) {
1178
1179 destNameTree = new NameTree();
1180
1181 if (getNames()->isDict()) {
1182 Object obj = getNames()->dictLookup(key: "Dests");
1183 destNameTree->init(xrefA: xref, tree: &obj);
1184 }
1185 }
1186
1187 return destNameTree;
1188}
1189
1190NameTree *Catalog::getEmbeddedFileNameTree()
1191{
1192 if (!embeddedFileNameTree) {
1193
1194 embeddedFileNameTree = new NameTree();
1195
1196 if (getNames()->isDict()) {
1197 Object obj = getNames()->dictLookup(key: "EmbeddedFiles");
1198 embeddedFileNameTree->init(xrefA: xref, tree: &obj);
1199 }
1200 }
1201
1202 return embeddedFileNameTree;
1203}
1204
1205NameTree *Catalog::getJSNameTree()
1206{
1207 if (!jsNameTree) {
1208
1209 jsNameTree = new NameTree();
1210
1211 if (getNames()->isDict()) {
1212 Object obj = getNames()->dictLookup(key: "JavaScript");
1213 jsNameTree->init(xrefA: xref, tree: &obj);
1214 }
1215 }
1216
1217 return jsNameTree;
1218}
1219
1220std::unique_ptr<LinkAction> Catalog::getAdditionalAction(DocumentAdditionalActionsType type)
1221{
1222 Object additionalActionsObject = additionalActions.fetch(xref: doc->getXRef());
1223 if (additionalActionsObject.isDict()) {
1224 const char *key = (type == actionCloseDocument ? "WC"
1225 : type == actionSaveDocumentStart ? "WS"
1226 : type == actionSaveDocumentFinish ? "DS"
1227 : type == actionPrintDocumentStart ? "WP"
1228 : type == actionPrintDocumentFinish ? "DP"
1229 : nullptr);
1230
1231 Object actionObject = additionalActionsObject.dictLookup(key);
1232 if (actionObject.isDict()) {
1233 return LinkAction::parseAction(obj: &actionObject, baseURI: doc->getCatalog()->getBaseURI());
1234 }
1235 }
1236 return nullptr;
1237}
1238

source code of poppler/poppler/Catalog.cc