1//========================================================================
2//
3// Catalog.h
4//
5// Copyright 1996-2007 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
17// Copyright (C) 2005, 2007, 2009-2011, 2013, 2017-2023 Albert Astals Cid <aacid@kde.org>
18// Copyright (C) 2005 Jonathan Blandford <jrb@redhat.com>
19// Copyright (C) 2005, 2006, 2008 Brad Hards <bradh@frogmouth.net>
20// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
21// Copyright (C) 2008, 2011 Pino Toscano <pino@kde.org>
22// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
23// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
24// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
25// Copyright (C) 2013 Adrian Perez de Castro <aperez@igalia.com>
26// Copyright (C) 2013, 2017 Adrian Johnson <ajohnson@redneon.com>
27// Copyright (C) 2013 José Aliste <jaliste@src.gnome.org>
28// Copyright (C) 2016 Masamichi Hosoda <trueroad@trueroad.jp>
29// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
30// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
31// Copyright (C) 2020 Oliver Sander <oliver.sander@tu-dresden.de>
32// Copyright (C) 2020 Katarina Behrens <Katarina.Behrens@cib.de>
33// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by Technische Universität Dresden
34// Copyright (C) 2021 RM <rm+git@arcsin.org>
35//
36// To see a description of the changes please see the Changelog file that
37// came with your tarball or type make ChangeLog if you are building from git
38//
39//========================================================================
40
41#ifndef CATALOG_H
42#define CATALOG_H
43
#include "poppler-config.h"
#include "poppler_private_export.h"
#include "Object.h"
#include "Link.h"

#include <memory>
#include <mutex>
#include <optional>
#include <string>
#include <utility>
#include <vector>
52
// Forward declarations of types that this header only uses through
// pointers or references (alphabetical order).
class FileSpec;
class Form;
class Object;
class OCGs;
class Page;
class PageAttrs;
class PageLabelInfo;
class PDFDoc;
struct Ref;
class StructTreeRoot;
class ViewerPreferences;
class XRef;
65
66//------------------------------------------------------------------------
67// NameTree
68//------------------------------------------------------------------------
69
70class POPPLER_PRIVATE_EXPORT NameTree
71{
72public:
73 NameTree();
74 ~NameTree();
75
76 NameTree(const NameTree &) = delete;
77 NameTree &operator=(const NameTree &) = delete;
78
79 void init(XRef *xref, Object *tree);
80 Object lookup(const GooString *name);
81 int numEntries() { return length; };
82 // iterator accessor, note it returns a pointer to the internal object, do not free nor delete it
83 Object *getValue(int i);
84 const GooString *getName(int i) const;
85
86private:
87 struct Entry
88 {
89 Entry(Array *array, int index);
90 ~Entry();
91 GooString name;
92 Object value;
93 static int cmpEntry(const void *voidEntry, const void *voidOtherEntry);
94 static int cmp(const void *key, const void *entry);
95 };
96
97 void parse(const Object *tree, RefRecursionChecker &seen);
98 void addEntry(Entry *entry);
99
100 XRef *xref;
101 Entry **entries;
102 int size, length; // size is the number of entries in
103 // the array of Entry*
104 // length is the number of real Entry
105};
106
107//------------------------------------------------------------------------
108// Catalog
109//------------------------------------------------------------------------
110
111class POPPLER_PRIVATE_EXPORT Catalog
112{
113public:
114 // Constructor.
115 explicit Catalog(PDFDoc *docA);
116
117 // Destructor.
118 ~Catalog();
119
120 Catalog(const Catalog &) = delete;
121 Catalog &operator=(const Catalog &) = delete;
122
123 // Is catalog valid?
124 bool isOk() { return ok; }
125
126 // Get number of pages.
127 int getNumPages();
128
129 // Get a page.
130 Page *getPage(int i);
131
132 // Get the reference for a page object.
133 Ref *getPageRef(int i);
134
135 // Return base URI, or NULL if none.
136 const std::optional<std::string> &getBaseURI() const { return baseURI; }
137
138 // Return the contents of the metadata stream, or NULL if there is
139 // no metadata.
140 std::unique_ptr<GooString> readMetadata();
141
142 // Return the structure tree root object.
143 StructTreeRoot *getStructTreeRoot();
144
145 // Return values from the MarkInfo dictionary as flags in a bitfield.
146 enum MarkInfoFlags
147 {
148 markInfoNull = 1 << 0,
149 markInfoMarked = 1 << 1,
150 markInfoUserProperties = 1 << 2,
151 markInfoSuspects = 1 << 3,
152 };
153 unsigned int getMarkInfo();
154
155 // Find a page, given its object ID. Returns page number, or 0 if
156 // not found.
157 int findPage(const Ref pageRef);
158
159 // Find a named destination. Returns the link destination, or
160 // NULL if <name> is not a destination.
161 std::unique_ptr<LinkDest> findDest(const GooString *name);
162
163 Object *getDests();
164
165 // Get the number of named destinations in name-dict
166 int numDests();
167
168 // Get the i'th named destination name in name-dict
169 const char *getDestsName(int i);
170
171 // Get the i'th named destination link destination in name-dict
172 std::unique_ptr<LinkDest> getDestsDest(int i);
173
174 // Get the number of named destinations in name-tree
175 int numDestNameTree() { return getDestNameTree()->numEntries(); }
176
177 // Get the i'th named destination name in name-tree
178 const GooString *getDestNameTreeName(int i) { return getDestNameTree()->getName(i); }
179
180 // Get the i'th named destination link destination in name-tree
181 std::unique_ptr<LinkDest> getDestNameTreeDest(int i);
182
183 // Get the number of embedded files
184 int numEmbeddedFiles() { return getEmbeddedFileNameTree()->numEntries(); }
185
186 // Get the i'th file embedded (at the Document level) in the document
187 std::unique_ptr<FileSpec> embeddedFile(int i);
188
189 // Is there an embedded file with the given name?
190 bool hasEmbeddedFile(const std::string &fileName);
191
192 // Adds and embeddedFile
193 // If there is already an existing embedded file with the given fileName
194 // it gets replaced, if that's not what you want check hasEmbeddedFile first
195 void addEmbeddedFile(GooFile *file, const std::string &fileName);
196
197 // Get the number of javascript scripts
198 int numJS() { return getJSNameTree()->numEntries(); }
199 const GooString *getJSName(int i) { return getJSNameTree()->getName(i); }
200
201 // Get the i'th JavaScript script (at the Document level) in the document
202 GooString *getJS(int i);
203
204 // Convert between page indices and page labels.
205 bool labelToIndex(GooString *label, int *index);
206 bool indexToLabel(int index, GooString *label);
207
208 Object *getOutline();
209 // returns the existing outline or new one if it doesn't exist
210 Object *getCreateOutline();
211
212 Object *getAcroForm() { return &acroForm; }
213 void addFormToAcroForm(const Ref formRef);
214 void removeFormFromAcroForm(const Ref formRef);
215 void setAcroFormModified();
216
217 OCGs *getOptContentConfig() { return optContent; }
218
219 int getPDFMajorVersion() const { return catalogPdfMajorVersion; }
220 int getPDFMinorVersion() const { return catalogPdfMinorVersion; }
221
222 enum FormType
223 {
224 NoForm,
225 AcroForm,
226 XfaForm
227 };
228
229 FormType getFormType();
230 // This can return nullptr if the document is in a very damaged state
231 Form *getCreateForm();
232 Form *getForm();
233
234 ViewerPreferences *getViewerPreferences();
235
236 enum PageMode
237 {
238 pageModeNone,
239 pageModeOutlines,
240 pageModeThumbs,
241 pageModeFullScreen,
242 pageModeOC,
243 pageModeAttach,
244 pageModeNull
245 };
246 enum PageLayout
247 {
248 pageLayoutNone,
249 pageLayoutSinglePage,
250 pageLayoutOneColumn,
251 pageLayoutTwoColumnLeft,
252 pageLayoutTwoColumnRight,
253 pageLayoutTwoPageLeft,
254 pageLayoutTwoPageRight,
255 pageLayoutNull
256 };
257
258 // Returns the page mode.
259 PageMode getPageMode();
260 PageLayout getPageLayout();
261
262 enum DocumentAdditionalActionsType
263 {
264 actionCloseDocument, ///< Performed before closing the document
265 actionSaveDocumentStart, ///< Performed before saving the document
266 actionSaveDocumentFinish, ///< Performed after saving the document
267 actionPrintDocumentStart, ///< Performed before printing the document
268 actionPrintDocumentFinish, ///< Performed after printing the document
269 };
270
271 std::unique_ptr<LinkAction> getAdditionalAction(DocumentAdditionalActionsType type);
272
273private:
274 // Get page label info.
275 PageLabelInfo *getPageLabelInfo();
276
277 PDFDoc *doc;
278 XRef *xref; // the xref table for this PDF file
279 std::vector<std::pair<std::unique_ptr<Page>, Ref>> pages;
280 std::vector<Object> *pagesList;
281 std::vector<Ref> *pagesRefList;
282 std::vector<PageAttrs *> *attrsList;
283 std::vector<int> *kidsIdxList;
284 Form *form;
285 ViewerPreferences *viewerPrefs;
286 int numPages; // number of pages
287 Object dests; // named destination dictionary
288 Object names; // named names dictionary
289 NameTree *destNameTree; // named destination name-tree
290 NameTree *embeddedFileNameTree; // embedded file name-tree
291 NameTree *jsNameTree; // Java Script name-tree
292 std::optional<std::string> baseURI; // base URI for URI-type links
293 Object metadata; // metadata stream
294 StructTreeRoot *structTreeRoot; // structure tree root
295 unsigned int markInfo; // Flags from MarkInfo dictionary
296 Object outline; // outline dictionary
297 Object acroForm; // AcroForm dictionary
298 Object viewerPreferences; // ViewerPreference dictionary
299 OCGs *optContent; // Optional Content groups
300 bool ok; // true if catalog is valid
301 PageLabelInfo *pageLabelInfo; // info about page labels
302 PageMode pageMode; // page mode
303 PageLayout pageLayout; // page layout
304 Object additionalActions; // page additional actions
305
306 bool cachePageTree(int page); // Cache first <page> pages.
307 Object *findDestInTree(Object *tree, GooString *name, Object *obj);
308
309 Object *getNames();
310 NameTree *getDestNameTree();
311 NameTree *getEmbeddedFileNameTree();
312 NameTree *getJSNameTree();
313 std::unique_ptr<LinkDest> createLinkDest(Object *obj);
314
315 int catalogPdfMajorVersion = -1;
316 int catalogPdfMinorVersion = -1;
317
318 mutable std::recursive_mutex mutex;
319};
320
321#endif
322

// source code of poppler/poppler/Catalog.h