//========================================================================
//
// StructTreeRoot.cc
//
// This file is licensed under the GPLv2 or later
//
// Copyright 2013, 2014 Igalia S.L.
// Copyright 2014 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright 2017 Jan-Erik S <janerik234678@gmail.com>
// Copyright 2017-2019, 2023 Albert Astals Cid <aacid@kde.org>
// Copyright 2017, 2018 Adrian Johnson <ajohnson@redneon.com>
// Copyright 2018, Adam Reichold <adam.reichold@t-online.de>
//
//========================================================================
15
16#include "goo/GooString.h"
17#include "StructTreeRoot.h"
18#include "StructElement.h"
19#include "PDFDoc.h"
20#include "Object.h"
21#include "Dict.h"
22#include <set>
23#include <cassert>
24
25StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict *structTreeRootDict) : doc(docA)
26{
27 assert(doc);
28 assert(structTreeRootDict);
29 parse(rootDict: structTreeRootDict);
30}
31
32StructTreeRoot::~StructTreeRoot()
33{
34 for (StructElement *element : elements) {
35 delete element;
36 }
37}
38
39void StructTreeRoot::parse(Dict *root)
40{
41 // The RoleMap/ClassMap dictionaries are needed by all the parsing
42 // functions, which will resolve the custom names to canonical
43 // standard names.
44 roleMap = root->lookup(key: "RoleMap");
45 classMap = root->lookup(key: "ClassMap");
46
47 // ParentTree (optional). If present, it must be a number tree,
48 // otherwise it is not possible to map stream objects to their
49 // corresponding structure element. Here only the references are
50 // loaded into the array, the pointers to the StructElements will
51 // be filled-in later when parsing them.
52 const Object parentTreeObj = root->lookup(key: "ParentTree");
53 if (parentTreeObj.isDict()) {
54 parseNumberTreeNode(node: parentTreeObj.getDict());
55 }
56
57 RefRecursionChecker seenElements;
58
59 // Parse the children StructElements
60 const bool marked = doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked;
61 Object kids = root->lookup(key: "K");
62 if (kids.isArray()) {
63 if (marked && kids.arrayGetLength() > 1) {
64 error(category: errSyntaxWarning, pos: -1, msg: "K in StructTreeRoot has more than one children in a tagged PDF");
65 }
66 for (int i = 0; i < kids.arrayGetLength(); i++) {
67 const Object &ref = kids.arrayGetNF(i);
68 if (ref.isRef()) {
69 seenElements.insert(ref: ref.getRef());
70 }
71 Object obj = kids.arrayGet(i);
72 if (obj.isDict()) {
73 StructElement *child = new StructElement(obj.getDict(), this, nullptr, seenElements);
74 if (child->isOk()) {
75 if (marked && !(child->getType() == StructElement::Document || child->getType() == StructElement::Part || child->getType() == StructElement::Art || child->getType() == StructElement::Div)) {
76 error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element of tagged PDF is wrong type ({0:s})", child->getTypeName());
77 }
78 appendChild(element: child);
79 if (ref.isRef()) {
80 parentTreeAdd(objectRef: ref.getRef(), element: child);
81 }
82 } else {
83 error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element could not be parsed");
84 delete child;
85 }
86 } else {
87 error(category: errSyntaxWarning, pos: -1, msg: "K has a child of wrong type ({0:s})", obj.getTypeName());
88 }
89 }
90 } else if (kids.isDict()) {
91 StructElement *child = new StructElement(kids.getDict(), this, nullptr, seenElements);
92 if (child->isOk()) {
93 appendChild(element: child);
94 const Object &ref = root->lookupNF(key: "K");
95 if (ref.isRef()) {
96 parentTreeAdd(objectRef: ref.getRef(), element: child);
97 }
98 } else {
99 error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element could not be parsed");
100 delete child;
101 }
102 } else if (!kids.isNull()) {
103 error(category: errSyntaxWarning, pos: -1, msg: "K in StructTreeRoot is wrong type ({0:s})", kids.getTypeName());
104 }
105
106 // refToParentMap is only used during parsing. Ensure all memory used by it is freed.
107 std::multimap<Ref, Parent *>().swap(x&: refToParentMap);
108}
109
110void StructTreeRoot::parseNumberTreeNode(Dict *node)
111{
112 Object kids = node->lookup(key: "Kids");
113 if (kids.isArray()) {
114 for (int i = 0; i < kids.arrayGetLength(); i++) {
115 Object obj = kids.arrayGet(i);
116 if (obj.isDict()) {
117 parseNumberTreeNode(node: obj.getDict());
118 } else {
119 error(category: errSyntaxError, pos: -1, msg: "Kids item at position {0:d} is wrong type ({1:s})", i, obj.getTypeName());
120 }
121 }
122 return;
123 } else if (!kids.isNull()) {
124 error(category: errSyntaxError, pos: -1, msg: "Kids object is wrong type ({0:s})", kids.getTypeName());
125 }
126
127 Object nums = node->lookup(key: "Nums");
128 if (nums.isArray()) {
129 if (nums.arrayGetLength() % 2 == 0) {
130 // keys in even positions, references in odd ones
131 for (int i = 0; i < nums.arrayGetLength(); i += 2) {
132 Object key = nums.arrayGet(i);
133
134 if (!key.isInt()) {
135 error(category: errSyntaxError, pos: -1, msg: "Nums item at position {0:d} is wrong type ({1:s})", i, key.getTypeName());
136 continue;
137 }
138 int keyVal = key.getInt();
139 std::vector<Parent> &vec = parentTree[keyVal];
140
141 Object valueArray = nums.arrayGet(i: i + 1);
142 if (valueArray.isArray()) {
143 vec.resize(new_size: valueArray.arrayGetLength());
144 for (int j = 0; j < valueArray.arrayGetLength(); j++) {
145 const Object &itemvalue = valueArray.arrayGetNF(i: j);
146 if (itemvalue.isRef()) {
147 Ref ref = itemvalue.getRef();
148 vec[j].ref = ref;
149 refToParentMap.insert(x: std::pair<Ref, Parent *>(ref, &vec[j]));
150 } else if (!itemvalue.isNull()) {
151 error(category: errSyntaxError, pos: -1, msg: "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})", i, j, itemvalue.getTypeName());
152 }
153 }
154 } else {
155 const Object &valueRef = nums.arrayGetNF(i: i + 1);
156 if (valueRef.isRef()) {
157 Ref ref = valueRef.getRef();
158 vec.resize(new_size: 1);
159 vec[0].ref = ref;
160 refToParentMap.insert(x: std::pair<Ref, Parent *>(ref, &vec[0]));
161 } else {
162 error(category: errSyntaxError, pos: -1, msg: "Nums item at position {0:d} is wrong type ({1:s})", i + 1, valueRef.getTypeName());
163 }
164 }
165 }
166 } else {
167 error(category: errSyntaxError, pos: -1, msg: "Nums array length is not a even ({0:d})", nums.arrayGetLength());
168 }
169 } else {
170 error(category: errSyntaxError, pos: -1, msg: "Nums object is wrong type ({0:s})", nums.getTypeName());
171 }
172}
173
174void StructTreeRoot::parentTreeAdd(const Ref objectRef, StructElement *element)
175{
176 auto range = refToParentMap.equal_range(x: objectRef);
177 for (auto it = range.first; it != range.second; ++it) {
178 it->second->element = element;
179 }
180}
181

// Source code of poppler/poppler/StructTreeRoot.cc