1 | //======================================================================== |
2 | // |
3 | // StructTreeRoot.cc |
4 | // |
5 | // This file is licensed under the GPLv2 or later |
6 | // |
7 | // Copyright 2013, 2014 Igalia S.L. |
8 | // Copyright 2014 Fabio D'Urso <fabiodurso@hotmail.it> |
9 | // Copyright 2017 Jan-Erik S <janerik234678@gmail.com> |
10 | // Copyright 2017-2019, 2023 Albert Astals Cid <aacid@kde.org> |
11 | // Copyright 2017, 2018 Adrian Johnson <ajohnson@redneon.com> |
12 | // Copyright 2018, Adam Reichold <adam.reichold@t-online.de> |
13 | // |
14 | //======================================================================== |
15 | |
16 | #include "goo/GooString.h" |
17 | #include "StructTreeRoot.h" |
18 | #include "StructElement.h" |
19 | #include "PDFDoc.h" |
20 | #include "Object.h" |
21 | #include "Dict.h" |
22 | #include <set> |
23 | #include <cassert> |
24 | |
25 | StructTreeRoot::StructTreeRoot(PDFDoc *docA, Dict *structTreeRootDict) : doc(docA) |
26 | { |
27 | assert(doc); |
28 | assert(structTreeRootDict); |
29 | parse(rootDict: structTreeRootDict); |
30 | } |
31 | |
32 | StructTreeRoot::~StructTreeRoot() |
33 | { |
34 | for (StructElement *element : elements) { |
35 | delete element; |
36 | } |
37 | } |
38 | |
39 | void StructTreeRoot::parse(Dict *root) |
40 | { |
41 | // The RoleMap/ClassMap dictionaries are needed by all the parsing |
42 | // functions, which will resolve the custom names to canonical |
43 | // standard names. |
44 | roleMap = root->lookup(key: "RoleMap" ); |
45 | classMap = root->lookup(key: "ClassMap" ); |
46 | |
47 | // ParentTree (optional). If present, it must be a number tree, |
48 | // otherwise it is not possible to map stream objects to their |
49 | // corresponding structure element. Here only the references are |
50 | // loaded into the array, the pointers to the StructElements will |
51 | // be filled-in later when parsing them. |
52 | const Object parentTreeObj = root->lookup(key: "ParentTree" ); |
53 | if (parentTreeObj.isDict()) { |
54 | parseNumberTreeNode(node: parentTreeObj.getDict()); |
55 | } |
56 | |
57 | RefRecursionChecker seenElements; |
58 | |
59 | // Parse the children StructElements |
60 | const bool marked = doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked; |
61 | Object kids = root->lookup(key: "K" ); |
62 | if (kids.isArray()) { |
63 | if (marked && kids.arrayGetLength() > 1) { |
64 | error(category: errSyntaxWarning, pos: -1, msg: "K in StructTreeRoot has more than one children in a tagged PDF" ); |
65 | } |
66 | for (int i = 0; i < kids.arrayGetLength(); i++) { |
67 | const Object &ref = kids.arrayGetNF(i); |
68 | if (ref.isRef()) { |
69 | seenElements.insert(ref: ref.getRef()); |
70 | } |
71 | Object obj = kids.arrayGet(i); |
72 | if (obj.isDict()) { |
73 | StructElement *child = new StructElement(obj.getDict(), this, nullptr, seenElements); |
74 | if (child->isOk()) { |
75 | if (marked && !(child->getType() == StructElement::Document || child->getType() == StructElement::Part || child->getType() == StructElement::Art || child->getType() == StructElement::Div)) { |
76 | error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element of tagged PDF is wrong type ({0:s})" , child->getTypeName()); |
77 | } |
78 | appendChild(element: child); |
79 | if (ref.isRef()) { |
80 | parentTreeAdd(objectRef: ref.getRef(), element: child); |
81 | } |
82 | } else { |
83 | error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element could not be parsed" ); |
84 | delete child; |
85 | } |
86 | } else { |
87 | error(category: errSyntaxWarning, pos: -1, msg: "K has a child of wrong type ({0:s})" , obj.getTypeName()); |
88 | } |
89 | } |
90 | } else if (kids.isDict()) { |
91 | StructElement *child = new StructElement(kids.getDict(), this, nullptr, seenElements); |
92 | if (child->isOk()) { |
93 | appendChild(element: child); |
94 | const Object &ref = root->lookupNF(key: "K" ); |
95 | if (ref.isRef()) { |
96 | parentTreeAdd(objectRef: ref.getRef(), element: child); |
97 | } |
98 | } else { |
99 | error(category: errSyntaxWarning, pos: -1, msg: "StructTreeRoot element could not be parsed" ); |
100 | delete child; |
101 | } |
102 | } else if (!kids.isNull()) { |
103 | error(category: errSyntaxWarning, pos: -1, msg: "K in StructTreeRoot is wrong type ({0:s})" , kids.getTypeName()); |
104 | } |
105 | |
106 | // refToParentMap is only used during parsing. Ensure all memory used by it is freed. |
107 | std::multimap<Ref, Parent *>().swap(x&: refToParentMap); |
108 | } |
109 | |
110 | void StructTreeRoot::parseNumberTreeNode(Dict *node) |
111 | { |
112 | Object kids = node->lookup(key: "Kids" ); |
113 | if (kids.isArray()) { |
114 | for (int i = 0; i < kids.arrayGetLength(); i++) { |
115 | Object obj = kids.arrayGet(i); |
116 | if (obj.isDict()) { |
117 | parseNumberTreeNode(node: obj.getDict()); |
118 | } else { |
119 | error(category: errSyntaxError, pos: -1, msg: "Kids item at position {0:d} is wrong type ({1:s})" , i, obj.getTypeName()); |
120 | } |
121 | } |
122 | return; |
123 | } else if (!kids.isNull()) { |
124 | error(category: errSyntaxError, pos: -1, msg: "Kids object is wrong type ({0:s})" , kids.getTypeName()); |
125 | } |
126 | |
127 | Object nums = node->lookup(key: "Nums" ); |
128 | if (nums.isArray()) { |
129 | if (nums.arrayGetLength() % 2 == 0) { |
130 | // keys in even positions, references in odd ones |
131 | for (int i = 0; i < nums.arrayGetLength(); i += 2) { |
132 | Object key = nums.arrayGet(i); |
133 | |
134 | if (!key.isInt()) { |
135 | error(category: errSyntaxError, pos: -1, msg: "Nums item at position {0:d} is wrong type ({1:s})" , i, key.getTypeName()); |
136 | continue; |
137 | } |
138 | int keyVal = key.getInt(); |
139 | std::vector<Parent> &vec = parentTree[keyVal]; |
140 | |
141 | Object valueArray = nums.arrayGet(i: i + 1); |
142 | if (valueArray.isArray()) { |
143 | vec.resize(new_size: valueArray.arrayGetLength()); |
144 | for (int j = 0; j < valueArray.arrayGetLength(); j++) { |
145 | const Object &itemvalue = valueArray.arrayGetNF(i: j); |
146 | if (itemvalue.isRef()) { |
147 | Ref ref = itemvalue.getRef(); |
148 | vec[j].ref = ref; |
149 | refToParentMap.insert(x: std::pair<Ref, Parent *>(ref, &vec[j])); |
150 | } else if (!itemvalue.isNull()) { |
151 | error(category: errSyntaxError, pos: -1, msg: "Nums array item at position {0:d}/{1:d} is invalid type ({2:s})" , i, j, itemvalue.getTypeName()); |
152 | } |
153 | } |
154 | } else { |
155 | const Object &valueRef = nums.arrayGetNF(i: i + 1); |
156 | if (valueRef.isRef()) { |
157 | Ref ref = valueRef.getRef(); |
158 | vec.resize(new_size: 1); |
159 | vec[0].ref = ref; |
160 | refToParentMap.insert(x: std::pair<Ref, Parent *>(ref, &vec[0])); |
161 | } else { |
162 | error(category: errSyntaxError, pos: -1, msg: "Nums item at position {0:d} is wrong type ({1:s})" , i + 1, valueRef.getTypeName()); |
163 | } |
164 | } |
165 | } |
166 | } else { |
167 | error(category: errSyntaxError, pos: -1, msg: "Nums array length is not a even ({0:d})" , nums.arrayGetLength()); |
168 | } |
169 | } else { |
170 | error(category: errSyntaxError, pos: -1, msg: "Nums object is wrong type ({0:s})" , nums.getTypeName()); |
171 | } |
172 | } |
173 | |
174 | void StructTreeRoot::parentTreeAdd(const Ref objectRef, StructElement *element) |
175 | { |
176 | auto range = refToParentMap.equal_range(x: objectRef); |
177 | for (auto it = range.first; it != range.second; ++it) { |
178 | it->second->element = element; |
179 | } |
180 | } |
181 | |