1 | //======================================================================== |
2 | // |
3 | // StructElement.cc |
4 | // |
5 | // This file is licensed under the GPLv2 or later |
6 | // |
7 | // Copyright 2013, 2014 Igalia S.L. |
8 | // Copyright 2014 Luigi Scarso <luigi.scarso@gmail.com> |
9 | // Copyright 2014, 2017-2019, 2021, 2023 Albert Astals Cid <aacid@kde.org> |
10 | // Copyright 2015 Dmytro Morgun <lztoad@gmail.com> |
11 | // Copyright 2018, 2021, 2023 Adrian Johnson <ajohnson@redneon.com> |
12 | // Copyright 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
13 | // Copyright 2018 Adam Reichold <adam.reichold@t-online.de> |
14 | // |
15 | //======================================================================== |
16 | |
17 | #include "StructElement.h" |
18 | #include "StructTreeRoot.h" |
19 | #include "GlobalParams.h" |
20 | #include "UnicodeMap.h" |
21 | #include "PDFDoc.h" |
22 | #include "Dict.h" |
23 | |
24 | #include <cassert> |
25 | |
26 | class GfxState; |
27 | |
28 | static bool isPlacementName(Object *value) |
29 | { |
30 | return value->isName(nameA: "Block" ) || value->isName(nameA: "Inline" ) || value->isName(nameA: "Before" ) || value->isName(nameA: "Start" ) || value->isName(nameA: "End" ); |
31 | } |
32 | |
33 | static bool isWritingModeName(Object *value) |
34 | { |
35 | return value->isName(nameA: "LrTb" ) || value->isName(nameA: "RlTb" ) || value->isName(nameA: "TbRl" ); |
36 | } |
37 | |
38 | static bool isBorderStyleName(Object *value) |
39 | { |
40 | return value->isName(nameA: "None" ) || value->isName(nameA: "Hidden" ) || value->isName(nameA: "Dotted" ) || value->isName(nameA: "Dashed" ) || value->isName(nameA: "Solid" ) || value->isName(nameA: "Double" ) || value->isName(nameA: "Groove" ) || value->isName(nameA: "Ridge" ) |
41 | || value->isName(nameA: "Inset" ) || value->isName(nameA: "Outset" ); |
42 | } |
43 | |
44 | static bool isTextAlignName(Object *value) |
45 | { |
46 | return value->isName(nameA: "Start" ) || value->isName(nameA: "End" ) || value->isName(nameA: "Center" ) || value->isName(nameA: "Justify" ); |
47 | } |
48 | |
49 | static bool isBlockAlignName(Object *value) |
50 | { |
51 | return value->isName(nameA: "Before" ) || value->isName(nameA: "Middle" ) || value->isName(nameA: "After" ) || value->isName(nameA: "Justify" ); |
52 | } |
53 | |
54 | static bool isInlineAlignName(Object *value) |
55 | { |
56 | return value->isName(nameA: "Start" ) || value->isName(nameA: "End" ) || value->isName(nameA: "Center" ); |
57 | } |
58 | |
// Returns true when `value` is numeric, as reported by Object::isNum().
// Exists as a free function so it can be used as an attribute checker
// and as the item predicate of ARRAY_CHECKER.
static bool isNumber(Object *value)
{
    return value->isNum();
}
63 | |
64 | static bool isLineHeight(Object *value) |
65 | { |
66 | return value->isName(nameA: "Normal" ) || value->isName(nameA: "Auto" ) || isNumber(value); |
67 | } |
68 | |
69 | static bool isTextDecorationName(Object *value) |
70 | { |
71 | return value->isName(nameA: "None" ) || value->isName(nameA: "Underline" ) || value->isName(nameA: "Overline" ) || value->isName(nameA: "LineThrough" ); |
72 | } |
73 | |
74 | static bool isRubyAlignName(Object *value) |
75 | { |
76 | return value->isName(nameA: "Start" ) || value->isName(nameA: "End" ) || value->isName(nameA: "Center" ) || value->isName(nameA: "Justify" ) || value->isName(nameA: "Distribute" ); |
77 | } |
78 | |
79 | static bool isRubyPositionName(Object *value) |
80 | { |
81 | return value->isName(nameA: "Before" ) || value->isName(nameA: "After" ) || value->isName(nameA: "Warichu" ) || value->isName(nameA: "Inline" ); |
82 | } |
83 | |
84 | static bool isGlyphOrientationName(Object *value) |
85 | { |
86 | return value->isName(nameA: "Auto" ) || value->isName(nameA: "90" ) || value->isName(nameA: "180" ) || value->isName(nameA: "270" ) || value->isName(nameA: "360" ) || value->isName(nameA: "-90" ) || value->isName(nameA: "-180" ); |
87 | } |
88 | |
89 | static bool isListNumberingName(Object *value) |
90 | { |
91 | return value->isName(nameA: "None" ) || value->isName(nameA: "Disc" ) || value->isName(nameA: "Circle" ) || value->isName(nameA: "Square" ) || value->isName(nameA: "Decimal" ) || value->isName(nameA: "UpperRoman" ) || value->isName(nameA: "LowerRoman" ) || value->isName(nameA: "UpperAlpha" ) |
92 | || value->isName(nameA: "LowerAlpha" ); |
93 | } |
94 | |
95 | static bool isFieldRoleName(Object *value) |
96 | { |
97 | return value->isName(nameA: "rb" ) || value->isName(nameA: "cb" ) || value->isName(nameA: "pb" ) || value->isName(nameA: "tv" ); |
98 | } |
99 | |
100 | static bool isFieldCheckedName(Object *value) |
101 | { |
102 | return value->isName(nameA: "on" ) || value->isName(nameA: "off" ) || value->isName(nameA: "neutral" ); |
103 | } |
104 | |
105 | static bool isTableScopeName(Object *value) |
106 | { |
107 | return value->isName(nameA: "Row" ) || value->isName(nameA: "Column" ) || value->isName(nameA: "Both" ); |
108 | } |
109 | |
110 | static bool isRGBColor(Object *value) |
111 | { |
112 | if (!(value->isArray() && value->arrayGetLength() == 3)) { |
113 | return false; |
114 | } |
115 | |
116 | bool okay = true; |
117 | for (int i = 0; i < 3; i++) { |
118 | Object obj = value->arrayGet(i); |
119 | if (!obj.isNum()) { |
120 | okay = false; |
121 | break; |
122 | } |
123 | if (obj.getNum() < 0.0 || obj.getNum() > 1.0) { |
124 | okay = false; |
125 | break; |
126 | } |
127 | } |
128 | |
129 | return okay; |
130 | } |
131 | |
132 | static bool isNatural(Object *value) |
133 | { |
134 | return (value->isInt() && value->getInt() > 0) || (value->isInt64() && value->getInt64() > 0); |
135 | } |
136 | |
137 | static bool isPositive(Object *value) |
138 | { |
139 | return value->isNum() && value->getNum() >= 0.0; |
140 | } |
141 | |
142 | static bool isNumberOrAuto(Object *value) |
143 | { |
144 | return isNumber(value) || value->isName(nameA: "Auto" ); |
145 | } |
146 | |
// Returns true when `value` is a string object. Name objects are
// currently not accepted.
static bool isTextString(Object *value)
{
    // XXX: Shall isName() also be checked?
    return value->isString();
}
152 | |
// Generates a checker function `static bool name(Object *value)` for
// attributes whose value is an array of items.
//
//   name        - identifier of the generated function
//   checkItem   - predicate applied to each item (and to the value itself
//                 when it is not an array and allowSingle is true)
//   length      - required number of items; 0 disables the length check
//   allowSingle - when true, a non-array value is accepted if checkItem
//                 accepts it; when false, non-array values are rejected
//   allowNulls  - when true, null items are accepted without being passed
//                 to checkItem; when false, any null item is rejected
//
// Null items are handled before checkItem runs: the item predicates only
// accept concrete values, so handing them a null object would reject the
// array even though nulls were declared as allowed.
#define ARRAY_CHECKER(name, checkItem, length, allowSingle, allowNulls) \
    static bool name(Object *value) \
    { \
        if (!value->isArray()) { \
            return allowSingle ? checkItem(value) : false; \
        } \
        \
        if (length && value->arrayGetLength() != length) { \
            return false; \
        } \
        \
        for (int i = 0; i < value->arrayGetLength(); i++) { \
            Object obj = value->arrayGet(i); \
            if (obj.isNull()) { \
                if (!allowNulls) { \
                    return false; \
                } \
                continue; \
            } \
            if (!checkItem(&obj)) { \
                return false; \
            } \
        } \
        return true; \
    }
172 | |
173 | ARRAY_CHECKER(isRGBColorOrOptionalArray4, isRGBColor, 4, true, true) |
174 | ARRAY_CHECKER(isPositiveOrOptionalArray4, isPositive, 4, true, true) |
175 | ARRAY_CHECKER(isPositiveOrArray4, isPositive, 4, true, false) |
176 | ARRAY_CHECKER(isBorderStyle, isBorderStyleName, 4, true, true) |
177 | ARRAY_CHECKER(isNumberArray4, isNumber, 4, false, false) |
178 | ARRAY_CHECKER(isNumberOrArrayN, isNumber, 0, true, false) |
179 | ARRAY_CHECKER(, isTextString, 0, false, false) |
180 | |
181 | // Type of functions used to do type-checking on attribute values |
182 | typedef bool (*AttributeCheckFunc)(Object *); |
183 | |
// Maps attributes to their names and whether the attribute can be inherited.
// Entries are written with positional initializers (see the ATTR* macros),
// so the field order below must not change.
struct AttributeMapEntry
{
    Attribute::Type type; // attribute identifier; Attribute::Unknown terminates a table
    const char *name; // attribute name as a C string; nullptr only in the terminator
    const Object *defval; // default value, or nullptr when the entry has none
    bool inherit; // whether the attribute can be inherited
    AttributeCheckFunc check; // value validator, or nullptr
};
193 | |
// Holds the Object instances used as default attribute values. The
// attribute tables store pointers to these members, so a single
// file-scope instance is kept alive for the whole program.
struct AttributeDefaults
{
    AttributeDefaults() {}; // needed to support old clang

    // Name-valued defaults.
    Object Inline = Object(objName, "Inline" );
    Object LrTb = Object(objName, "LrTb" );
    Object Normal = Object(objName, "Normal" );
    Object Distribute = Object(objName, "Distribute" );
    Object off = Object(objName, "off" );
    // Numeric zero (real).
    Object Zero = Object(0.0);
    Object Auto = Object(objName, "Auto" );
    Object Start = Object(objName, "Start" );
    Object None = Object(objName, "None" );
    Object Before = Object(objName, "Before" );
    // Integer one, used as default for count/span attributes.
    Object Nat1 = Object(1);
};

// The single instance referenced by ATTR_WITH_DEFAULT.
static const AttributeDefaults attributeDefaults;
212 | |
// Terminator entry for an AttributeMapEntry table; lookups stop when
// they reach an entry whose type is Attribute::Unknown.
#define ATTR_LIST_END \
    { \
        Attribute::Unknown, nullptr, nullptr, false, nullptr \
    }

// Table entry for attribute `name` with a default value taken from the
// member `defval` of attributeDefaults. The attribute's string name is
// produced by stringizing `name`.
#define ATTR_WITH_DEFAULT(name, inherit, check, defval) \
    { \
        Attribute::name, #name, &attributeDefaults.defval, inherit, check \
    }

// Table entry for attribute `name` that has no default value.
#define ATTR(name, inherit, check) \
    { \
        Attribute::name, #name, nullptr, inherit, check \
    }
227 | |
// Attribute tables. Each entry is (name, inheritable, checker[, default])
// and each table ends with ATTR_LIST_END.

// Attributes shared by all the element kinds listed in typeMap.
static const AttributeMapEntry attributeMapCommonShared[] = { ATTR_WITH_DEFAULT(Placement, false, isPlacementName, Inline),
    ATTR_WITH_DEFAULT(WritingMode, true, isWritingModeName, LrTb),
    ATTR(BackgroundColor, false, isRGBColor),
    ATTR(BorderColor, true, isRGBColorOrOptionalArray4),
    ATTR_WITH_DEFAULT(BorderStyle, false, isBorderStyle, None),
    ATTR(BorderThickness, true, isPositiveOrOptionalArray4),
    ATTR_WITH_DEFAULT(Padding, false, isPositiveOrArray4, Zero),
    ATTR(Color, true, isRGBColor),
    ATTR_LIST_END };

// Additional attributes for block-level elements.
static const AttributeMapEntry attributeMapCommonBlock[] = { ATTR_WITH_DEFAULT(SpaceBefore, false, isPositive, Zero),
    ATTR_WITH_DEFAULT(SpaceAfter, false, isPositive, Zero),
    ATTR_WITH_DEFAULT(StartIndent, true, isNumber, Zero),
    ATTR_WITH_DEFAULT(EndIndent, true, isNumber, Zero),
    ATTR_WITH_DEFAULT(TextIndent, true, isNumber, Zero),
    ATTR_WITH_DEFAULT(TextAlign, true, isTextAlignName, Start),
    ATTR(BBox, false, isNumberArray4),
    ATTR_WITH_DEFAULT(Width, false, isNumberOrAuto, Auto),
    ATTR_WITH_DEFAULT(Height, false, isNumberOrAuto, Auto),
    ATTR_WITH_DEFAULT(BlockAlign, true, isBlockAlignName, Before),
    ATTR_WITH_DEFAULT(InlineAlign, true, isInlineAlignName, Start),
    ATTR_LIST_END };

// Additional attributes for inline-level elements.
static const AttributeMapEntry attributeMapCommonInline[] = { ATTR_WITH_DEFAULT(BaselineShift, false, isNumber, Zero),
    ATTR_WITH_DEFAULT(LineHeight, true, isLineHeight, Normal),
    ATTR(TextDecorationColor, true, isRGBColor),
    ATTR(TextDecorationThickness, true, isPositive),
    ATTR_WITH_DEFAULT(TextDecorationType, false, isTextDecorationName, None),
    ATTR_WITH_DEFAULT(GlyphOrientationVertical, true, isGlyphOrientationName, Auto),
    ATTR_LIST_END };

// Ruby text attributes.
static const AttributeMapEntry attributeMapCommonRubyText[] = { ATTR_WITH_DEFAULT(RubyPosition, true, isRubyPositionName, Before), ATTR_WITH_DEFAULT(RubyAlign, true, isRubyAlignName, Distribute), ATTR_LIST_END };

// Column layout attributes.
static const AttributeMapEntry attributeMapCommonColumns[] = { ATTR_WITH_DEFAULT(ColumnCount, false, isNatural, Nat1), ATTR(ColumnGap, false, isNumberOrArrayN), ATTR(ColumnWidths, false, isNumberOrArrayN), ATTR_LIST_END };

// List attributes.
static const AttributeMapEntry attributeMapCommonList[] = { ATTR_WITH_DEFAULT(ListNumbering, true, isListNumberingName, None), ATTR_LIST_END };

// PrintField attributes. Note that "checked" is spelled in lower case.
static const AttributeMapEntry attributeMapCommonPrintField[] = { ATTR(Role, false, isFieldRoleName), ATTR_WITH_DEFAULT(checked, false, isFieldCheckedName, off), ATTR(Desc, false, isTextString), ATTR_LIST_END };

// Table attributes.
static const AttributeMapEntry attributeMapCommonTable[] = { ATTR(Headers, false, isTableHeaders), ATTR(Scope, false, isTableScopeName), ATTR(Summary, false, isTextString), ATTR_LIST_END };

// Table cell attributes.
static const AttributeMapEntry attributeMapCommonTableCell[] = { ATTR_WITH_DEFAULT(RowSpan, false, isNatural, Nat1), ATTR_WITH_DEFAULT(ColSpan, false, isNatural, Nat1), ATTR_WITH_DEFAULT(TBorderStyle, true, isBorderStyle, None),
    ATTR_WITH_DEFAULT(TPadding, true, isPositiveOrArray4, Zero), ATTR_LIST_END };

// ATTR_LIST_END stays defined; only the entry-building macros are removed.
#undef ATTR_WITH_DEFAULT
#undef ATTR
274 | |
// Null-terminated lists of attribute tables. Lookups walk the tables of a
// list in order and return the first matching entry.

// Every attribute table; used when no element type narrows the search.
static const AttributeMapEntry *attributeMapAll[] = {
    attributeMapCommonShared, attributeMapCommonBlock, attributeMapCommonInline, attributeMapCommonRubyText, attributeMapCommonColumns,
    attributeMapCommonList, attributeMapCommonPrintField, attributeMapCommonTable, attributeMapCommonTableCell, nullptr,
};

// Only the shared attributes.
static const AttributeMapEntry *attributeMapShared[] = {
    attributeMapCommonShared,
    nullptr,
};

// Shared + block attributes.
static const AttributeMapEntry *attributeMapBlock[] = {
    attributeMapCommonShared,
    attributeMapCommonBlock,
    nullptr,
};

// Shared + inline attributes.
static const AttributeMapEntry *attributeMapInline[] = {
    attributeMapCommonShared,
    attributeMapCommonInline,
    nullptr,
};

// Shared + block + table + table cell attributes.
static const AttributeMapEntry *attributeMapTableCell[] = {
    attributeMapCommonShared, attributeMapCommonBlock, attributeMapCommonTable, attributeMapCommonTableCell, nullptr,
};

// Shared + inline + ruby text attributes.
static const AttributeMapEntry *attributeMapRubyText[] = {
    attributeMapCommonShared,
    attributeMapCommonInline,
    attributeMapCommonRubyText,
    nullptr,
};

// Shared + inline + column attributes.
static const AttributeMapEntry *attributeMapColumns[] = {
    attributeMapCommonShared,
    attributeMapCommonInline,
    attributeMapCommonColumns,
    nullptr,
};

// Shared + list attributes.
static const AttributeMapEntry *attributeMapList[] = {
    attributeMapCommonShared,
    attributeMapCommonList,
    nullptr,
};

// Shared + block + table attributes.
static const AttributeMapEntry *attributeMapTable[] = {
    attributeMapCommonShared,
    attributeMapCommonBlock,
    attributeMapCommonTable,
    nullptr,
};

static const AttributeMapEntry *attributeMapIllustration[] = {
    // XXX: Illustrations may have some attributes from the "shared", "inline",
    //      and "block" sets. This is a loose specification; making it stricter
    //      would mean duplicating entries from those sets. This seems good
    //      enough...
    attributeMapCommonShared,
    attributeMapCommonBlock,
    attributeMapCommonInline,
    nullptr,
};
337 | |
338 | // Table mapping owners of attributes to their names. |
339 | static const struct OwnerMapEntry |
340 | { |
341 | Attribute::Owner owner; |
342 | const char *name; |
343 | } ownerMap[] = { |
344 | // XXX: Those are sorted in the owner priority resolution order. If the |
345 | // same attribute is defined with two owners, the order in the table |
346 | // can be used to know which one has more priority. |
347 | { .owner: Attribute::XML_1_00, .name: "XML-1.00" }, { .owner: Attribute::HTML_3_20, .name: "HTML-3.20" }, { .owner: Attribute::HTML_4_01, .name: "HTML-4.01" }, { .owner: Attribute::OEB_1_00, .name: "OEB-1.00" }, |
348 | { .owner: Attribute::RTF_1_05, .name: "RTF-1.05" }, { .owner: Attribute::CSS_1_00, .name: "CSS-1.00" }, { .owner: Attribute::CSS_2_00, .name: "CSS-2.00" }, { .owner: Attribute::Layout, .name: "Layout" }, |
349 | { .owner: Attribute::PrintField, .name: "PrintField" }, { .owner: Attribute::Table, .name: "Table" }, { .owner: Attribute::List, .name: "List" }, { .owner: Attribute::UserProperties, .name: "UserProperties" }, |
350 | }; |
351 | |
352 | static bool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b) |
353 | { |
354 | size_t aIndex, bIndex, i; |
355 | |
356 | for (i = aIndex = bIndex = 0; i < sizeof(ownerMap) / sizeof(ownerMap[0]); i++) { |
357 | if (ownerMap[i].owner == a) { |
358 | aIndex = i; |
359 | } |
360 | if (ownerMap[i].owner == b) { |
361 | bIndex = i; |
362 | } |
363 | } |
364 | |
365 | return aIndex < bIndex; |
366 | } |
367 | |
368 | // Maps element types to their names and also serves as lookup table |
369 | // for additional element type attributes. |
370 | |
// Coarse classification of structure element types, stored per entry in
// typeMap and queried by StructElement::isBlock/isInline/isGrouping.
enum ElementType
{
    elementTypeUndefined, // none of the categories below
    elementTypeGrouping,
    elementTypeInline,
    elementTypeBlock,
};
378 | |
379 | static const struct TypeMapEntry |
380 | { |
381 | StructElement::Type type; |
382 | const char *name; |
383 | ElementType elementType; |
384 | const AttributeMapEntry **attributes; |
385 | } typeMap[] = { |
386 | { .type: StructElement::Document, .name: "Document" , .elementType: elementTypeGrouping, .attributes: attributeMapShared }, |
387 | { .type: StructElement::Part, .name: "Part" , .elementType: elementTypeGrouping, .attributes: attributeMapShared }, |
388 | { .type: StructElement::Art, .name: "Art" , .elementType: elementTypeGrouping, .attributes: attributeMapColumns }, |
389 | { .type: StructElement::Sect, .name: "Sect" , .elementType: elementTypeGrouping, .attributes: attributeMapColumns }, |
390 | { .type: StructElement::Div, .name: "Div" , .elementType: elementTypeGrouping, .attributes: attributeMapColumns }, |
391 | { .type: StructElement::BlockQuote, .name: "BlockQuote" , .elementType: elementTypeGrouping, .attributes: attributeMapInline }, |
392 | { .type: StructElement::Caption, .name: "Caption" , .elementType: elementTypeGrouping, .attributes: attributeMapInline }, |
393 | { .type: StructElement::NonStruct, .name: "NonStruct" , .elementType: elementTypeGrouping, .attributes: attributeMapInline }, |
394 | { .type: StructElement::Index, .name: "Index" , .elementType: elementTypeGrouping, .attributes: attributeMapInline }, |
395 | { .type: StructElement::Private, .name: "Private" , .elementType: elementTypeGrouping, .attributes: attributeMapInline }, |
396 | { .type: StructElement::Span, .name: "Span" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
397 | { .type: StructElement::Quote, .name: "Quote" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
398 | { .type: StructElement::Note, .name: "Note" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
399 | { .type: StructElement::Reference, .name: "Reference" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
400 | { .type: StructElement::BibEntry, .name: "BibEntry" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
401 | { .type: StructElement::Code, .name: "Code" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
402 | { .type: StructElement::Link, .name: "Link" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
403 | { .type: StructElement::Annot, .name: "Annot" , .elementType: elementTypeInline, .attributes: attributeMapInline }, |
404 | { .type: StructElement::Ruby, .name: "Ruby" , .elementType: elementTypeInline, .attributes: attributeMapRubyText }, |
405 | { .type: StructElement::RB, .name: "RB" , .elementType: elementTypeUndefined, .attributes: attributeMapRubyText }, |
406 | { .type: StructElement::RT, .name: "RT" , .elementType: elementTypeUndefined, .attributes: attributeMapRubyText }, |
407 | { .type: StructElement::RP, .name: "RP" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
408 | { .type: StructElement::Warichu, .name: "Warichu" , .elementType: elementTypeInline, .attributes: attributeMapRubyText }, |
409 | { .type: StructElement::WT, .name: "WT" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
410 | { .type: StructElement::WP, .name: "WP" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
411 | { .type: StructElement::P, .name: "P" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
412 | { .type: StructElement::H, .name: "H" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
413 | { .type: StructElement::H1, .name: "H1" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
414 | { .type: StructElement::H2, .name: "H2" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
415 | { .type: StructElement::H3, .name: "H3" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
416 | { .type: StructElement::H4, .name: "H4" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
417 | { .type: StructElement::H5, .name: "H5" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
418 | { .type: StructElement::H6, .name: "H6" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
419 | { .type: StructElement::L, .name: "L" , .elementType: elementTypeBlock, .attributes: attributeMapList }, |
420 | { .type: StructElement::LI, .name: "LI" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
421 | { .type: StructElement::Lbl, .name: "Lbl" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
422 | { .type: StructElement::LBody, .name: "LBody" , .elementType: elementTypeBlock, .attributes: attributeMapBlock }, |
423 | { .type: StructElement::Table, .name: "Table" , .elementType: elementTypeBlock, .attributes: attributeMapTable }, |
424 | { .type: StructElement::TR, .name: "TR" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
425 | { .type: StructElement::TH, .name: "TH" , .elementType: elementTypeUndefined, .attributes: attributeMapTableCell }, |
426 | { .type: StructElement::TD, .name: "TD" , .elementType: elementTypeUndefined, .attributes: attributeMapTableCell }, |
427 | { .type: StructElement::THead, .name: "THead" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
428 | { .type: StructElement::TFoot, .name: "TFoot" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
429 | { .type: StructElement::TBody, .name: "TBody" , .elementType: elementTypeUndefined, .attributes: attributeMapShared }, |
430 | { .type: StructElement::Figure, .name: "Figure" , .elementType: elementTypeUndefined, .attributes: attributeMapIllustration }, |
431 | { .type: StructElement::Formula, .name: "Formula" , .elementType: elementTypeUndefined, .attributes: attributeMapIllustration }, |
432 | { .type: StructElement::Form, .name: "Form" , .elementType: elementTypeUndefined, .attributes: attributeMapIllustration }, |
433 | { .type: StructElement::TOC, .name: "TOC" , .elementType: elementTypeGrouping, .attributes: attributeMapShared }, |
434 | { .type: StructElement::TOCI, .name: "TOCI" , .elementType: elementTypeGrouping, .attributes: attributeMapShared }, |
435 | }; |
436 | |
437 | //------------------------------------------------------------------------ |
438 | // Helpers for the attribute and structure type tables |
439 | //------------------------------------------------------------------------ |
440 | |
441 | static inline const AttributeMapEntry *getAttributeMapEntry(const AttributeMapEntry **entryList, Attribute::Type type) |
442 | { |
443 | assert(entryList); |
444 | while (*entryList) { |
445 | const AttributeMapEntry *entry = *entryList; |
446 | while (entry->type != Attribute::Unknown) { |
447 | assert(entry->name); |
448 | if (type == entry->type) { |
449 | return entry; |
450 | } |
451 | entry++; |
452 | } |
453 | entryList++; |
454 | } |
455 | return nullptr; |
456 | } |
457 | |
458 | static inline const AttributeMapEntry *getAttributeMapEntry(const AttributeMapEntry **entryList, const char *name) |
459 | { |
460 | assert(entryList); |
461 | while (*entryList) { |
462 | const AttributeMapEntry *entry = *entryList; |
463 | while (entry->type != Attribute::Unknown) { |
464 | assert(entry->name); |
465 | if (strcmp(s1: name, s2: entry->name) == 0) { |
466 | return entry; |
467 | } |
468 | entry++; |
469 | } |
470 | entryList++; |
471 | } |
472 | return nullptr; |
473 | } |
474 | |
475 | static inline const OwnerMapEntry *getOwnerMapEntry(Attribute::Owner owner) |
476 | { |
477 | for (const OwnerMapEntry &entry : ownerMap) { |
478 | if (owner == entry.owner) { |
479 | return &entry; |
480 | } |
481 | } |
482 | return nullptr; |
483 | } |
484 | |
485 | static inline const OwnerMapEntry *getOwnerMapEntry(const char *name) |
486 | { |
487 | for (const OwnerMapEntry &entry : ownerMap) { |
488 | if (strcmp(s1: name, s2: entry.name) == 0) { |
489 | return &entry; |
490 | } |
491 | } |
492 | return nullptr; |
493 | } |
494 | |
495 | static const char *ownerToName(Attribute::Owner owner) |
496 | { |
497 | const OwnerMapEntry *entry = getOwnerMapEntry(owner); |
498 | return entry ? entry->name : "UnknownOwner" ; |
499 | } |
500 | |
501 | static Attribute::Owner nameToOwner(const char *name) |
502 | { |
503 | const OwnerMapEntry *entry = getOwnerMapEntry(name); |
504 | return entry ? entry->owner : Attribute::UnknownOwner; |
505 | } |
506 | |
507 | static inline const TypeMapEntry *getTypeMapEntry(StructElement::Type type) |
508 | { |
509 | for (const TypeMapEntry &entry : typeMap) { |
510 | if (type == entry.type) { |
511 | return &entry; |
512 | } |
513 | } |
514 | return nullptr; |
515 | } |
516 | |
517 | static inline const TypeMapEntry *getTypeMapEntry(const char *name) |
518 | { |
519 | for (const TypeMapEntry &entry : typeMap) { |
520 | if (strcmp(s1: name, s2: entry.name) == 0) { |
521 | return &entry; |
522 | } |
523 | } |
524 | return nullptr; |
525 | } |
526 | |
527 | static const char *typeToName(StructElement::Type type) |
528 | { |
529 | if (type == StructElement::MCID) { |
530 | return "MarkedContent" ; |
531 | } |
532 | if (type == StructElement::OBJR) { |
533 | return "ObjectReference" ; |
534 | } |
535 | |
536 | const TypeMapEntry *entry = getTypeMapEntry(type); |
537 | return entry ? entry->name : "Unknown" ; |
538 | } |
539 | |
540 | static StructElement::Type nameToType(const char *name) |
541 | { |
542 | const TypeMapEntry *entry = getTypeMapEntry(name); |
543 | return entry ? entry->type : StructElement::Unknown; |
544 | } |
545 | |
546 | //------------------------------------------------------------------------ |
547 | // Attribute |
548 | //------------------------------------------------------------------------ |
549 | |
550 | Attribute::Attribute(GooString &&nameA, Object *valueA) : type(UserProperty), owner(UserProperties), revision(0), name(std::move(nameA)), value(), hidden(false), formatted(nullptr) |
551 | { |
552 | assert(valueA); |
553 | value = valueA->copy(); |
554 | } |
555 | |
556 | Attribute::Attribute(Type typeA, Object *valueA) |
557 | : type(typeA), |
558 | owner(UserProperties), // TODO: Determine corresponding owner from Type |
559 | revision(0), |
560 | name(), |
561 | value(), |
562 | hidden(false), |
563 | formatted(nullptr) |
564 | { |
565 | assert(valueA); |
566 | |
567 | value = valueA->copy(); |
568 | |
569 | if (!checkType()) { |
570 | type = Unknown; |
571 | } |
572 | } |
573 | |
// Releases the formatted-value string, which is allocated by
// setFormattedValue() and may be null.
Attribute::~Attribute()
{
    delete formatted;
}
578 | |
579 | const char *Attribute::getTypeName() const |
580 | { |
581 | if (type == UserProperty) { |
582 | return name.c_str(); |
583 | } |
584 | |
585 | const AttributeMapEntry *entry = getAttributeMapEntry(entryList: attributeMapAll, type); |
586 | if (entry) { |
587 | return entry->name; |
588 | } |
589 | |
590 | return "Unknown" ; |
591 | } |
592 | |
// Returns the name of this attribute's owner, or "UnknownOwner" when the
// owner is not listed in ownerMap.
const char *Attribute::getOwnerName() const
{
    return ownerToName(owner);
}
597 | |
598 | Object *Attribute::getDefaultValue(Attribute::Type type) |
599 | { |
600 | const AttributeMapEntry *entry = getAttributeMapEntry(entryList: attributeMapAll, type); |
601 | return entry ? const_cast<Object *>(entry->defval) : nullptr; |
602 | } |
603 | |
604 | void Attribute::setFormattedValue(const char *formattedA) |
605 | { |
606 | if (formattedA) { |
607 | if (formatted) { |
608 | formatted->Set(formattedA); |
609 | } else { |
610 | formatted = new GooString(formattedA); |
611 | } |
612 | } else { |
613 | delete formatted; |
614 | formatted = nullptr; |
615 | } |
616 | } |
617 | |
618 | bool Attribute::checkType(StructElement *element) |
619 | { |
620 | // If an element is passed, tighter type-checking can be done. |
621 | if (!element) { |
622 | return true; |
623 | } |
624 | |
625 | const TypeMapEntry *elementTypeEntry = getTypeMapEntry(type: element->getType()); |
626 | if (elementTypeEntry && elementTypeEntry->attributes) { |
627 | const AttributeMapEntry *entry = getAttributeMapEntry(entryList: elementTypeEntry->attributes, type); |
628 | if (entry) { |
629 | if (entry->check && !((*entry->check)(&value))) { |
630 | return false; |
631 | } |
632 | } else { |
633 | // No entry: the attribute is not valid for the containing element. |
634 | return false; |
635 | } |
636 | } |
637 | |
638 | return true; |
639 | } |
640 | |
641 | Attribute::Type Attribute::getTypeForName(const char *name, StructElement *element) |
642 | { |
643 | const AttributeMapEntry **attributes = attributeMapAll; |
644 | if (element) { |
645 | const TypeMapEntry *elementTypeEntry = getTypeMapEntry(type: element->getType()); |
646 | if (elementTypeEntry && elementTypeEntry->attributes) { |
647 | attributes = elementTypeEntry->attributes; |
648 | } |
649 | } |
650 | |
651 | const AttributeMapEntry *entry = getAttributeMapEntry(entryList: attributes, name); |
652 | return entry ? entry->type : Unknown; |
653 | } |
654 | |
655 | Attribute *Attribute::parseUserProperty(Dict *property) |
656 | { |
657 | Object obj, value; |
658 | GooString name; |
659 | |
660 | obj = property->lookup(key: "N" ); |
661 | if (obj.isString()) { |
662 | name.Set(obj.getString()); |
663 | } else if (obj.isName()) { |
664 | name.Set(obj.getName()); |
665 | } else { |
666 | error(category: errSyntaxError, pos: -1, msg: "N object is wrong type ({0:s})" , obj.getTypeName()); |
667 | return nullptr; |
668 | } |
669 | |
670 | value = property->lookup(key: "V" ); |
671 | if (value.isNull()) { |
672 | error(category: errSyntaxError, pos: -1, msg: "V object is wrong type ({0:s})" , value.getTypeName()); |
673 | return nullptr; |
674 | } |
675 | |
676 | Attribute *attribute = new Attribute(std::move(name), &value); |
677 | obj = property->lookup(key: "F" ); |
678 | if (obj.isString()) { |
679 | attribute->setFormattedValue(obj.getString()->c_str()); |
680 | } else if (!obj.isNull()) { |
681 | error(category: errSyntaxWarning, pos: -1, msg: "F object is wrong type ({0:s})" , obj.getTypeName()); |
682 | } |
683 | |
684 | obj = property->lookup(key: "H" ); |
685 | if (obj.isBool()) { |
686 | attribute->setHidden(obj.getBool()); |
687 | } else if (!obj.isNull()) { |
688 | error(category: errSyntaxWarning, pos: -1, msg: "H object is wrong type ({0:s})" , obj.getTypeName()); |
689 | } |
690 | |
691 | return attribute; |
692 | } |
693 | |
694 | //------------------------------------------------------------------------ |
695 | // StructElement |
696 | //------------------------------------------------------------------------ |
697 | |
698 | StructElement::StructData::StructData() : altText(nullptr), actualText(nullptr), id(nullptr), title(nullptr), expandedAbbr(nullptr), language(nullptr), revision(0) { } |
699 | |
700 | StructElement::StructData::~StructData() |
701 | { |
702 | delete altText; |
703 | delete actualText; |
704 | delete id; |
705 | delete title; |
706 | delete language; |
707 | for (StructElement *element : elements) { |
708 | delete element; |
709 | } |
710 | for (Attribute *attribute : attributes) { |
711 | delete attribute; |
712 | } |
713 | } |
714 | |
715 | StructElement::StructElement(Dict *element, StructTreeRoot *treeRootA, StructElement *parentA, RefRecursionChecker &seen) : type(Unknown), treeRoot(treeRootA), parent(parentA), s(new StructData()) |
716 | { |
717 | assert(treeRoot); |
718 | assert(element); |
719 | |
720 | parse(elementDict: element); |
721 | parseChildren(element, seen); |
722 | } |
723 | |
724 | StructElement::StructElement(int mcid, StructTreeRoot *treeRootA, StructElement *parentA) : type(MCID), treeRoot(treeRootA), parent(parentA), c(new ContentData(mcid)) |
725 | { |
726 | assert(treeRoot); |
727 | assert(parent); |
728 | } |
729 | |
730 | StructElement::StructElement(const Ref ref, StructTreeRoot *treeRootA, StructElement *parentA) : type(OBJR), treeRoot(treeRootA), parent(parentA), c(new ContentData(ref)) |
731 | { |
732 | assert(treeRoot); |
733 | assert(parent); |
734 | } |
735 | |
736 | StructElement::~StructElement() |
737 | { |
738 | if (isContent()) { |
739 | delete c; |
740 | } else { |
741 | delete s; |
742 | } |
743 | } |
744 | |
745 | bool StructElement::isBlock() const |
746 | { |
747 | const TypeMapEntry *entry = getTypeMapEntry(type); |
748 | return entry ? (entry->elementType == elementTypeBlock) : false; |
749 | } |
750 | |
751 | bool StructElement::isInline() const |
752 | { |
753 | const TypeMapEntry *entry = getTypeMapEntry(type); |
754 | return entry ? (entry->elementType == elementTypeInline) : false; |
755 | } |
756 | |
757 | bool StructElement::isGrouping() const |
758 | { |
759 | const TypeMapEntry *entry = getTypeMapEntry(type); |
760 | return entry ? (entry->elementType == elementTypeGrouping) : false; |
761 | } |
762 | |
763 | bool StructElement::() const |
764 | { |
765 | return pageRef.isRef() || (parent && parent->hasPageRef()); |
766 | } |
767 | |
768 | bool StructElement::(Ref &ref) const |
769 | { |
770 | if (pageRef.isRef()) { |
771 | ref = pageRef.getRef(); |
772 | return true; |
773 | } |
774 | |
775 | if (parent) { |
776 | return parent->getPageRef(ref); |
777 | } |
778 | |
779 | return false; |
780 | } |
781 | |
782 | bool StructElement::getStmRef(Ref &ref) const |
783 | { |
784 | if (stmRef.isRef()) { |
785 | ref = stmRef.getRef(); |
786 | return true; |
787 | } |
788 | return false; |
789 | } |
790 | |
791 | const char *StructElement::getTypeName() const |
792 | { |
793 | return typeToName(type); |
794 | } |
795 | |
796 | const Attribute *StructElement::findAttribute(Attribute::Type attributeType, bool inherit, Attribute::Owner attributeOwner) const |
797 | { |
798 | if (isContent()) { |
799 | return parent->findAttribute(attributeType, inherit, attributeOwner); |
800 | } |
801 | |
802 | if (attributeType == Attribute::Unknown || attributeType == Attribute::UserProperty) { |
803 | return nullptr; |
804 | } |
805 | |
806 | const Attribute *result = nullptr; |
807 | |
808 | if (attributeOwner == Attribute::UnknownOwner) { |
809 | // Search for the attribute, no matter who the owner is |
810 | for (unsigned i = 0; i < getNumAttributes(); i++) { |
811 | const Attribute *attr = getAttribute(i); |
812 | if (attributeType == attr->getType()) { |
813 | if (!result || ownerHasMorePriority(a: attr->getOwner(), b: result->getOwner())) { |
814 | result = attr; |
815 | } |
816 | } |
817 | } |
818 | } else { |
819 | // Search for the attribute, with a specific owner |
820 | for (unsigned i = 0; i < getNumAttributes(); i++) { |
821 | const Attribute *attr = getAttribute(i); |
822 | if (attributeType == attr->getType() && attributeOwner == attr->getOwner()) { |
823 | result = attr; |
824 | break; |
825 | } |
826 | } |
827 | } |
828 | |
829 | if (result) { |
830 | return result; |
831 | } |
832 | |
833 | if (inherit && parent) { |
834 | const AttributeMapEntry *entry = getAttributeMapEntry(entryList: attributeMapAll, type: attributeType); |
835 | assert(entry); |
836 | // TODO: Take into account special inheritance cases, for example: |
837 | // inline elements which have been changed to be block using |
838 | // "/Placement/Block" have slightly different rules. |
839 | if (entry->inherit) { |
840 | return parent->findAttribute(attributeType, inherit, attributeOwner); |
841 | } |
842 | } |
843 | |
844 | return nullptr; |
845 | } |
846 | |
847 | GooString *StructElement::appendSubTreeText(GooString *string, bool recursive) const |
848 | { |
849 | if (isContent() && !isObjectRef()) { |
850 | MarkedContentOutputDev mcdev(getMCID(), stmRef); |
851 | const TextSpanArray &spans(getTextSpansInternal(mcdev)); |
852 | |
853 | if (!string) { |
854 | string = new GooString(); |
855 | } |
856 | |
857 | for (const TextSpan &span : spans) { |
858 | string->append(str: span.getText()); |
859 | } |
860 | |
861 | return string; |
862 | } |
863 | |
864 | if (!recursive) { |
865 | return nullptr; |
866 | } |
867 | |
868 | // Do a depth-first traversal, to get elements in logical order |
869 | if (!string) { |
870 | string = new GooString(); |
871 | } |
872 | |
873 | for (unsigned i = 0; i < getNumChildren(); i++) { |
874 | getChild(i)->appendSubTreeText(string, recursive); |
875 | } |
876 | |
877 | return string; |
878 | } |
879 | |
880 | const TextSpanArray &StructElement::getTextSpansInternal(MarkedContentOutputDev &mcdev) const |
881 | { |
882 | assert(isContent()); |
883 | |
884 | int startPage = 0, endPage = 0; |
885 | |
886 | Ref ref; |
887 | if (getPageRef(ref)) { |
888 | startPage = endPage = treeRoot->getDoc()->findPage(ref); |
889 | } |
890 | |
891 | if (!(startPage && endPage)) { |
892 | startPage = 1; |
893 | endPage = treeRoot->getDoc()->getNumPages(); |
894 | } |
895 | |
896 | treeRoot->getDoc()->displayPages(out: &mcdev, firstPage: startPage, lastPage: endPage, hDPI: 72.0, vDPI: 72.0, rotate: 0, useMediaBox: true, crop: false, printing: false); |
897 | return mcdev.getTextSpans(); |
898 | } |
899 | |
900 | static StructElement::Type roleMapResolve(Dict *roleMap, const char *name, const char *curName) |
901 | { |
902 | // Circular reference |
903 | if (curName && !strcmp(s1: name, s2: curName)) { |
904 | return StructElement::Unknown; |
905 | } |
906 | |
907 | Object resolved = roleMap->lookup(key: curName ? curName : name); |
908 | if (resolved.isName()) { |
909 | StructElement::Type type = nameToType(name: resolved.getName()); |
910 | return type == StructElement::Unknown ? roleMapResolve(roleMap, name, curName: resolved.getName()) : type; |
911 | } |
912 | |
913 | if (!resolved.isNull()) { |
914 | error(category: errSyntaxWarning, pos: -1, msg: "RoleMap entry is wrong type ({0:s})" , resolved.getTypeName()); |
915 | } |
916 | return StructElement::Unknown; |
917 | } |
918 | |
919 | void StructElement::parse(Dict *element) |
920 | { |
921 | Object obj; |
922 | |
923 | // Type is optional, but if present must be StructElem |
924 | obj = element->lookup(key: "Type" ); |
925 | if (!obj.isNull() && !obj.isName(nameA: "StructElem" )) { |
926 | error(category: errSyntaxError, pos: -1, msg: "Type of StructElem object is wrong" ); |
927 | return; |
928 | } |
929 | |
930 | // Parent object reference (required). |
931 | const Object &objP = element->lookupNF(key: "P" ); |
932 | if (!objP.isRef()) { |
933 | error(category: errSyntaxError, pos: -1, msg: "P object is wrong type ({0:s})" , obj.getTypeName()); |
934 | return; |
935 | } |
936 | s->parentRef = objP.getRef(); |
937 | |
938 | // Check whether the S-type is valid for the top level |
939 | // element and create a node of the appropriate type. |
940 | obj = element->lookup(key: "S" ); |
941 | if (!obj.isName()) { |
942 | error(category: errSyntaxError, pos: -1, msg: "S object is wrong type ({0:s})" , obj.getTypeName()); |
943 | return; |
944 | } |
945 | |
946 | // Type name may not be standard, resolve through RoleMap first. |
947 | if (treeRoot->getRoleMap()) { |
948 | type = roleMapResolve(roleMap: treeRoot->getRoleMap(), name: obj.getName(), curName: nullptr); |
949 | } |
950 | |
951 | // Resolving through RoleMap may leave type as Unknown, e.g. for types |
952 | // which are not present in it, yet they are standard element types. |
953 | if (type == Unknown) { |
954 | type = nameToType(name: obj.getName()); |
955 | } |
956 | |
957 | // At this point either the type name must have been resolved. |
958 | if (type == Unknown) { |
959 | error(category: errSyntaxError, pos: -1, msg: "StructElem object is wrong type ({0:s})" , obj.getName()); |
960 | return; |
961 | } |
962 | |
963 | // Object ID (optional), to be looked at the IDTree in the tree root. |
964 | obj = element->lookup(key: "ID" ); |
965 | if (obj.isString()) { |
966 | s->id = obj.getString()->copy(); |
967 | } |
968 | |
969 | // Page reference (optional) in which at least one of the child items |
970 | // is to be rendered in. Note: each element stores only the /Pg value |
971 | // contained by it, and StructElement::getPageRef() may look in parent |
972 | // elements to find the page where an element belongs. |
973 | pageRef = element->lookupNF(key: "Pg" ).copy(); |
974 | |
975 | // Revision number (optional). |
976 | obj = element->lookup(key: "R" ); |
977 | if (obj.isInt()) { |
978 | s->revision = obj.getInt(); |
979 | } |
980 | |
981 | // Element title (optional). |
982 | obj = element->lookup(key: "T" ); |
983 | if (obj.isString()) { |
984 | s->title = obj.getString()->copy(); |
985 | } |
986 | |
987 | // Language (optional). |
988 | obj = element->lookup(key: "Lang" ); |
989 | if (obj.isString()) { |
990 | s->language = obj.getString()->copy(); |
991 | } |
992 | |
993 | // Alternative text (optional). |
994 | obj = element->lookup(key: "Alt" ); |
995 | if (obj.isString()) { |
996 | s->altText = obj.getString()->copy(); |
997 | } |
998 | |
999 | // Expanded form of an abbreviation (optional). |
1000 | obj = element->lookup(key: "E" ); |
1001 | if (obj.isString()) { |
1002 | s->expandedAbbr = obj.getString()->copy(); |
1003 | } |
1004 | |
1005 | // Actual text (optional). |
1006 | obj = element->lookup(key: "ActualText" ); |
1007 | if (obj.isString()) { |
1008 | s->actualText = obj.getString()->copy(); |
1009 | } |
1010 | |
1011 | // Attributes directly attached to the element (optional). |
1012 | obj = element->lookup(key: "A" ); |
1013 | if (obj.isDict()) { |
1014 | parseAttributes(attributes: obj.getDict()); |
1015 | } else if (obj.isArray()) { |
1016 | unsigned attrIndex = getNumAttributes(); |
1017 | for (int i = 0; i < obj.arrayGetLength(); i++) { |
1018 | Object iobj = obj.arrayGet(i); |
1019 | if (iobj.isDict()) { |
1020 | attrIndex = getNumAttributes(); |
1021 | parseAttributes(attributes: iobj.getDict()); |
1022 | } else if (iobj.isInt()) { |
1023 | const int revision = iobj.getInt(); |
1024 | // Set revision numbers for the elements previously created. |
1025 | for (unsigned j = attrIndex; j < getNumAttributes(); j++) { |
1026 | getAttribute(i: j)->setRevision(revision); |
1027 | } |
1028 | } else { |
1029 | error(category: errSyntaxWarning, pos: -1, msg: "A item is wrong type ({0:s})" , iobj.getTypeName()); |
1030 | } |
1031 | } |
1032 | } else if (!obj.isNull()) { |
1033 | error(category: errSyntaxWarning, pos: -1, msg: "A is wrong type ({0:s})" , obj.getTypeName()); |
1034 | } |
1035 | |
1036 | // Attributes referenced indirectly through the ClassMap (optional). |
1037 | if (treeRoot->getClassMap()) { |
1038 | Object classes = element->lookup(key: "C" ); |
1039 | if (classes.isName()) { |
1040 | Object attr = treeRoot->getClassMap()->lookup(key: classes.getName()); |
1041 | if (attr.isDict()) { |
1042 | parseAttributes(attributes: attr.getDict(), keepExisting: true); |
1043 | } else if (attr.isArray()) { |
1044 | for (int i = 0; i < attr.arrayGetLength(); i++) { |
1045 | unsigned attrIndex = getNumAttributes(); |
1046 | Object iobj = attr.arrayGet(i); |
1047 | if (iobj.isDict()) { |
1048 | attrIndex = getNumAttributes(); |
1049 | parseAttributes(attributes: iobj.getDict(), keepExisting: true); |
1050 | } else if (iobj.isInt()) { |
1051 | // Set revision numbers for the elements previously created. |
1052 | const int revision = iobj.getInt(); |
1053 | for (unsigned j = attrIndex; j < getNumAttributes(); j++) { |
1054 | getAttribute(i: j)->setRevision(revision); |
1055 | } |
1056 | } else { |
1057 | error(category: errSyntaxWarning, pos: -1, msg: "C item is wrong type ({0:s})" , iobj.getTypeName()); |
1058 | } |
1059 | } |
1060 | } else if (!attr.isNull()) { |
1061 | error(category: errSyntaxWarning, pos: -1, msg: "C object is wrong type ({0:s})" , classes.getTypeName()); |
1062 | } |
1063 | } |
1064 | } |
1065 | } |
1066 | |
1067 | StructElement *StructElement::parseChild(const Object *ref, Object *childObj, RefRecursionChecker &seen) |
1068 | { |
1069 | assert(childObj); |
1070 | assert(ref); |
1071 | |
1072 | StructElement *child = nullptr; |
1073 | |
1074 | if (childObj->isInt()) { |
1075 | child = new StructElement(childObj->getInt(), treeRoot, this); |
1076 | } else if (childObj->isDict(dictType: "MCR" )) { |
1077 | /* |
1078 | * TODO: The optional StmOwn attribute is not handled. |
1079 | */ |
1080 | |
1081 | Object mcidObj = childObj->dictLookup(key: "MCID" ); |
1082 | if (!mcidObj.isInt()) { |
1083 | error(category: errSyntaxError, pos: -1, msg: "MCID object is wrong type ({0:s})" , mcidObj.getTypeName()); |
1084 | return nullptr; |
1085 | } |
1086 | |
1087 | child = new StructElement(mcidObj.getInt(), treeRoot, this); |
1088 | |
1089 | Object = childObj->dictLookupNF(key: "Pg" ).copy(); |
1090 | if (pageRefObj.isRef()) { |
1091 | child->pageRef = std::move(pageRefObj); |
1092 | } |
1093 | |
1094 | const Object &stmObj = childObj->dictLookupNF(key: "Stm" ); |
1095 | if (stmObj.isRef()) { |
1096 | child->stmRef = stmObj.copy(); |
1097 | } else if (!stmObj.isNull()) { |
1098 | error(category: errSyntaxError, pos: -1, msg: "Stm object is wrong type ({0:s})" , stmObj.getTypeName()); |
1099 | delete child; |
1100 | return nullptr; |
1101 | } |
1102 | |
1103 | } else if (childObj->isDict(dictType: "OBJR" )) { |
1104 | const Object &refObj = childObj->dictLookupNF(key: "Obj" ); |
1105 | if (refObj.isRef()) { |
1106 | |
1107 | child = new StructElement(refObj.getRef(), treeRoot, this); |
1108 | |
1109 | Object = childObj->dictLookupNF(key: "Pg" ).copy(); |
1110 | if (pageRefObj.isRef()) { |
1111 | child->pageRef = std::move(pageRefObj); |
1112 | } |
1113 | } else { |
1114 | error(category: errSyntaxError, pos: -1, msg: "Obj object is wrong type ({0:s})" , refObj.getTypeName()); |
1115 | } |
1116 | } else if (childObj->isDict()) { |
1117 | if (!ref->isRef()) { |
1118 | error(category: errSyntaxError, pos: -1, msg: "Structure element dictionary is not an indirect reference ({0:s})" , ref->getTypeName()); |
1119 | } else if (seen.insert(ref: ref->getRef())) { |
1120 | child = new StructElement(childObj->getDict(), treeRoot, this, seen); |
1121 | } else { |
1122 | error(category: errSyntaxWarning, pos: -1, msg: "Loop detected in structure tree, skipping subtree at object {0:d}:{1:d}" , ref->getRefNum(), ref->getRefGen()); |
1123 | } |
1124 | } else { |
1125 | error(category: errSyntaxWarning, pos: -1, msg: "K has a child of wrong type ({0:s})" , childObj->getTypeName()); |
1126 | } |
1127 | |
1128 | if (child) { |
1129 | if (child->isOk()) { |
1130 | appendChild(element: child); |
1131 | if (ref->isRef()) { |
1132 | treeRoot->parentTreeAdd(objectRef: ref->getRef(), element: child); |
1133 | } |
1134 | } else { |
1135 | delete child; |
1136 | child = nullptr; |
1137 | } |
1138 | } |
1139 | |
1140 | return child; |
1141 | } |
1142 | |
1143 | void StructElement::parseChildren(Dict *element, RefRecursionChecker &seen) |
1144 | { |
1145 | Object kids = element->lookup(key: "K" ); |
1146 | if (kids.isArray()) { |
1147 | for (int i = 0; i < kids.arrayGetLength(); i++) { |
1148 | Object obj = kids.arrayGet(i); |
1149 | const Object &ref = kids.arrayGetNF(i); |
1150 | parseChild(ref: &ref, childObj: &obj, seen); |
1151 | } |
1152 | } else if (kids.isDict() || kids.isInt()) { |
1153 | const Object &ref = element->lookupNF(key: "K" ); |
1154 | parseChild(ref: &ref, childObj: &kids, seen); |
1155 | } |
1156 | } |
1157 | |
1158 | void StructElement::parseAttributes(Dict *attributes, bool keepExisting) |
1159 | { |
1160 | Object owner = attributes->lookup(key: "O" ); |
1161 | if (owner.isName(nameA: "UserProperties" )) { |
1162 | // In this case /P is an array of UserProperty dictionaries |
1163 | Object userProperties = attributes->lookup(key: "P" ); |
1164 | if (userProperties.isArray()) { |
1165 | for (int i = 0; i < userProperties.arrayGetLength(); i++) { |
1166 | Object property = userProperties.arrayGet(i); |
1167 | if (property.isDict()) { |
1168 | Attribute *attribute = Attribute::parseUserProperty(property: property.getDict()); |
1169 | if (attribute && attribute->isOk()) { |
1170 | appendAttribute(attribute); |
1171 | } else { |
1172 | error(category: errSyntaxWarning, pos: -1, msg: "Item in P is invalid" ); |
1173 | delete attribute; |
1174 | } |
1175 | } else { |
1176 | error(category: errSyntaxWarning, pos: -1, msg: "Item in P is wrong type ({0:s})" , property.getTypeName()); |
1177 | } |
1178 | } |
1179 | } |
1180 | } else if (owner.isName()) { |
1181 | // In this case /P contains standard attributes. |
1182 | // Check first if the owner is a valid standard one. |
1183 | Attribute::Owner ownerValue = nameToOwner(name: owner.getName()); |
1184 | if (ownerValue != Attribute::UnknownOwner) { |
1185 | // Iterate over the entries of the "attributes" dictionary. |
1186 | // The /O entry (owner) is skipped. |
1187 | for (int i = 0; i < attributes->getLength(); i++) { |
1188 | const char *key = attributes->getKey(i); |
1189 | if (strcmp(s1: key, s2: "O" ) != 0) { |
1190 | Attribute::Type t = Attribute::getTypeForName(name: key, element: this); |
1191 | |
1192 | // Check if the attribute is already defined. |
1193 | if (keepExisting) { |
1194 | bool exists = false; |
1195 | for (unsigned j = 0; j < getNumAttributes(); j++) { |
1196 | if (getAttribute(i: j)->getType() == t) { |
1197 | exists = true; |
1198 | break; |
1199 | } |
1200 | } |
1201 | if (exists) { |
1202 | continue; |
1203 | } |
1204 | } |
1205 | |
1206 | if (t != Attribute::Unknown) { |
1207 | Object value = attributes->getVal(i); |
1208 | bool typeCheckOk = true; |
1209 | Attribute *attribute = new Attribute(t, &value); |
1210 | |
1211 | if (attribute->isOk() && (typeCheckOk = attribute->checkType(element: this))) { |
1212 | appendAttribute(attribute); |
1213 | } else { |
1214 | // It is not needed to free "value", the Attribute instance |
1215 | // owns the contents, so deleting "attribute" is enough. |
1216 | if (!typeCheckOk) { |
1217 | error(category: errSyntaxWarning, pos: -1, msg: "Attribute {0:s} value is of wrong type ({1:s})" , attribute->getTypeName(), attribute->getValue()->getTypeName()); |
1218 | } |
1219 | delete attribute; |
1220 | } |
1221 | } else { |
1222 | error(category: errSyntaxWarning, pos: -1, msg: "Wrong Attribute '{0:s}' in element {1:s}" , key, getTypeName()); |
1223 | } |
1224 | } |
1225 | } |
1226 | } else { |
1227 | error(category: errSyntaxWarning, pos: -1, msg: "O object is invalid value ({0:s})" , owner.getName()); |
1228 | } |
1229 | } else if (!owner.isNull()) { |
1230 | error(category: errSyntaxWarning, pos: -1, msg: "O is wrong type ({0:s})" , owner.getTypeName()); |
1231 | } |
1232 | } |
1233 | |