1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qtexthtmlparser_p.h"
5
6#include <qbytearray.h>
7#include <qstack.h>
8#include <qdebug.h>
9#include <qthread.h>
10#include <qguiapplication.h>
11
12#include "qtextdocument.h"
13#include "qtextformat_p.h"
14#include "qtextdocument_p.h"
15#include "qtextcursor.h"
16#include "qfont_p.h"
17
18#include <algorithm>
19
20#ifndef QT_NO_TEXTHTMLPARSER
21
22QT_BEGIN_NAMESPACE
23
24using namespace Qt::StringLiterals;
25
// see also tst_qtextdocumentfragment.cpp
//
// Named character references understood by the parser, mapped to their
// UTF-16 code unit.
//
// The table MUST stay sorted by name in plain byte (ASCII) order, i.e. upper
// case before lower case, because resolveEntity() searches it with
// std::lower_bound(). name[9] is sized for the longest entry ("thetasym",
// eight characters plus the terminating NUL). MAX_ENTITY must equal the number
// of rows; the static_assert below enforces that.
//
// "and" and "or" use the HTML 4.01 values U+2227 (logical and) and U+2228
// (logical or); U+22A5 is the up-tack that belongs to "perp".
#define MAX_ENTITY 258
static const struct QTextHtmlEntity { const char name[9]; char16_t code; } entities[]= {
    { "AElig", 0x00c6 },
    { "AMP", 38 },
    { "Aacute", 0x00c1 },
    { "Acirc", 0x00c2 },
    { "Agrave", 0x00c0 },
    { "Alpha", 0x0391 },
    { "Aring", 0x00c5 },
    { "Atilde", 0x00c3 },
    { "Auml", 0x00c4 },
    { "Beta", 0x0392 },
    { "Ccedil", 0x00c7 },
    { "Chi", 0x03a7 },
    { "Dagger", 0x2021 },
    { "Delta", 0x0394 },
    { "ETH", 0x00d0 },
    { "Eacute", 0x00c9 },
    { "Ecirc", 0x00ca },
    { "Egrave", 0x00c8 },
    { "Epsilon", 0x0395 },
    { "Eta", 0x0397 },
    { "Euml", 0x00cb },
    { "GT", 62 },
    { "Gamma", 0x0393 },
    { "Iacute", 0x00cd },
    { "Icirc", 0x00ce },
    { "Igrave", 0x00cc },
    { "Iota", 0x0399 },
    { "Iuml", 0x00cf },
    { "Kappa", 0x039a },
    { "LT", 60 },
    { "Lambda", 0x039b },
    { "Mu", 0x039c },
    { "Ntilde", 0x00d1 },
    { "Nu", 0x039d },
    { "OElig", 0x0152 },
    { "Oacute", 0x00d3 },
    { "Ocirc", 0x00d4 },
    { "Ograve", 0x00d2 },
    { "Omega", 0x03a9 },
    { "Omicron", 0x039f },
    { "Oslash", 0x00d8 },
    { "Otilde", 0x00d5 },
    { "Ouml", 0x00d6 },
    { "Phi", 0x03a6 },
    { "Pi", 0x03a0 },
    { "Prime", 0x2033 },
    { "Psi", 0x03a8 },
    { "QUOT", 34 },
    { "Rho", 0x03a1 },
    { "Scaron", 0x0160 },
    { "Sigma", 0x03a3 },
    { "THORN", 0x00de },
    { "Tau", 0x03a4 },
    { "Theta", 0x0398 },
    { "Uacute", 0x00da },
    { "Ucirc", 0x00db },
    { "Ugrave", 0x00d9 },
    { "Upsilon", 0x03a5 },
    { "Uuml", 0x00dc },
    { "Xi", 0x039e },
    { "Yacute", 0x00dd },
    { "Yuml", 0x0178 },
    { "Zeta", 0x0396 },
    { "aacute", 0x00e1 },
    { "acirc", 0x00e2 },
    { "acute", 0x00b4 },
    { "aelig", 0x00e6 },
    { "agrave", 0x00e0 },
    { "alefsym", 0x2135 },
    { "alpha", 0x03b1 },
    { "amp", 38 },
    { "and", 0x2227 },
    { "ang", 0x2220 },
    { "apos", 0x0027 },
    { "aring", 0x00e5 },
    { "asymp", 0x2248 },
    { "atilde", 0x00e3 },
    { "auml", 0x00e4 },
    { "bdquo", 0x201e },
    { "beta", 0x03b2 },
    { "brvbar", 0x00a6 },
    { "bull", 0x2022 },
    { "cap", 0x2229 },
    { "ccedil", 0x00e7 },
    { "cedil", 0x00b8 },
    { "cent", 0x00a2 },
    { "chi", 0x03c7 },
    { "circ", 0x02c6 },
    { "clubs", 0x2663 },
    { "cong", 0x2245 },
    { "copy", 0x00a9 },
    { "crarr", 0x21b5 },
    { "cup", 0x222a },
    { "curren", 0x00a4 },
    { "dArr", 0x21d3 },
    { "dagger", 0x2020 },
    { "darr", 0x2193 },
    { "deg", 0x00b0 },
    { "delta", 0x03b4 },
    { "diams", 0x2666 },
    { "divide", 0x00f7 },
    { "eacute", 0x00e9 },
    { "ecirc", 0x00ea },
    { "egrave", 0x00e8 },
    { "empty", 0x2205 },
    { "emsp", 0x2003 },
    { "ensp", 0x2002 },
    { "epsilon", 0x03b5 },
    { "equiv", 0x2261 },
    { "eta", 0x03b7 },
    { "eth", 0x00f0 },
    { "euml", 0x00eb },
    { "euro", 0x20ac },
    { "exist", 0x2203 },
    { "fnof", 0x0192 },
    { "forall", 0x2200 },
    { "frac12", 0x00bd },
    { "frac14", 0x00bc },
    { "frac34", 0x00be },
    { "frasl", 0x2044 },
    { "gamma", 0x03b3 },
    { "ge", 0x2265 },
    { "gt", 62 },
    { "hArr", 0x21d4 },
    { "harr", 0x2194 },
    { "hearts", 0x2665 },
    { "hellip", 0x2026 },
    { "iacute", 0x00ed },
    { "icirc", 0x00ee },
    { "iexcl", 0x00a1 },
    { "igrave", 0x00ec },
    { "image", 0x2111 },
    { "infin", 0x221e },
    { "int", 0x222b },
    { "iota", 0x03b9 },
    { "iquest", 0x00bf },
    { "isin", 0x2208 },
    { "iuml", 0x00ef },
    { "kappa", 0x03ba },
    { "lArr", 0x21d0 },
    { "lambda", 0x03bb },
    { "lang", 0x2329 },
    { "laquo", 0x00ab },
    { "larr", 0x2190 },
    { "lceil", 0x2308 },
    { "ldquo", 0x201c },
    { "le", 0x2264 },
    { "lfloor", 0x230a },
    { "lowast", 0x2217 },
    { "loz", 0x25ca },
    { "lrm", 0x200e },
    { "lsaquo", 0x2039 },
    { "lsquo", 0x2018 },
    { "lt", 60 },
    { "macr", 0x00af },
    { "mdash", 0x2014 },
    { "micro", 0x00b5 },
    { "middot", 0x00b7 },
    { "minus", 0x2212 },
    { "mu", 0x03bc },
    { "nabla", 0x2207 },
    { "nbsp", 0x00a0 },
    { "ndash", 0x2013 },
    { "ne", 0x2260 },
    { "ni", 0x220b },
    { "not", 0x00ac },
    { "notin", 0x2209 },
    { "nsub", 0x2284 },
    { "ntilde", 0x00f1 },
    { "nu", 0x03bd },
    { "oacute", 0x00f3 },
    { "ocirc", 0x00f4 },
    { "oelig", 0x0153 },
    { "ograve", 0x00f2 },
    { "oline", 0x203e },
    { "omega", 0x03c9 },
    { "omicron", 0x03bf },
    { "oplus", 0x2295 },
    { "or", 0x2228 },
    { "ordf", 0x00aa },
    { "ordm", 0x00ba },
    { "oslash", 0x00f8 },
    { "otilde", 0x00f5 },
    { "otimes", 0x2297 },
    { "ouml", 0x00f6 },
    { "para", 0x00b6 },
    { "part", 0x2202 },
    { "percnt", 0x0025 },
    { "permil", 0x2030 },
    { "perp", 0x22a5 },
    { "phi", 0x03c6 },
    { "pi", 0x03c0 },
    { "piv", 0x03d6 },
    { "plusmn", 0x00b1 },
    { "pound", 0x00a3 },
    { "prime", 0x2032 },
    { "prod", 0x220f },
    { "prop", 0x221d },
    { "psi", 0x03c8 },
    { "quot", 34 },
    { "rArr", 0x21d2 },
    { "radic", 0x221a },
    { "rang", 0x232a },
    { "raquo", 0x00bb },
    { "rarr", 0x2192 },
    { "rceil", 0x2309 },
    { "rdquo", 0x201d },
    { "real", 0x211c },
    { "reg", 0x00ae },
    { "rfloor", 0x230b },
    { "rho", 0x03c1 },
    { "rlm", 0x200f },
    { "rsaquo", 0x203a },
    { "rsquo", 0x2019 },
    { "sbquo", 0x201a },
    { "scaron", 0x0161 },
    { "sdot", 0x22c5 },
    { "sect", 0x00a7 },
    { "shy", 0x00ad },
    { "sigma", 0x03c3 },
    { "sigmaf", 0x03c2 },
    { "sim", 0x223c },
    { "spades", 0x2660 },
    { "sub", 0x2282 },
    { "sube", 0x2286 },
    { "sum", 0x2211 },
    { "sup", 0x2283 },
    { "sup1", 0x00b9 },
    { "sup2", 0x00b2 },
    { "sup3", 0x00b3 },
    { "supe", 0x2287 },
    { "szlig", 0x00df },
    { "tau", 0x03c4 },
    { "there4", 0x2234 },
    { "theta", 0x03b8 },
    { "thetasym", 0x03d1 },
    { "thinsp", 0x2009 },
    { "thorn", 0x00fe },
    { "tilde", 0x02dc },
    { "times", 0x00d7 },
    { "trade", 0x2122 },
    { "uArr", 0x21d1 },
    { "uacute", 0x00fa },
    { "uarr", 0x2191 },
    { "ucirc", 0x00fb },
    { "ugrave", 0x00f9 },
    { "uml", 0x00a8 },
    { "upsih", 0x03d2 },
    { "upsilon", 0x03c5 },
    { "uuml", 0x00fc },
    { "weierp", 0x2118 },
    { "xi", 0x03be },
    { "yacute", 0x00fd },
    { "yen", 0x00a5 },
    { "yuml", 0x00ff },
    { "zeta", 0x03b6 },
    { "zwj", 0x200d },
    { "zwnj", 0x200c }
};
static_assert(MAX_ENTITY == sizeof entities / sizeof *entities);
289
#if defined(Q_CC_MSVC_ONLY) && _MSC_VER < 1600
// Orders two table entries by name. Only compiled for MSVC with
// _MSC_VER below 1600.
bool operator<(const QTextHtmlEntity &entity1, const QTextHtmlEntity &entity2)
{
    const QLatin1StringView lhsName(entity1.name);
    const QLatin1StringView rhsName(entity2.name);
    return lhsName < rhsName;
}
#endif
296
297static bool operator<(QStringView entityStr, const QTextHtmlEntity &entity)
298{
299 return entityStr < QLatin1StringView(entity.name);
300}
301
302static bool operator<(const QTextHtmlEntity &entity, QStringView entityStr)
303{
304 return QLatin1StringView(entity.name) < entityStr;
305}
306
307static QChar resolveEntity(QStringView entity)
308{
309 const QTextHtmlEntity *start = &entities[0];
310 const QTextHtmlEntity *end = &entities[MAX_ENTITY];
311 const QTextHtmlEntity *e = std::lower_bound(first: start, last: end, val: entity);
312 if (e == end || (entity < *e))
313 return QChar();
314 return e->code;
315}
316
317static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {
318 0x20ac, // 0x80
319 0x0081, // 0x81 direct mapping
320 0x201a, // 0x82
321 0x0192, // 0x83
322 0x201e, // 0x84
323 0x2026, // 0x85
324 0x2020, // 0x86
325 0x2021, // 0x87
326 0x02C6, // 0x88
327 0x2030, // 0x89
328 0x0160, // 0x8A
329 0x2039, // 0x8B
330 0x0152, // 0x8C
331 0x008D, // 0x8D direct mapping
332 0x017D, // 0x8E
333 0x008F, // 0x8F directmapping
334 0x0090, // 0x90 directmapping
335 0x2018, // 0x91
336 0x2019, // 0x92
337 0x201C, // 0x93
338 0X201D, // 0x94
339 0x2022, // 0x95
340 0x2013, // 0x96
341 0x2014, // 0x97
342 0x02DC, // 0x98
343 0x2122, // 0x99
344 0x0161, // 0x9A
345 0x203A, // 0x9B
346 0x0153, // 0x9C
347 0x009D, // 0x9D direct mapping
348 0x017E, // 0x9E
349 0x0178 // 0x9F
350};
351
352// the displayMode value is according to the what are blocks in the piecetable, not
353// what the w3c defines.
354static const QTextHtmlElement elements[Html_NumElements]= {
355 { .name: "a", .id: Html_a, .displayMode: QTextHtmlElement::DisplayInline },
356 { .name: "address", .id: Html_address, .displayMode: QTextHtmlElement::DisplayInline },
357 { .name: "b", .id: Html_b, .displayMode: QTextHtmlElement::DisplayInline },
358 { .name: "big", .id: Html_big, .displayMode: QTextHtmlElement::DisplayInline },
359 { .name: "blockquote", .id: Html_blockquote, .displayMode: QTextHtmlElement::DisplayBlock },
360 { .name: "body", .id: Html_body, .displayMode: QTextHtmlElement::DisplayBlock },
361 { .name: "br", .id: Html_br, .displayMode: QTextHtmlElement::DisplayInline },
362 { .name: "caption", .id: Html_caption, .displayMode: QTextHtmlElement::DisplayBlock },
363 { .name: "center", .id: Html_center, .displayMode: QTextHtmlElement::DisplayBlock },
364 { .name: "cite", .id: Html_cite, .displayMode: QTextHtmlElement::DisplayInline },
365 { .name: "code", .id: Html_code, .displayMode: QTextHtmlElement::DisplayInline },
366 { .name: "dd", .id: Html_dd, .displayMode: QTextHtmlElement::DisplayBlock },
367 { .name: "dfn", .id: Html_dfn, .displayMode: QTextHtmlElement::DisplayInline },
368 { .name: "div", .id: Html_div, .displayMode: QTextHtmlElement::DisplayBlock },
369 { .name: "dl", .id: Html_dl, .displayMode: QTextHtmlElement::DisplayBlock },
370 { .name: "dt", .id: Html_dt, .displayMode: QTextHtmlElement::DisplayBlock },
371 { .name: "em", .id: Html_em, .displayMode: QTextHtmlElement::DisplayInline },
372 { .name: "font", .id: Html_font, .displayMode: QTextHtmlElement::DisplayInline },
373 { .name: "h1", .id: Html_h1, .displayMode: QTextHtmlElement::DisplayBlock },
374 { .name: "h2", .id: Html_h2, .displayMode: QTextHtmlElement::DisplayBlock },
375 { .name: "h3", .id: Html_h3, .displayMode: QTextHtmlElement::DisplayBlock },
376 { .name: "h4", .id: Html_h4, .displayMode: QTextHtmlElement::DisplayBlock },
377 { .name: "h5", .id: Html_h5, .displayMode: QTextHtmlElement::DisplayBlock },
378 { .name: "h6", .id: Html_h6, .displayMode: QTextHtmlElement::DisplayBlock },
379 { .name: "head", .id: Html_head, .displayMode: QTextHtmlElement::DisplayNone },
380 { .name: "hr", .id: Html_hr, .displayMode: QTextHtmlElement::DisplayBlock },
381 { .name: "html", .id: Html_html, .displayMode: QTextHtmlElement::DisplayInline },
382 { .name: "i", .id: Html_i, .displayMode: QTextHtmlElement::DisplayInline },
383 { .name: "img", .id: Html_img, .displayMode: QTextHtmlElement::DisplayInline },
384 { .name: "kbd", .id: Html_kbd, .displayMode: QTextHtmlElement::DisplayInline },
385 { .name: "li", .id: Html_li, .displayMode: QTextHtmlElement::DisplayBlock },
386 { .name: "link", .id: Html_link, .displayMode: QTextHtmlElement::DisplayNone },
387 { .name: "meta", .id: Html_meta, .displayMode: QTextHtmlElement::DisplayNone },
388 { .name: "nobr", .id: Html_nobr, .displayMode: QTextHtmlElement::DisplayInline },
389 { .name: "ol", .id: Html_ol, .displayMode: QTextHtmlElement::DisplayBlock },
390 { .name: "p", .id: Html_p, .displayMode: QTextHtmlElement::DisplayBlock },
391 { .name: "pre", .id: Html_pre, .displayMode: QTextHtmlElement::DisplayBlock },
392 { .name: "qt", .id: Html_body /*deliberate mapping*/, .displayMode: QTextHtmlElement::DisplayBlock },
393 { .name: "s", .id: Html_s, .displayMode: QTextHtmlElement::DisplayInline },
394 { .name: "samp", .id: Html_samp, .displayMode: QTextHtmlElement::DisplayInline },
395 { .name: "script", .id: Html_script, .displayMode: QTextHtmlElement::DisplayNone },
396 { .name: "small", .id: Html_small, .displayMode: QTextHtmlElement::DisplayInline },
397 { .name: "span", .id: Html_span, .displayMode: QTextHtmlElement::DisplayInline },
398 { .name: "strong", .id: Html_strong, .displayMode: QTextHtmlElement::DisplayInline },
399 { .name: "style", .id: Html_style, .displayMode: QTextHtmlElement::DisplayNone },
400 { .name: "sub", .id: Html_sub, .displayMode: QTextHtmlElement::DisplayInline },
401 { .name: "sup", .id: Html_sup, .displayMode: QTextHtmlElement::DisplayInline },
402 { .name: "table", .id: Html_table, .displayMode: QTextHtmlElement::DisplayTable },
403 { .name: "tbody", .id: Html_tbody, .displayMode: QTextHtmlElement::DisplayTable },
404 { .name: "td", .id: Html_td, .displayMode: QTextHtmlElement::DisplayBlock },
405 { .name: "tfoot", .id: Html_tfoot, .displayMode: QTextHtmlElement::DisplayTable },
406 { .name: "th", .id: Html_th, .displayMode: QTextHtmlElement::DisplayBlock },
407 { .name: "thead", .id: Html_thead, .displayMode: QTextHtmlElement::DisplayTable },
408 { .name: "title", .id: Html_title, .displayMode: QTextHtmlElement::DisplayNone },
409 { .name: "tr", .id: Html_tr, .displayMode: QTextHtmlElement::DisplayTable },
410 { .name: "tt", .id: Html_tt, .displayMode: QTextHtmlElement::DisplayInline },
411 { .name: "u", .id: Html_u, .displayMode: QTextHtmlElement::DisplayInline },
412 { .name: "ul", .id: Html_ul, .displayMode: QTextHtmlElement::DisplayBlock },
413 { .name: "var", .id: Html_var, .displayMode: QTextHtmlElement::DisplayInline },
414};
415
416static bool operator<(const QString &str, const QTextHtmlElement &e)
417{
418 return str < QLatin1StringView(e.name);
419}
420
421static bool operator<(const QTextHtmlElement &e, const QString &str)
422{
423 return QLatin1StringView(e.name) < str;
424}
425
426static const QTextHtmlElement *lookupElementHelper(const QString &element)
427{
428 const QTextHtmlElement *start = &elements[0];
429 const QTextHtmlElement *end = &elements[Html_NumElements];
430 const QTextHtmlElement *e = std::lower_bound(first: start, last: end, val: element);
431 if ((e == end) || (element < *e))
432 return nullptr;
433 return e;
434}
435
436int QTextHtmlParser::lookupElement(const QString &element)
437{
438 const QTextHtmlElement *e = lookupElementHelper(element);
439 if (!e)
440 return -1;
441 return e->id;
442}
443
444// quotes newlines as "\\n"
445static QString quoteNewline(const QString &s)
446{
447 QString n = s;
448 n.replace(c: u'\n', after: "\\n"_L1);
449 return n;
450}
451
452QTextHtmlParserNode::QTextHtmlParserNode()
453 : parent(0), id(Html_unknown),
454 cssFloat(QTextFrameFormat::InFlow), hasOwnListStyle(false), hasOwnLineHeightType(false), hasLineHeightMultiplier(false),
455 hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false),
456 displayMode(QTextHtmlElement::DisplayInline), hasHref(false),
457 listStyle(QTextListFormat::ListStyleUndefined), imageWidth(-1), imageHeight(-1), tableBorder(0),
458 tableCellRowSpan(1), tableCellColSpan(1), tableCellSpacing(2), tableCellPadding(0),
459 borderBrush(Qt::darkGray), borderStyle(QTextFrameFormat::BorderStyle_Outset),
460 borderCollapse(false),
461 userState(-1), cssListIndent(0), wsm(WhiteSpaceModeUndefined)
462{
463 margin[QTextHtmlParser::MarginLeft] = 0;
464 margin[QTextHtmlParser::MarginRight] = 0;
465 margin[QTextHtmlParser::MarginTop] = 0;
466 margin[QTextHtmlParser::MarginBottom] = 0;
467
468 for (int i = 0; i < 4; ++i) {
469 tableCellBorderStyle[i] = QTextFrameFormat::BorderStyle_None;
470 tableCellBorder[i] = 0;
471 tableCellBorderBrush[i] = Qt::NoBrush;
472 }
473}
474
475void QTextHtmlParser::dumpHtml()
476{
477 for (int i = 0; i < count(); ++i) {
478 qDebug().nospace() << qPrintable(QString(depth(i) * 4, u' '))
479 << qPrintable(at(i).tag) << ':'
480 << quoteNewline(s: at(i).text);
481 }
482}
483
484QTextHtmlParserNode *QTextHtmlParser::newNode(int parent)
485{
486 QTextHtmlParserNode *lastNode = nodes.last();
487 QTextHtmlParserNode *newNode = nullptr;
488
489 bool reuseLastNode = true;
490
491 if (nodes.size() == 1) {
492 reuseLastNode = false;
493 } else if (lastNode->tag.isEmpty()) {
494
495 if (lastNode->text.isEmpty()) {
496 reuseLastNode = true;
497 } else { // last node is a text node (empty tag) with some text
498
499 if (lastNode->text.size() == 1 && lastNode->text.at(i: 0).isSpace()) {
500
501 int lastSibling = count() - 2;
502 while (lastSibling
503 && at(i: lastSibling).parent != lastNode->parent
504 && at(i: lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
505 lastSibling = at(i: lastSibling).parent;
506 }
507
508 if (at(i: lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
509 reuseLastNode = false;
510 } else {
511 reuseLastNode = true;
512 }
513 } else {
514 // text node with real (non-whitespace) text -> nothing to re-use
515 reuseLastNode = false;
516 }
517
518 }
519
520 } else {
521 // last node had a proper tag -> nothing to re-use
522 reuseLastNode = false;
523 }
524
525 if (reuseLastNode) {
526 newNode = lastNode;
527 newNode->tag.clear();
528 newNode->text.clear();
529 newNode->id = Html_unknown;
530 } else {
531 nodes.append(t: new QTextHtmlParserNode);
532 newNode = nodes.last();
533 }
534
535 newNode->parent = parent;
536 return newNode;
537}
538
539void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider)
540{
541 qDeleteAll(c: nodes);
542 nodes.clear();
543 nodes.append(t: new QTextHtmlParserNode);
544 txt = text;
545 pos = 0;
546 len = txt.size();
547 textEditMode = false;
548 resourceProvider = _resourceProvider;
549 parse();
550 //dumpHtml();
551}
552
553int QTextHtmlParser::depth(int i) const
554{
555 int depth = 0;
556 while (i) {
557 i = at(i).parent;
558 ++depth;
559 }
560 return depth;
561}
562
563int QTextHtmlParser::margin(int i, int mar) const {
564 int m = 0;
565 const QTextHtmlParserNode *node;
566 if (mar == MarginLeft
567 || mar == MarginRight) {
568 while (i) {
569 node = &at(i);
570 if (!node->isBlock() && node->id != Html_table)
571 break;
572 if (node->isTableCell())
573 break;
574 m += node->margin[mar];
575 i = node->parent;
576 }
577 }
578 return m;
579}
580
581int QTextHtmlParser::topMargin(int i) const
582{
583 if (!i)
584 return 0;
585 return at(i).margin[MarginTop];
586}
587
588int QTextHtmlParser::bottomMargin(int i) const
589{
590 if (!i)
591 return 0;
592 return at(i).margin[MarginBottom];
593}
594
595void QTextHtmlParser::eatSpace()
596{
597 while (pos < len && txt.at(i: pos).isSpace() && txt.at(i: pos) != QChar::ParagraphSeparator)
598 pos++;
599}
600
601void QTextHtmlParser::parse()
602{
603 while (pos < len) {
604 QChar c = txt.at(i: pos++);
605 if (c == u'<') {
606 parseTag();
607 } else if (c == u'&') {
608 nodes.last()->text += parseEntity();
609 } else {
610 nodes.last()->text += c;
611 }
612 }
613}
614
615// parses a tag after "<"
616void QTextHtmlParser::parseTag()
617{
618 eatSpace();
619
620 // handle comments and other exclamation mark declarations
621 if (hasPrefix(c: u'!')) {
622 parseExclamationTag();
623 if (nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePre
624 && nodes.last()->wsm != QTextHtmlParserNode::WhiteSpacePreWrap
625 && !textEditMode)
626 eatSpace();
627 return;
628 }
629
630 // if close tag just close
631 if (hasPrefix(c: u'/')) {
632 if (nodes.last()->id == Html_style) {
633#ifndef QT_NO_CSSPARSER
634 QCss::Parser parser(nodes.constLast()->text);
635 QCss::StyleSheet sheet;
636 sheet.origin = QCss::StyleSheetOrigin_Author;
637 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
638 inlineStyleSheets.append(t: sheet);
639 resolveStyleSheetImports(sheet);
640#endif
641 }
642 parseCloseTag();
643 return;
644 }
645
646 int p = last();
647 while (p && at(i: p).tag.size() == 0)
648 p = at(i: p).parent;
649
650 QTextHtmlParserNode *node = newNode(parent: p);
651
652 // parse tag name
653 node->tag = parseWord().toLower();
654
655 const QTextHtmlElement *elem = lookupElementHelper(element: node->tag);
656 if (elem) {
657 node->id = elem->id;
658 node->displayMode = elem->displayMode;
659 } else {
660 node->id = Html_unknown;
661 }
662
663 node->attributes.clear();
664 // _need_ at least one space after the tag name, otherwise there can't be attributes
665 if (pos < len && txt.at(i: pos).isSpace())
666 node->attributes = parseAttributes();
667
668 // resolveParent() may have to change the order in the tree and
669 // insert intermediate nodes for buggy HTML, so re-initialize the 'node'
670 // pointer through the return value
671 node = resolveParent();
672 resolveNode();
673
674#ifndef QT_NO_CSSPARSER
675 const int nodeIndex = nodes.size() - 1; // this new node is always the last
676 node->applyCssDeclarations(declarations: declarationsForNode(node: nodeIndex), resourceProvider);
677#endif
678 applyAttributes(attributes: node->attributes);
679
680 // finish tag
681 bool tagClosed = false;
682 while (pos < len && txt.at(i: pos) != u'>') {
683 if (txt.at(i: pos) == u'/')
684 tagClosed = true;
685
686 pos++;
687 }
688 pos++;
689
690 // in a white-space preserving environment strip off a initial newline
691 // since the element itself already generates a newline
692 if ((node->wsm == QTextHtmlParserNode::WhiteSpacePre
693 || node->wsm == QTextHtmlParserNode::WhiteSpacePreWrap
694 || node->wsm == QTextHtmlParserNode::WhiteSpacePreLine)
695 && node->isBlock()) {
696 if (pos < len - 1 && txt.at(i: pos) == u'\n')
697 ++pos;
698 }
699
700 if (node->mayNotHaveChildren() || tagClosed) {
701 newNode(parent: node->parent);
702 resolveNode();
703 }
704}
705
706// parses a tag beginning with "/"
707void QTextHtmlParser::parseCloseTag()
708{
709 ++pos;
710 QString tag = parseWord().toLower().trimmed();
711 while (pos < len) {
712 QChar c = txt.at(i: pos++);
713 if (c == u'>')
714 break;
715 }
716
717 // find corresponding open node
718 int p = last();
719 if (p > 0
720 && at(i: p - 1).tag == tag
721 && at(i: p - 1).mayNotHaveChildren())
722 p--;
723
724 while (p && at(i: p).tag != tag)
725 p = at(i: p).parent;
726
727 // simply ignore the tag if we can't find
728 // a corresponding open node, for broken
729 // html such as <font>blah</font></font>
730 if (!p)
731 return;
732
733 // in a white-space preserving environment strip off a trailing newline
734 // since the closing of the opening block element will automatically result
735 // in a new block for elements following the <pre>
736 // ...foo\n</pre><p>blah -> foo</pre><p>blah
737 if ((at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePre
738 || at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePreWrap
739 || at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePreLine)
740 && at(i: p).isBlock()) {
741 if (at(i: last()).text.endsWith(c: u'\n'))
742 nodes[last()]->text.chop(n: 1);
743 }
744
745 newNode(parent: at(i: p).parent);
746 resolveNode();
747}
748
749// parses a tag beginning with "!"
750void QTextHtmlParser::parseExclamationTag()
751{
752 ++pos;
753 if (hasPrefix(c: u'-') && hasPrefix(c: u'-', lookahead: 1)) {
754 pos += 2;
755 // eat comments
756 int end = txt.indexOf(s: "-->"_L1, from: pos);
757 pos = (end >= 0 ? end + 3 : len);
758 } else {
759 // eat internal tags
760 while (pos < len) {
761 QChar c = txt.at(i: pos++);
762 if (c == u'>')
763 break;
764 }
765 }
766}
767
768QString QTextHtmlParser::parseEntity(QStringView entity)
769{
770 QChar resolved = resolveEntity(entity);
771 if (!resolved.isNull())
772 return QString(resolved);
773
774 if (entity.size() > 1 && entity.at(n: 0) == u'#') {
775 entity = entity.mid(pos: 1); // removing leading #
776
777 int base = 10;
778 bool ok = false;
779
780 if (entity.at(n: 0).toLower() == u'x') { // hex entity?
781 entity = entity.mid(pos: 1);
782 base = 16;
783 }
784
785 uint uc = entity.toUInt(ok: &ok, base);
786 if (ok) {
787 if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0])))
788 uc = windowsLatin1ExtendedCharacters[uc - 0x80];
789 return QStringView{QChar::fromUcs4(c: uc)}.toString();
790 }
791 }
792 return {};
793}
794
795// parses an entity after "&", and returns it
796QString QTextHtmlParser::parseEntity()
797{
798 const int recover = pos;
799 int entityLen = 0;
800 while (pos < len) {
801 QChar c = txt.at(i: pos++);
802 if (c.isSpace() || pos - recover > 9) {
803 goto error;
804 }
805 if (c == u';')
806 break;
807 ++entityLen;
808 }
809 if (entityLen) {
810 const QStringView entity = QStringView(txt).mid(pos: recover, n: entityLen);
811 QString parsedEntity = parseEntity(entity);
812 if (!parsedEntity.isNull()) {
813 return parsedEntity;
814 }
815 }
816error:
817 pos = recover;
818 return "&"_L1;
819}
820
821// parses one word, possibly quoted, and returns it
822QString QTextHtmlParser::parseWord()
823{
824 QString word;
825 if (hasPrefix(c: u'\"')) { // double quotes
826 ++pos;
827 while (pos < len) {
828 QChar c = txt.at(i: pos++);
829 if (c == u'\"')
830 break;
831 else if (c == u'&')
832 word += parseEntity();
833 else
834 word += c;
835 }
836 } else if (hasPrefix(c: u'\'')) { // single quotes
837 ++pos;
838 while (pos < len) {
839 QChar c = txt.at(i: pos++);
840 // Allow for escaped single quotes as they may be part of the string
841 if (c == u'\'' && (txt.size() > 1 && txt.at(i: pos - 2) != u'\\'))
842 break;
843 else
844 word += c;
845 }
846 } else { // normal text
847 while (pos < len) {
848 QChar c = txt.at(i: pos++);
849 if (c == u'>' || (c == u'/' && hasPrefix(c: u'>'))
850 || c == u'<' || c == u'=' || c.isSpace()) {
851 --pos;
852 break;
853 }
854 if (c == u'&')
855 word += parseEntity();
856 else
857 word += c;
858 }
859 }
860 return word;
861}
862
863// gives the new node the right parent
864QTextHtmlParserNode *QTextHtmlParser::resolveParent()
865{
866 QTextHtmlParserNode *node = nodes.last();
867
868 int p = node->parent;
869
870 // Excel gives us buggy HTML with just tr without surrounding table tags
871 // or with just td tags
872
873 if (node->id == Html_td) {
874 int n = p;
875 while (n && at(i: n).id != Html_tr)
876 n = at(i: n).parent;
877
878 if (!n) {
879 nodes.insert(i: nodes.size() - 1, t: new QTextHtmlParserNode);
880 nodes.insert(i: nodes.size() - 1, t: new QTextHtmlParserNode);
881
882 QTextHtmlParserNode *table = nodes[nodes.size() - 3];
883 table->parent = p;
884 table->id = Html_table;
885 table->tag = "table"_L1;
886 table->children.append(t: nodes.size() - 2); // add row as child
887
888 QTextHtmlParserNode *row = nodes[nodes.size() - 2];
889 row->parent = nodes.size() - 3; // table as parent
890 row->id = Html_tr;
891 row->tag = "tr"_L1;
892
893 p = nodes.size() - 2;
894 node = nodes.last(); // re-initialize pointer
895 }
896 }
897
898 if (node->id == Html_tr) {
899 int n = p;
900 while (n && at(i: n).id != Html_table)
901 n = at(i: n).parent;
902
903 if (!n) {
904 nodes.insert(i: nodes.size() - 1, t: new QTextHtmlParserNode);
905 QTextHtmlParserNode *table = nodes[nodes.size() - 2];
906 table->parent = p;
907 table->id = Html_table;
908 table->tag = "table"_L1;
909 p = nodes.size() - 2;
910 node = nodes.last(); // re-initialize pointer
911 }
912 }
913
914 // permit invalid html by letting block elements be children
915 // of inline elements with the exception of paragraphs:
916 //
917 // a new paragraph closes parent inline elements (while loop),
918 // unless they themselves are children of a non-paragraph block
919 // element (if statement)
920 //
921 // For example:
922 //
923 // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that
924 // belongs to the first <p>. The self-nesting
925 // check further down prevents the second <p>
926 // from nesting into the first one then.
927 // so Bar is not bold.
928 //
929 // <body><b><p>Foo <-- Foo should be bold.
930 //
931 // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold.
932 //
933 if (node->id == Html_p) {
934 while (p && !at(i: p).isBlock())
935 p = at(i: p).parent;
936
937 if (!p || at(i: p).id != Html_p)
938 p = node->parent;
939 }
940
941 // some elements are not self nesting
942 if (node->id == at(i: p).id
943 && node->isNotSelfNesting())
944 p = at(i: p).parent;
945
946 // some elements are not allowed in certain contexts
947 while ((p && !node->allowedInContext(parentId: at(i: p).id))
948 // ### make new styles aware of empty tags
949 || at(i: p).mayNotHaveChildren()
950 ) {
951 p = at(i: p).parent;
952 }
953
954 node->parent = p;
955
956 // makes it easier to traverse the tree, later
957 nodes[p]->children.append(t: nodes.size() - 1);
958 return node;
959}
960
961// sets all properties on the new node
962void QTextHtmlParser::resolveNode()
963{
964 QTextHtmlParserNode *node = nodes.last();
965 const QTextHtmlParserNode *parent = nodes.at(i: node->parent);
966 node->initializeProperties(parent, parser: this);
967}
968
969bool QTextHtmlParserNode::isNestedList(const QTextHtmlParser *parser) const
970{
971 if (!isListStart())
972 return false;
973
974 int p = parent;
975 while (p) {
976 if (parser->at(i: p).isListStart())
977 return true;
978 p = parser->at(i: p).parent;
979 }
980 return false;
981}
982
983void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser)
984{
985 // inherit properties from parent element
986 charFormat = parent->charFormat;
987
988 if (id == Html_html)
989 blockFormat.setLayoutDirection(Qt::LeftToRight); // HTML default
990 else if (parent->blockFormat.hasProperty(propertyId: QTextFormat::LayoutDirection))
991 blockFormat.setLayoutDirection(parent->blockFormat.layoutDirection());
992
993 if (parent->displayMode == QTextHtmlElement::DisplayNone)
994 displayMode = QTextHtmlElement::DisplayNone;
995
996 if (parent->id != Html_table || id == Html_caption) {
997 if (parent->blockFormat.hasProperty(propertyId: QTextFormat::BlockAlignment))
998 blockFormat.setAlignment(parent->blockFormat.alignment());
999 else
1000 blockFormat.clearProperty(propertyId: QTextFormat::BlockAlignment);
1001 }
1002 // we don't paint per-row background colors, yet. so as an
1003 // exception inherit the background color here
1004 // we also inherit the background between inline elements
1005 // we also inherit from non-body block elements since we merge them together
1006 if ((parent->id != Html_tr || !isTableCell())
1007 && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)
1008 && (parent->id == Html_body || displayMode != QTextHtmlElement::DisplayBlock || parent->displayMode != QTextHtmlElement::DisplayBlock)
1009 ) {
1010 charFormat.clearProperty(propertyId: QTextFormat::BackgroundBrush);
1011 }
1012
1013 listStyle = parent->listStyle;
1014 // makes no sense to inherit that property, a named anchor is a single point
1015 // in the document, which is set by the DocumentFragment
1016 charFormat.clearProperty(propertyId: QTextFormat::AnchorName);
1017 wsm = parent->wsm;
1018
1019 // initialize remaining properties
1020 margin[QTextHtmlParser::MarginLeft] = 0;
1021 margin[QTextHtmlParser::MarginRight] = 0;
1022 margin[QTextHtmlParser::MarginTop] = 0;
1023 margin[QTextHtmlParser::MarginBottom] = 0;
1024 cssFloat = QTextFrameFormat::InFlow;
1025
1026 for (int i = 0; i < 4; ++i)
1027 padding[i] = -1;
1028
1029 // set element specific attributes
1030 switch (id) {
1031 case Html_a:
1032 for (int i = 0; i < attributes.size(); i += 2) {
1033 const QString key = attributes.at(i);
1034 if (key.compare(other: "href"_L1, cs: Qt::CaseInsensitive) == 0
1035 && !attributes.at(i: i + 1).isEmpty()) {
1036 hasHref = true;
1037 }
1038 }
1039 charFormat.setAnchor(true);
1040 break;
1041 case Html_big:
1042 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(1));
1043 break;
1044 case Html_small:
1045 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(-1));
1046 break;
1047 case Html_h1:
1048 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(3));
1049 margin[QTextHtmlParser::MarginTop] = 18;
1050 margin[QTextHtmlParser::MarginBottom] = 12;
1051 break;
1052 case Html_h2:
1053 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(2));
1054 margin[QTextHtmlParser::MarginTop] = 16;
1055 margin[QTextHtmlParser::MarginBottom] = 12;
1056 break;
1057 case Html_h3:
1058 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(1));
1059 margin[QTextHtmlParser::MarginTop] = 14;
1060 margin[QTextHtmlParser::MarginBottom] = 12;
1061 break;
1062 case Html_h4:
1063 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(0));
1064 margin[QTextHtmlParser::MarginTop] = 12;
1065 margin[QTextHtmlParser::MarginBottom] = 12;
1066 break;
1067 case Html_h5:
1068 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(-1));
1069 margin[QTextHtmlParser::MarginTop] = 12;
1070 margin[QTextHtmlParser::MarginBottom] = 4;
1071 break;
1072 case Html_p:
1073 margin[QTextHtmlParser::MarginTop] = 12;
1074 margin[QTextHtmlParser::MarginBottom] = 12;
1075 break;
1076 case Html_ul:
1077 // nested lists don't have margins, except for the toplevel one
1078 if (!isNestedList(parser)) {
1079 margin[QTextHtmlParser::MarginTop] = 12;
1080 margin[QTextHtmlParser::MarginBottom] = 12;
1081 }
1082 // no left margin as we use indenting instead
1083 break;
1084 case Html_ol:
1085 // nested lists don't have margins, except for the toplevel one
1086 if (!isNestedList(parser)) {
1087 margin[QTextHtmlParser::MarginTop] = 12;
1088 margin[QTextHtmlParser::MarginBottom] = 12;
1089 }
1090 // no left margin as we use indenting instead
1091 break;
1092 case Html_br:
1093 text = QChar(QChar::LineSeparator);
1094 break;
1095 case Html_pre:
1096 margin[QTextHtmlParser::MarginTop] = 12;
1097 margin[QTextHtmlParser::MarginBottom] = 12;
1098 break;
1099 case Html_blockquote:
1100 margin[QTextHtmlParser::MarginTop] = 12;
1101 margin[QTextHtmlParser::MarginBottom] = 12;
1102 margin[QTextHtmlParser::MarginLeft] = 40;
1103 margin[QTextHtmlParser::MarginRight] = 40;
1104 blockFormat.setProperty(propertyId: QTextFormat::BlockQuoteLevel, value: 1);
1105 break;
1106 case Html_dl:
1107 margin[QTextHtmlParser::MarginTop] = 8;
1108 margin[QTextHtmlParser::MarginBottom] = 8;
1109 break;
1110 case Html_dd:
1111 margin[QTextHtmlParser::MarginLeft] = 30;
1112 break;
1113 default: break;
1114 }
1115}
1116
1117#ifndef QT_NO_CSSPARSER
1118void QTextHtmlParserNode::setListStyle(const QList<QCss::Value> &cssValues)
1119{
1120 for (int i = 0; i < cssValues.size(); ++i) {
1121 if (cssValues.at(i).type == QCss::Value::KnownIdentifier) {
1122 switch (static_cast<QCss::KnownValue>(cssValues.at(i).variant.toInt())) {
1123 case QCss::Value_None: hasOwnListStyle = true; listStyle = QTextListFormat::ListStyleUndefined; break;
1124 case QCss::Value_Disc: hasOwnListStyle = true; listStyle = QTextListFormat::ListDisc; break;
1125 case QCss::Value_Square: hasOwnListStyle = true; listStyle = QTextListFormat::ListSquare; break;
1126 case QCss::Value_Circle: hasOwnListStyle = true; listStyle = QTextListFormat::ListCircle; break;
1127 case QCss::Value_Decimal: hasOwnListStyle = true; listStyle = QTextListFormat::ListDecimal; break;
1128 case QCss::Value_LowerAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerAlpha; break;
1129 case QCss::Value_UpperAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperAlpha; break;
1130 case QCss::Value_LowerRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerRoman; break;
1131 case QCss::Value_UpperRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperRoman; break;
1132 default: break;
1133 }
1134 }
1135 }
1136 // allow individual list items to override the style
1137 if (id == Html_li && hasOwnListStyle)
1138 blockFormat.setProperty(propertyId: QTextFormat::ListStyle, value: listStyle);
1139}
1140
1141static QTextFrameFormat::BorderStyle toQTextFrameFormat(QCss::BorderStyle cssStyle)
1142{
1143 switch (cssStyle) {
1144 case QCss::BorderStyle::BorderStyle_Dotted:
1145 return QTextFrameFormat::BorderStyle::BorderStyle_Dotted;
1146 case QCss::BorderStyle::BorderStyle_Dashed:
1147 return QTextFrameFormat::BorderStyle::BorderStyle_Dashed;
1148 case QCss::BorderStyle::BorderStyle_Solid:
1149 return QTextFrameFormat::BorderStyle::BorderStyle_Solid;
1150 case QCss::BorderStyle::BorderStyle_Double:
1151 return QTextFrameFormat::BorderStyle::BorderStyle_Double;
1152 case QCss::BorderStyle::BorderStyle_DotDash:
1153 return QTextFrameFormat::BorderStyle::BorderStyle_DotDash;
1154 case QCss::BorderStyle::BorderStyle_DotDotDash:
1155 return QTextFrameFormat::BorderStyle::BorderStyle_DotDotDash;
1156 case QCss::BorderStyle::BorderStyle_Groove:
1157 return QTextFrameFormat::BorderStyle::BorderStyle_Groove;
1158 case QCss::BorderStyle::BorderStyle_Ridge:
1159 return QTextFrameFormat::BorderStyle::BorderStyle_Ridge;
1160 case QCss::BorderStyle::BorderStyle_Inset:
1161 return QTextFrameFormat::BorderStyle::BorderStyle_Inset;
1162 case QCss::BorderStyle::BorderStyle_Outset:
1163 return QTextFrameFormat::BorderStyle::BorderStyle_Outset;
1164 case QCss::BorderStyle::BorderStyle_Unknown:
1165 case QCss::BorderStyle::BorderStyle_None:
1166 case QCss::BorderStyle::BorderStyle_Native:
1167 return QTextFrameFormat::BorderStyle::BorderStyle_None;
1168 case QCss::BorderStyle::NumKnownBorderStyles:
1169 break;
1170 // Intentionally no "default" to allow a compiler warning when extending the enum
1171 // without updating this here. clang gives such a warning.
1172 }
1173 // Must not happen, intentionally trigger undefined behavior which sanitizers will detect.
1174 // Having all cases covered in switch is not sufficient:
1175 // MSVC would warn when there is no "default".
1176 return static_cast<QTextFrameFormat::BorderStyle>(-1);
1177}
1178
1179void QTextHtmlParserNode::applyCssDeclarations(const QList<QCss::Declaration> &declarations, const QTextDocument *resourceProvider)
1180{
1181 QCss::ValueExtractor extractor(declarations);
1182 extractor.extractBox(margins: margin, paddings: padding);
1183
1184 if (id == Html_td || id == Html_th) {
1185 QCss::BorderStyle cssStyles[4];
1186 int cssBorder[4];
1187 QSize cssRadii[4]; // unused
1188 for (int i = 0; i < 4; ++i) {
1189 cssStyles[i] = QCss::BorderStyle_None;
1190 cssBorder[i] = 0;
1191 }
1192 // this will parse (and cache) "border-width" as a list so the
1193 // QCss::BorderWidth parsing below which expects a single value
1194 // will not work as expected - which in this case does not matter
1195 // because tableBorder is not relevant for cells.
1196 extractor.extractBorder(borders: cssBorder, colors: tableCellBorderBrush, Styles: cssStyles, radii: cssRadii);
1197 for (int i = 0; i < 4; ++i) {
1198 tableCellBorderStyle[i] = toQTextFrameFormat(cssStyle: cssStyles[i]);
1199 tableCellBorder[i] = static_cast<qreal>(cssBorder[i]);
1200 }
1201 }
1202
1203 for (int i = 0; i < declarations.size(); ++i) {
1204 const QCss::Declaration &decl = declarations.at(i);
1205 if (decl.d->values.isEmpty()) continue;
1206
1207 QCss::KnownValue identifier = QCss::UnknownValue;
1208 if (decl.d->values.first().type == QCss::Value::KnownIdentifier)
1209 identifier = static_cast<QCss::KnownValue>(decl.d->values.first().variant.toInt());
1210
1211 switch (decl.d->propertyId) {
1212 case QCss::BorderColor: borderBrush = QBrush(decl.colorValue()); break;
1213 case QCss::BorderStyles:
1214 if (decl.styleValue() != QCss::BorderStyle_Unknown && decl.styleValue() != QCss::BorderStyle_Native)
1215 borderStyle = static_cast<QTextFrameFormat::BorderStyle>(decl.styleValue() - 1);
1216 break;
1217 case QCss::BorderWidth: {
1218 int borders[4];
1219 extractor.lengthValues(decl, m: borders);
1220 tableBorder = borders[0];
1221 }
1222 break;
1223 case QCss::BorderCollapse:
1224 borderCollapse = decl.borderCollapseValue();
1225 break;
1226 case QCss::Color: charFormat.setForeground(decl.colorValue()); break;
1227 case QCss::Float:
1228 cssFloat = QTextFrameFormat::InFlow;
1229 switch (identifier) {
1230 case QCss::Value_Left: cssFloat = QTextFrameFormat::FloatLeft; break;
1231 case QCss::Value_Right: cssFloat = QTextFrameFormat::FloatRight; break;
1232 default: break;
1233 }
1234 break;
1235 case QCss::QtBlockIndent:
1236 blockFormat.setIndent(decl.d->values.first().variant.toInt());
1237 break;
1238 case QCss::QtLineHeightType: {
1239 QString lineHeightTypeName = decl.d->values.first().variant.toString();
1240 QTextBlockFormat::LineHeightTypes lineHeightType;
1241 if (lineHeightTypeName.compare(other: "proportional"_L1, cs: Qt::CaseInsensitive) == 0)
1242 lineHeightType = QTextBlockFormat::ProportionalHeight;
1243 else if (lineHeightTypeName.compare(other: "fixed"_L1, cs: Qt::CaseInsensitive) == 0)
1244 lineHeightType = QTextBlockFormat::FixedHeight;
1245 else if (lineHeightTypeName.compare(other: "minimum"_L1, cs: Qt::CaseInsensitive) == 0)
1246 lineHeightType = QTextBlockFormat::MinimumHeight;
1247 else if (lineHeightTypeName.compare(other: "line-distance"_L1, cs: Qt::CaseInsensitive) == 0)
1248 lineHeightType = QTextBlockFormat::LineDistanceHeight;
1249 else
1250 lineHeightType = QTextBlockFormat::SingleHeight;
1251
1252 if (hasLineHeightMultiplier) {
1253 qreal lineHeight = blockFormat.lineHeight() / 100.0;
1254 blockFormat.setProperty(propertyId: QTextBlockFormat::LineHeight, value: lineHeight);
1255 }
1256
1257 blockFormat.setProperty(propertyId: QTextBlockFormat::LineHeightType, value: lineHeightType);
1258 hasOwnLineHeightType = true;
1259 }
1260 break;
1261 case QCss::LineHeight: {
1262 qreal lineHeight;
1263 QTextBlockFormat::LineHeightTypes lineHeightType;
1264 if (decl.realValue(r: &lineHeight, unit: "px")) {
1265 lineHeightType = QTextBlockFormat::MinimumHeight;
1266 } else {
1267 bool ok;
1268 QCss::Value cssValue = decl.d->values.first();
1269 QString value = cssValue.toString();
1270 lineHeight = value.toDouble(ok: &ok);
1271 if (ok) {
1272 if (!hasOwnLineHeightType && cssValue.type == QCss::Value::Number) {
1273 lineHeight *= 100.0;
1274 hasLineHeightMultiplier = true;
1275 }
1276 lineHeightType = QTextBlockFormat::ProportionalHeight;
1277 } else {
1278 lineHeight = 0.0;
1279 lineHeightType = QTextBlockFormat::SingleHeight;
1280 }
1281 }
1282
1283 // Only override line height type if specified in same node
1284 if (hasOwnLineHeightType)
1285 lineHeightType = QTextBlockFormat::LineHeightTypes(blockFormat.lineHeightType());
1286
1287 blockFormat.setLineHeight(height: lineHeight, heightType: lineHeightType);
1288 break;
1289 }
1290 case QCss::TextIndent: {
1291 qreal indent = 0;
1292 if (decl.realValue(r: &indent, unit: "px"))
1293 blockFormat.setTextIndent(indent);
1294 break; }
1295 case QCss::QtListIndent:
1296 if (decl.intValue(i: &cssListIndent))
1297 hasCssListIndent = true;
1298 break;
1299 case QCss::QtParagraphType:
1300 if (decl.d->values.first().variant.toString().compare(other: "empty"_L1, cs: Qt::CaseInsensitive) == 0)
1301 isEmptyParagraph = true;
1302 break;
1303 case QCss::QtTableType:
1304 if (decl.d->values.first().variant.toString().compare(other: "frame"_L1, cs: Qt::CaseInsensitive) == 0)
1305 isTextFrame = true;
1306 else if (decl.d->values.first().variant.toString().compare(other: "root"_L1, cs: Qt::CaseInsensitive) == 0) {
1307 isTextFrame = true;
1308 isRootFrame = true;
1309 }
1310 break;
1311 case QCss::QtUserState:
1312 userState = decl.d->values.first().variant.toInt();
1313 break;
1314 case QCss::Whitespace:
1315 switch (identifier) {
1316 case QCss::Value_Normal: wsm = QTextHtmlParserNode::WhiteSpaceNormal; break;
1317 case QCss::Value_Pre: wsm = QTextHtmlParserNode::WhiteSpacePre; break;
1318 case QCss::Value_NoWrap: wsm = QTextHtmlParserNode::WhiteSpaceNoWrap; break;
1319 case QCss::Value_PreWrap: wsm = QTextHtmlParserNode::WhiteSpacePreWrap; break;
1320 case QCss::Value_PreLine: wsm = QTextHtmlParserNode::WhiteSpacePreLine; break;
1321 default: break;
1322 }
1323 break;
1324 case QCss::VerticalAlignment:
1325 switch (identifier) {
1326 case QCss::Value_Sub: charFormat.setVerticalAlignment(QTextCharFormat::AlignSubScript); break;
1327 case QCss::Value_Super: charFormat.setVerticalAlignment(QTextCharFormat::AlignSuperScript); break;
1328 case QCss::Value_Middle: charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle); break;
1329 case QCss::Value_Top: charFormat.setVerticalAlignment(QTextCharFormat::AlignTop); break;
1330 case QCss::Value_Bottom: charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom); break;
1331 default: charFormat.setVerticalAlignment(QTextCharFormat::AlignNormal); break;
1332 }
1333 break;
1334 case QCss::PageBreakBefore:
1335 switch (identifier) {
1336 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysBefore); break;
1337 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysBefore); break;
1338 default: break;
1339 }
1340 break;
1341 case QCss::PageBreakAfter:
1342 switch (identifier) {
1343 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysAfter); break;
1344 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysAfter); break;
1345 default: break;
1346 }
1347 break;
1348 case QCss::TextUnderlineStyle:
1349 switch (identifier) {
1350 case QCss::Value_None: charFormat.setUnderlineStyle(QTextCharFormat::NoUnderline); break;
1351 case QCss::Value_Solid: charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); break;
1352 case QCss::Value_Dashed: charFormat.setUnderlineStyle(QTextCharFormat::DashUnderline); break;
1353 case QCss::Value_Dotted: charFormat.setUnderlineStyle(QTextCharFormat::DotLine); break;
1354 case QCss::Value_DotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotLine); break;
1355 case QCss::Value_DotDotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotDotLine); break;
1356 case QCss::Value_Wave: charFormat.setUnderlineStyle(QTextCharFormat::WaveUnderline); break;
1357 default: break;
1358 }
1359 break;
1360 case QCss::TextDecorationColor: charFormat.setUnderlineColor(decl.colorValue()); break;
1361 case QCss::ListStyleType:
1362 case QCss::ListStyle:
1363 setListStyle(decl.d->values);
1364 break;
1365 case QCss::QtListNumberPrefix:
1366 textListNumberPrefix = decl.d->values.first().variant.toString();
1367 break;
1368 case QCss::QtListNumberSuffix:
1369 textListNumberSuffix = decl.d->values.first().variant.toString();
1370 break;
1371 case QCss::TextAlignment:
1372 switch (identifier) {
1373 case QCss::Value_Left: blockFormat.setAlignment(Qt::AlignLeft); break;
1374 case QCss::Value_Center: blockFormat.setAlignment(Qt::AlignCenter); break;
1375 case QCss::Value_Right: blockFormat.setAlignment(Qt::AlignRight); break;
1376 default: break;
1377 }
1378 break;
1379
1380 case QCss::QtForegroundTextureCacheKey:
1381 {
1382 if (resourceProvider != nullptr && QTextDocumentPrivate::get(document: resourceProvider) != nullptr) {
1383 bool ok;
1384 qint64 searchKey = decl.d->values.first().variant.toLongLong(ok: &ok);
1385 if (ok)
1386 applyForegroundImage(cacheKey: searchKey, resourceProvider);
1387 }
1388 break;
1389 }
1390 default: break;
1391 }
1392 }
1393
1394 QFont f;
1395 int adjustment = -255;
1396 extractor.extractFont(font: &f, fontSizeAdjustment: &adjustment);
1397 if (f.pixelSize() > INT32_MAX / 2)
1398 f.setPixelSize(INT32_MAX / 2); // avoid even more extreme values
1399 charFormat.setFont(font: f, behavior: QTextCharFormat::FontPropertiesSpecifiedOnly);
1400
1401 if (adjustment >= -1)
1402 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: adjustment);
1403
1404 {
1405 Qt::Alignment ignoredAlignment;
1406 QCss::Repeat ignoredRepeat;
1407 QString bgImage;
1408 QBrush bgBrush;
1409 QCss::Origin ignoredOrigin, ignoredClip;
1410 QCss::Attachment ignoredAttachment;
1411 extractor.extractBackground(&bgBrush, &bgImage, &ignoredRepeat, &ignoredAlignment,
1412 &ignoredOrigin, &ignoredAttachment, &ignoredClip);
1413
1414 if (!bgImage.isEmpty() && resourceProvider) {
1415 applyBackgroundImage(url: bgImage, resourceProvider);
1416 } else if (bgBrush.style() != Qt::NoBrush) {
1417 charFormat.setBackground(bgBrush);
1418 if (id == Html_hr)
1419 blockFormat.setProperty(propertyId: QTextFormat::BackgroundBrush, value: bgBrush);
1420 }
1421 }
1422}
1423
1424#endif // QT_NO_CSSPARSER
1425
1426void QTextHtmlParserNode::applyForegroundImage(qint64 searchKey, const QTextDocument *resourceProvider)
1427{
1428 const QTextDocumentPrivate *priv = QTextDocumentPrivate::get(document: resourceProvider);
1429 for (int i = 0; i < priv->formats.numFormats(); ++i) {
1430 QTextCharFormat format = priv->formats.charFormat(index: i);
1431 if (format.isValid()) {
1432 QBrush brush = format.foreground();
1433 if (brush.style() == Qt::TexturePattern) {
1434 const bool isPixmap = qHasPixmapTexture(brush);
1435
1436 if (isPixmap && QCoreApplication::instance()->thread() != QThread::currentThread()) {
1437 qWarning(msg: "Can't apply QPixmap outside of GUI thread");
1438 return;
1439 }
1440
1441 const qint64 cacheKey = isPixmap ? brush.texture().cacheKey() : brush.textureImage().cacheKey();
1442 if (cacheKey == searchKey) {
1443 QBrush b;
1444 if (isPixmap)
1445 b.setTexture(brush.texture());
1446 else
1447 b.setTextureImage(brush.textureImage());
1448 b.setStyle(Qt::TexturePattern);
1449 charFormat.setForeground(b);
1450 }
1451 }
1452 }
1453 }
1454
1455}
1456
1457void QTextHtmlParserNode::applyBackgroundImage(const QString &url, const QTextDocument *resourceProvider)
1458{
1459 if (!url.isEmpty() && resourceProvider) {
1460 QVariant val = resourceProvider->resource(type: QTextDocument::ImageResource, name: url);
1461
1462 if (QCoreApplication::instance()->thread() != QThread::currentThread()) {
1463 // must use images in non-GUI threads
1464 if (val.userType() == QMetaType::QImage) {
1465 QImage image = qvariant_cast<QImage>(v: val);
1466 charFormat.setBackground(image);
1467 } else if (val.userType() == QMetaType::QByteArray) {
1468 QImage image;
1469 if (image.loadFromData(data: val.toByteArray())) {
1470 charFormat.setBackground(image);
1471 }
1472 }
1473 } else {
1474 if (val.userType() == QMetaType::QImage || val.userType() == QMetaType::QPixmap) {
1475 charFormat.setBackground(qvariant_cast<QPixmap>(v: val));
1476 } else if (val.userType() == QMetaType::QByteArray) {
1477 QPixmap pm;
1478 if (pm.loadFromData(buf: val.toByteArray())) {
1479 charFormat.setBackground(pm);
1480 }
1481 }
1482 }
1483 }
1484 if (!url.isEmpty())
1485 charFormat.setProperty(propertyId: QTextFormat::BackgroundImageUrl, value: url);
1486}
1487
1488bool QTextHtmlParserNode::hasOnlyWhitespace() const
1489{
1490 for (int i = 0; i < text.size(); ++i)
1491 if (!text.at(i).isSpace() || text.at(i) == QChar::LineSeparator)
1492 return false;
1493 return true;
1494}
1495
1496static bool setIntAttribute(int *destination, const QString &value)
1497{
1498 bool ok = false;
1499 int val = value.toInt(ok: &ok);
1500 if (ok)
1501 *destination = val;
1502
1503 return ok;
1504}
1505
1506static bool setFloatAttribute(qreal *destination, const QString &value)
1507{
1508 bool ok = false;
1509 qreal val = value.toDouble(ok: &ok);
1510 if (ok)
1511 *destination = val;
1512
1513 return ok;
1514}
1515
1516static void setWidthAttribute(QTextLength *width, const QString &valueStr)
1517{
1518 bool ok = false;
1519 qreal realVal = valueStr.toDouble(ok: &ok);
1520 if (ok) {
1521 *width = QTextLength(QTextLength::FixedLength, realVal);
1522 } else {
1523 auto value = QStringView(valueStr).trimmed();
1524 if (!value.isEmpty() && value.endsWith(c: u'%')) {
1525 value.truncate(n: value.size() - 1);
1526 realVal = value.toDouble(ok: &ok);
1527 if (ok)
1528 *width = QTextLength(QTextLength::PercentageLength, realVal);
1529 }
1530 }
1531}
1532
1533#ifndef QT_NO_CSSPARSER
1534void QTextHtmlParserNode::parseStyleAttribute(const QString &value, const QTextDocument *resourceProvider)
1535{
1536 const QString css = "* {"_L1 + value + u'}';
1537 QCss::Parser parser(css);
1538 QCss::StyleSheet sheet;
1539 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
1540 if (sheet.styleRules.size() != 1) return;
1541 applyCssDeclarations(declarations: sheet.styleRules.at(i: 0).declarations, resourceProvider);
1542}
1543#endif
1544
1545QStringList QTextHtmlParser::parseAttributes()
1546{
1547 QStringList attrs;
1548
1549 while (pos < len) {
1550 eatSpace();
1551 if (hasPrefix(c: u'>') || hasPrefix(c: u'/'))
1552 break;
1553 QString key = parseWord().toLower();
1554 QString value = "1"_L1;
1555 if (key.size() == 0)
1556 break;
1557 eatSpace();
1558 if (hasPrefix(c: u'=')){
1559 pos++;
1560 eatSpace();
1561 value = parseWord();
1562 }
1563 if (value.size() == 0)
1564 continue;
1565 attrs << key << value;
1566 }
1567
1568 return attrs;
1569}
1570
1571void QTextHtmlParser::applyAttributes(const QStringList &attributes)
1572{
1573 // local state variable for qt3 textedit mode
1574 bool seenQt3Richtext = false;
1575 QString linkHref;
1576 QString linkType;
1577
1578 if (attributes.size() % 2 == 1)
1579 return;
1580
1581 QTextHtmlParserNode *node = nodes.last();
1582
1583 for (int i = 0; i < attributes.size(); i += 2) {
1584 QString key = attributes.at(i);
1585 QString value = attributes.at(i: i + 1);
1586
1587 switch (node->id) {
1588 case Html_font:
1589 // the infamous font tag
1590 if (key == "size"_L1 && value.size()) {
1591 int n = value.toInt();
1592 if (value.at(i: 0) != u'+' && value.at(i: 0) != u'-')
1593 n -= 3;
1594 node->charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: n);
1595 } else if (key == "face"_L1) {
1596 if (value.contains(c: u',')) {
1597 const QStringList values = value.split(sep: u',');
1598 QStringList families;
1599 for (const QString &family : values)
1600 families << family.trimmed();
1601 node->charFormat.setFontFamilies(families);
1602 } else {
1603 node->charFormat.setFontFamilies(QStringList(value));
1604 }
1605 } else if (key == "color"_L1) {
1606 QColor c = QColor::fromString(name: value);
1607 if (!c.isValid())
1608 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1609 node->charFormat.setForeground(c);
1610 }
1611 break;
1612 case Html_ol:
1613 case Html_ul:
1614 if (key == "type"_L1) {
1615 node->hasOwnListStyle = true;
1616 if (value == "1"_L1) {
1617 node->listStyle = QTextListFormat::ListDecimal;
1618 } else if (value == "a"_L1) {
1619 node->listStyle = QTextListFormat::ListLowerAlpha;
1620 } else if (value == "A"_L1) {
1621 node->listStyle = QTextListFormat::ListUpperAlpha;
1622 } else if (value == "i"_L1) {
1623 node->listStyle = QTextListFormat::ListLowerRoman;
1624 } else if (value == "I"_L1) {
1625 node->listStyle = QTextListFormat::ListUpperRoman;
1626 } else {
1627 value = std::move(value).toLower();
1628 if (value == "square"_L1)
1629 node->listStyle = QTextListFormat::ListSquare;
1630 else if (value == "disc"_L1)
1631 node->listStyle = QTextListFormat::ListDisc;
1632 else if (value == "circle"_L1)
1633 node->listStyle = QTextListFormat::ListCircle;
1634 else if (value == "none"_L1)
1635 node->listStyle = QTextListFormat::ListStyleUndefined;
1636 }
1637 } else if (key == "start"_L1) {
1638 setIntAttribute(destination: &node->listStart, value);
1639 }
1640 break;
1641 case Html_li:
1642 if (key == "class"_L1) {
1643 if (value == "unchecked"_L1)
1644 node->blockFormat.setMarker(QTextBlockFormat::MarkerType::Unchecked);
1645 else if (value == "checked"_L1)
1646 node->blockFormat.setMarker(QTextBlockFormat::MarkerType::Checked);
1647 }
1648 break;
1649 case Html_a:
1650 if (key == "href"_L1)
1651 node->charFormat.setAnchorHref(value);
1652 else if (key == "name"_L1)
1653 node->charFormat.setAnchorNames({value});
1654 break;
1655 case Html_img:
1656 if (key == "src"_L1 || key == "source"_L1) {
1657 node->imageName = value;
1658 } else if (key == "width"_L1) {
1659 node->imageWidth = -2; // register that there is a value for it.
1660 setFloatAttribute(destination: &node->imageWidth, value);
1661 } else if (key == "height"_L1) {
1662 node->imageHeight = -2; // register that there is a value for it.
1663 setFloatAttribute(destination: &node->imageHeight, value);
1664 } else if (key == "alt"_L1) {
1665 node->imageAlt = value;
1666 } else if (key == "title"_L1) {
1667 node->text = value;
1668 }
1669 break;
1670 case Html_tr:
1671 case Html_body:
1672 if (key == "bgcolor"_L1) {
1673 QColor c = QColor::fromString(name: value);
1674 if (!c.isValid())
1675 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1676 node->charFormat.setBackground(c);
1677 } else if (key == "background"_L1) {
1678 node->applyBackgroundImage(url: value, resourceProvider);
1679 }
1680 break;
1681 case Html_th:
1682 case Html_td:
1683 if (key == "width"_L1) {
1684 setWidthAttribute(width: &node->width, valueStr: value);
1685 } else if (key == "bgcolor"_L1) {
1686 QColor c = QColor::fromString(name: value);
1687 if (!c.isValid())
1688 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1689 node->charFormat.setBackground(c);
1690 } else if (key == "background"_L1) {
1691 node->applyBackgroundImage(url: value, resourceProvider);
1692 } else if (key == "rowspan"_L1) {
1693 if (setIntAttribute(destination: &node->tableCellRowSpan, value))
1694 node->tableCellRowSpan = qMax(a: 1, b: node->tableCellRowSpan);
1695 } else if (key == "colspan"_L1) {
1696 if (setIntAttribute(destination: &node->tableCellColSpan, value))
1697 node->tableCellColSpan = qBound(min: 1, val: node->tableCellColSpan, max: 20480);
1698 }
1699 break;
1700 case Html_table:
1701 if (key == "border"_L1) {
1702 setFloatAttribute(destination: &node->tableBorder, value);
1703 } else if (key == "bgcolor"_L1) {
1704 QColor c = QColor::fromString(name: value);
1705 if (!c.isValid())
1706 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1707 node->charFormat.setBackground(c);
1708 } else if (key == "bordercolor"_L1) {
1709 QColor c = QColor::fromString(name: value);
1710 if (!c.isValid())
1711 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1712 node->borderBrush = c;
1713 } else if (key == "background"_L1) {
1714 node->applyBackgroundImage(url: value, resourceProvider);
1715 } else if (key == "cellspacing"_L1) {
1716 setFloatAttribute(destination: &node->tableCellSpacing, value);
1717 } else if (key == "cellpadding"_L1) {
1718 setFloatAttribute(destination: &node->tableCellPadding, value);
1719 } else if (key == "width"_L1) {
1720 setWidthAttribute(width: &node->width, valueStr: value);
1721 } else if (key == "height"_L1) {
1722 setWidthAttribute(width: &node->height, valueStr: value);
1723 }
1724 break;
1725 case Html_meta:
1726 if (key == "name"_L1 && value == "qrichtext"_L1)
1727 seenQt3Richtext = true;
1728
1729 if (key == "content"_L1 && value == "1"_L1 && seenQt3Richtext)
1730 textEditMode = true;
1731 break;
1732 case Html_hr:
1733 if (key == "width"_L1)
1734 setWidthAttribute(width: &node->width, valueStr: value);
1735 break;
1736 case Html_link:
1737 if (key == "href"_L1)
1738 linkHref = value;
1739 else if (key == "type"_L1)
1740 linkType = value;
1741 break;
1742 case Html_pre:
1743 if (key == "class"_L1 && value.startsWith(s: "language-"_L1))
1744 node->blockFormat.setProperty(propertyId: QTextFormat::BlockCodeLanguage, value: value.mid(position: 9));
1745 break;
1746 default:
1747 break;
1748 }
1749
1750 if (key == "style"_L1) {
1751#ifndef QT_NO_CSSPARSER
1752 node->parseStyleAttribute(value, resourceProvider);
1753#endif
1754 } else if (key == "align"_L1) {
1755 value = std::move(value).toLower();
1756 bool alignmentSet = true;
1757
1758 if (value == "left"_L1)
1759 node->blockFormat.setAlignment(Qt::AlignLeft|Qt::AlignAbsolute);
1760 else if (value == "right"_L1)
1761 node->blockFormat.setAlignment(Qt::AlignRight|Qt::AlignAbsolute);
1762 else if (value == "center"_L1)
1763 node->blockFormat.setAlignment(Qt::AlignHCenter);
1764 else if (value == "justify"_L1)
1765 node->blockFormat.setAlignment(Qt::AlignJustify);
1766 else
1767 alignmentSet = false;
1768
1769 if (node->id == Html_img) {
1770 // HTML4 compat
1771 if (alignmentSet) {
1772 if (node->blockFormat.alignment() & Qt::AlignLeft)
1773 node->cssFloat = QTextFrameFormat::FloatLeft;
1774 else if (node->blockFormat.alignment() & Qt::AlignRight)
1775 node->cssFloat = QTextFrameFormat::FloatRight;
1776 } else if (value == "middle"_L1) {
1777 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1778 } else if (value == "top"_L1) {
1779 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1780 }
1781 }
1782 } else if (key == "valign"_L1) {
1783 value = std::move(value).toLower();
1784 if (value == "top"_L1)
1785 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1786 else if (value == "middle"_L1)
1787 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1788 else if (value == "bottom"_L1)
1789 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom);
1790 } else if (key == "dir"_L1) {
1791 value = std::move(value).toLower();
1792 if (value == "ltr"_L1)
1793 node->blockFormat.setLayoutDirection(Qt::LeftToRight);
1794 else if (value == "rtl"_L1)
1795 node->blockFormat.setLayoutDirection(Qt::RightToLeft);
1796 } else if (key == "title"_L1) {
1797 node->charFormat.setToolTip(value);
1798 } else if (key == "id"_L1) {
1799 node->charFormat.setAnchor(true);
1800 node->charFormat.setAnchorNames({value});
1801 }
1802 }
1803
1804#ifndef QT_NO_CSSPARSER
1805 if (resourceProvider && !linkHref.isEmpty() && linkType == "text/css"_L1)
1806 importStyleSheet(href: linkHref);
1807#endif
1808}
1809
1810#ifndef QT_NO_CSSPARSER
1811class QTextHtmlStyleSelector : public QCss::StyleSelector
1812{
1813public:
1814 inline QTextHtmlStyleSelector(const QTextHtmlParser *parser)
1815 : parser(parser) { nameCaseSensitivity = Qt::CaseInsensitive; }
1816
1817 QStringList nodeNames(NodePtr node) const override;
1818 QString attributeValue(NodePtr node, const QCss::AttributeSelector &aSelector) const override;
1819 bool hasAttributes(NodePtr node) const override;
1820 bool isNullNode(NodePtr node) const override;
1821 NodePtr parentNode(NodePtr node) const override;
1822 NodePtr previousSiblingNode(NodePtr node) const override;
1823 NodePtr duplicateNode(NodePtr node) const override;
1824 void freeNode(NodePtr node) const override;
1825
1826private:
1827 const QTextHtmlParser *parser;
1828};
1829
1830QStringList QTextHtmlStyleSelector::nodeNames(NodePtr node) const
1831{
1832 return QStringList(parser->at(i: node.id).tag.toLower());
1833}
1834
1835#endif // QT_NO_CSSPARSER
1836
1837#ifndef QT_NO_CSSPARSER
1838
1839static inline int findAttribute(const QStringList &attributes, const QString &name)
1840{
1841 int idx = -1;
1842 do {
1843 idx = attributes.indexOf(str: name, from: idx + 1);
1844 } while (idx != -1 && (idx % 2 == 1));
1845 return idx;
1846}
1847
1848QString QTextHtmlStyleSelector::attributeValue(NodePtr node, const QCss::AttributeSelector &aSelector) const
1849{
1850 const QStringList &attributes = parser->at(i: node.id).attributes;
1851 const int idx = findAttribute(attributes, name: aSelector.name);
1852 if (idx == -1)
1853 return QString();
1854 return attributes.at(i: idx + 1);
1855}
1856
1857bool QTextHtmlStyleSelector::hasAttributes(NodePtr node) const
1858{
1859 const QStringList &attributes = parser->at(i: node.id).attributes;
1860 return !attributes.isEmpty();
1861}
1862
1863bool QTextHtmlStyleSelector::isNullNode(NodePtr node) const
1864{
1865 return node.id == 0;
1866}
1867
1868QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::parentNode(NodePtr node) const
1869{
1870 NodePtr parent;
1871 parent.id = 0;
1872 if (node.id) {
1873 parent.id = parser->at(i: node.id).parent;
1874 }
1875 return parent;
1876}
1877
1878QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::duplicateNode(NodePtr node) const
1879{
1880 return node;
1881}
1882
1883QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::previousSiblingNode(NodePtr node) const
1884{
1885 NodePtr sibling;
1886 sibling.id = 0;
1887 if (!node.id)
1888 return sibling;
1889 int parent = parser->at(i: node.id).parent;
1890 if (!parent)
1891 return sibling;
1892 const int childIdx = parser->at(i: parent).children.indexOf(t: node.id);
1893 if (childIdx <= 0)
1894 return sibling;
1895 sibling.id = parser->at(i: parent).children.at(i: childIdx - 1);
1896 return sibling;
1897}
1898
1899void QTextHtmlStyleSelector::freeNode(NodePtr) const
1900{
1901}
1902
1903void QTextHtmlParser::resolveStyleSheetImports(const QCss::StyleSheet &sheet)
1904{
1905 for (int i = 0; i < sheet.importRules.size(); ++i) {
1906 const QCss::ImportRule &rule = sheet.importRules.at(i);
1907 if (rule.media.isEmpty() || rule.media.contains(str: "screen"_L1, cs: Qt::CaseInsensitive))
1908 importStyleSheet(href: rule.href);
1909 }
1910}
1911
1912void QTextHtmlParser::importStyleSheet(const QString &href)
1913{
1914 if (!resourceProvider)
1915 return;
1916 for (int i = 0; i < externalStyleSheets.size(); ++i)
1917 if (externalStyleSheets.at(i).url == href)
1918 return;
1919
1920 QVariant res = resourceProvider->resource(type: QTextDocument::StyleSheetResource, name: href);
1921 QString css;
1922 if (res.userType() == QMetaType::QString) {
1923 css = res.toString();
1924 } else if (res.userType() == QMetaType::QByteArray) {
1925 // #### detect @charset
1926 css = QString::fromUtf8(ba: res.toByteArray());
1927 }
1928 if (!css.isEmpty()) {
1929 QCss::Parser parser(css);
1930 QCss::StyleSheet sheet;
1931 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
1932 externalStyleSheets.append(t: ExternalStyleSheet(href, sheet));
1933 resolveStyleSheetImports(sheet);
1934 }
1935}
1936
1937QList<QCss::Declaration> standardDeclarationForNode(const QTextHtmlParserNode &node)
1938{
1939 QList<QCss::Declaration> decls;
1940 QCss::Declaration decl;
1941 QCss::Value val;
1942 switch (node.id) {
1943 case Html_a:
1944 case Html_u: {
1945 bool needsUnderline = (node.id == Html_u) ? true : false;
1946 if (node.id == Html_a) {
1947 for (int i = 0; i < node.attributes.size(); i += 2) {
1948 const QString key = node.attributes.at(i);
1949 if (key.compare(other: "href"_L1, cs: Qt::CaseInsensitive) == 0
1950 && !node.attributes.at(i: i + 1).isEmpty()) {
1951 needsUnderline = true;
1952 decl.d->property = "color"_L1;
1953 decl.d->propertyId = QCss::Color;
1954 val.type = QCss::Value::Function;
1955 val.variant = QStringList() << "palette"_L1 << "link"_L1;
1956 decl.d->values = QList<QCss::Value> { val };
1957 decl.d->inheritable = true;
1958 decls << decl;
1959 break;
1960 }
1961 }
1962 }
1963 if (needsUnderline) {
1964 decl = QCss::Declaration();
1965 decl.d->property = "text-decoration"_L1;
1966 decl.d->propertyId = QCss::TextDecoration;
1967 val.type = QCss::Value::KnownIdentifier;
1968 val.variant = QVariant(QCss::Value_Underline);
1969 decl.d->values = QList<QCss::Value> { val };
1970 decl.d->inheritable = true;
1971 decls << decl;
1972 }
1973 break;
1974 }
1975 case Html_b:
1976 case Html_strong:
1977 case Html_h1:
1978 case Html_h2:
1979 case Html_h3:
1980 case Html_h4:
1981 case Html_h5:
1982 case Html_th:
1983 decl = QCss::Declaration();
1984 decl.d->property = "font-weight"_L1;
1985 decl.d->propertyId = QCss::FontWeight;
1986 val.type = QCss::Value::KnownIdentifier;
1987 val.variant = QVariant(QCss::Value_Bold);
1988 decl.d->values = QList<QCss::Value> { val };
1989 decl.d->inheritable = true;
1990 decls << decl;
1991 if (node.id == Html_b || node.id == Html_strong)
1992 break;
1993 Q_FALLTHROUGH();
1994 case Html_big:
1995 case Html_small:
1996 if (node.id != Html_th) {
1997 decl = QCss::Declaration();
1998 decl.d->property = "font-size"_L1;
1999 decl.d->propertyId = QCss::FontSize;
2000 decl.d->inheritable = false;
2001 val.type = QCss::Value::KnownIdentifier;
2002 switch (node.id) {
2003 case Html_h1: val.variant = QVariant(QCss::Value_XXLarge); break;
2004 case Html_h2: val.variant = QVariant(QCss::Value_XLarge); break;
2005 case Html_h3: case Html_big: val.variant = QVariant(QCss::Value_Large); break;
2006 case Html_h4: val.variant = QVariant(QCss::Value_Medium); break;
2007 case Html_h5: case Html_small: val.variant = QVariant(QCss::Value_Small); break;
2008 default: break;
2009 }
2010 decl.d->values = QList<QCss::Value> { val };
2011 decls << decl;
2012 break;
2013 }
2014 Q_FALLTHROUGH();
2015 case Html_center:
2016 case Html_td:
2017 decl = QCss::Declaration();
2018 decl.d->property = "text-align"_L1;
2019 decl.d->propertyId = QCss::TextAlignment;
2020 val.type = QCss::Value::KnownIdentifier;
2021 val.variant = (node.id == Html_td) ? QVariant(QCss::Value_Left) : QVariant(QCss::Value_Center);
2022 decl.d->values = QList<QCss::Value> { val };
2023 decl.d->inheritable = true;
2024 decls << decl;
2025 break;
2026 case Html_s:
2027 decl = QCss::Declaration();
2028 decl.d->property = "text-decoration"_L1;
2029 decl.d->propertyId = QCss::TextDecoration;
2030 val.type = QCss::Value::KnownIdentifier;
2031 val.variant = QVariant(QCss::Value_LineThrough);
2032 decl.d->values = QList<QCss::Value> { val };
2033 decl.d->inheritable = true;
2034 decls << decl;
2035 break;
2036 case Html_em:
2037 case Html_i:
2038 case Html_cite:
2039 case Html_address:
2040 case Html_var:
2041 case Html_dfn:
2042 decl = QCss::Declaration();
2043 decl.d->property = "font-style"_L1;
2044 decl.d->propertyId = QCss::FontStyle;
2045 val.type = QCss::Value::KnownIdentifier;
2046 val.variant = QVariant(QCss::Value_Italic);
2047 decl.d->values = QList<QCss::Value> { val };
2048 decl.d->inheritable = true;
2049 decls << decl;
2050 break;
2051 case Html_sub:
2052 case Html_sup:
2053 decl = QCss::Declaration();
2054 decl.d->property = "vertical-align"_L1;
2055 decl.d->propertyId = QCss::VerticalAlignment;
2056 val.type = QCss::Value::KnownIdentifier;
2057 val.variant = (node.id == Html_sub) ? QVariant(QCss::Value_Sub) : QVariant(QCss::Value_Super);
2058 decl.d->values = QList<QCss::Value> { val };
2059 decl.d->inheritable = true;
2060 decls << decl;
2061 break;
2062 case Html_ul:
2063 case Html_ol:
2064 decl = QCss::Declaration();
2065 decl.d->property = "list-style"_L1;
2066 decl.d->propertyId = QCss::ListStyle;
2067 val.type = QCss::Value::KnownIdentifier;
2068 val.variant = (node.id == Html_ul) ? QVariant(QCss::Value_Disc) : QVariant(QCss::Value_Decimal);
2069 decl.d->values = QList<QCss::Value> { val };
2070 decl.d->inheritable = true;
2071 decls << decl;
2072 break;
2073 case Html_code:
2074 case Html_tt:
2075 case Html_kbd:
2076 case Html_samp:
2077 case Html_pre: {
2078 decl = QCss::Declaration();
2079 decl.d->property = "font-family"_L1;
2080 decl.d->propertyId = QCss::FontFamily;
2081 QList<QCss::Value> values;
2082 val.type = QCss::Value::String;
2083 val.variant = QFontDatabase::systemFont(type: QFontDatabase::FixedFont).families().first();
2084 values << val;
2085 decl.d->values = values;
2086 decl.d->inheritable = true;
2087 decls << decl;
2088 }
2089 if (node.id != Html_pre)
2090 break;
2091 Q_FALLTHROUGH();
2092 case Html_br:
2093 case Html_nobr:
2094 decl = QCss::Declaration();
2095 decl.d->property = "whitespace"_L1;
2096 decl.d->propertyId = QCss::Whitespace;
2097 val.type = QCss::Value::KnownIdentifier;
2098 switch (node.id) {
2099 case Html_br: val.variant = QVariant(QCss::Value_PreWrap); break;
2100 case Html_nobr: val.variant = QVariant(QCss::Value_NoWrap); break;
2101 case Html_pre: val.variant = QVariant(QCss::Value_Pre); break;
2102 default: break;
2103 }
2104 decl.d->values = QList<QCss::Value> { val };
2105 decl.d->inheritable = true;
2106 decls << decl;
2107 break;
2108 default:
2109 break;
2110 }
2111 return decls;
2112}
2113
2114QList<QCss::Declaration> QTextHtmlParser::declarationsForNode(int node) const
2115{
2116 QList<QCss::Declaration> decls;
2117
2118 QTextHtmlStyleSelector selector(this);
2119
2120 int idx = 0;
2121 selector.styleSheets.resize(size: (resourceProvider ? 1 : 0)
2122 + externalStyleSheets.size()
2123 + inlineStyleSheets.size());
2124 if (resourceProvider)
2125 selector.styleSheets[idx++] = QTextDocumentPrivate::get(document: resourceProvider)->parsedDefaultStyleSheet;
2126
2127 for (int i = 0; i < externalStyleSheets.size(); ++i, ++idx)
2128 selector.styleSheets[idx] = externalStyleSheets.at(i).sheet;
2129
2130 for (int i = 0; i < inlineStyleSheets.size(); ++i, ++idx)
2131 selector.styleSheets[idx] = inlineStyleSheets.at(i);
2132
2133 selector.medium = resourceProvider ? resourceProvider->metaInformation(info: QTextDocument::CssMedia) : "screen"_L1;
2134
2135 QCss::StyleSelector::NodePtr n;
2136 n.id = node;
2137
2138 const char *extraPseudo = nullptr;
2139 if (nodes.at(i: node)->id == Html_a && nodes.at(i: node)->hasHref)
2140 extraPseudo = "link";
2141 // Ensure that our own style is taken into consideration
2142 decls = standardDeclarationForNode(node: *nodes.at(i: node));
2143 decls += selector.declarationsForNode(node: n, extraPseudo);
2144 n = selector.parentNode(node: n);
2145 while (!selector.isNullNode(node: n)) {
2146 QList<QCss::Declaration> inheritedDecls;
2147 inheritedDecls = selector.declarationsForNode(node: n, extraPseudo);
2148 for (int i = 0; i < inheritedDecls.size(); ++i) {
2149 const QCss::Declaration &decl = inheritedDecls.at(i);
2150 if (decl.d->inheritable)
2151 decls.prepend(t: decl);
2152 }
2153 n = selector.parentNode(node: n);
2154 }
2155 return decls;
2156}
2157
2158bool QTextHtmlParser::nodeIsChildOf(int i, QTextHTMLElements id) const
2159{
2160 while (i) {
2161 if (at(i).id == id)
2162 return true;
2163 i = at(i).parent;
2164 }
2165 return false;
2166}
2167
2168QT_END_NAMESPACE
2169#endif // QT_NO_CSSPARSER
2170
2171#endif // QT_NO_TEXTHTMLPARSER
2172

source code of qtbase/src/gui/text/qtexthtmlparser.cpp