1 | //======================================================================== |
2 | // |
3 | // This file comes from pdftohtml project |
4 | // http://pdftohtml.sourceforge.net |
5 | // |
6 | // Copyright from: |
7 | // Gueorgui Ovtcharov |
8 | // Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/> |
9 | // Mikhail Kruk <meshko@cs.brandeis.edu> |
10 | // |
11 | //======================================================================== |
12 | |
13 | //======================================================================== |
14 | // |
15 | // Modified under the Poppler project - http://poppler.freedesktop.org |
16 | // |
17 | // All changes made under the Poppler project to this file are licensed |
18 | // under GPL version 2 or later |
19 | // |
20 | // Copyright (C) 2007, 2010, 2012, 2018, 2020 Albert Astals Cid <aacid@kde.org> |
21 | // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> |
22 | // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com> |
23 | // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) |
24 | // Copyright (C) 2011 Joshua Richardson <jric@chegg.com> |
25 | // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> |
26 | // Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com> |
27 | // Copyright (C) 2012 Luis Parravicini <lparravi@gmail.com> |
28 | // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr> |
29 | // Copyright (C) 2017 Jason Crain <jason@inspiresomeone.us> |
30 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
31 | // Copyright (C) 2018 Steven Boswell <ulatekh@yahoo.com> |
32 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
33 | // Copyright (C) 2019, 2022 Oliver Sander <oliver.sander@tu-dresden.de> |
34 | // Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com> |
35 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
36 | // |
37 | // To see a description of the changes please see the Changelog file that |
38 | // came with your tarball or type make ChangeLog if you are building from git |
39 | // |
40 | //======================================================================== |
41 | |
42 | #include "HtmlFonts.h" |
43 | #include "HtmlUtils.h" |
44 | #include "GlobalParams.h" |
45 | #include "UnicodeMap.h" |
46 | #include "GfxFont.h" |
47 | #include <cstdio> |
48 | |
namespace {

// Family name used when the PDF font does not report a name.
const char *const defaultFamilyName = "Times";

// Style markers that are cut off a font name to obtain its family name.
// They are tried in this order and the first one found wins.
const char *const styleSuffixes[] = {
    "-Regular", "-Bold", "-BoldOblique", "-BoldItalic", "-Oblique", "-Italic", "-Roman",
};

// Truncates familyName at the last occurrence of the first style marker
// (in styleSuffixes order) that occurs anywhere in the string.
//
// The marker does not have to be at the very end of the name: everything
// from the marker onwards is dropped, so "Foo-BoldMT" becomes "Foo".
// Names containing none of the markers are left unchanged.
void removeStyleSuffix(std::string &familyName)
{
    for (const char *const styleSuffix : styleSuffixes) {
        const std::string::size_type pos = familyName.rfind(styleSuffix);
        if (pos == std::string::npos) {
            continue;
        }
        familyName.resize(pos);
        return;
    }
}

}
69 | |
// Rounds a value to an int by adding 0.5 and truncating.
// The argument is parenthesised so that expressions containing operators
// of lower precedence than '+' (e.g. a conditional) are rounded as a whole.
#define xoutRound(x) ((int)((x) + 0.5))
// Output-mode flags defined in another translation unit.
extern bool xml;
extern bool fontFullName;
73 | |
74 | HtmlFontColor::HtmlFontColor(GfxRGB rgb, double opacity_) |
75 | { |
76 | r = static_cast<int>(rgb.r / 65535.0 * 255.0); |
77 | g = static_cast<int>(rgb.g / 65535.0 * 255.0); |
78 | b = static_cast<int>(rgb.b / 65535.0 * 255.0); |
79 | opacity = static_cast<int>(opacity_ * 255.999); |
80 | if (!(Ok(xcol: r) && Ok(xcol: b) && Ok(xcol: g) && Ok(xcol: opacity))) { |
81 | if (!globalParams->getErrQuiet()) { |
82 | fprintf(stderr, format: "Error : Bad color (%d,%d,%d,%d) reset to (0,0,0,255)\n" , r, g, b, opacity); |
83 | } |
84 | r = 0; |
85 | g = 0; |
86 | b = 0; |
87 | opacity = 255; |
88 | } |
89 | } |
90 | |
91 | GooString *HtmlFontColor::convtoX(unsigned int xcol) const |
92 | { |
93 | GooString *xret = new GooString(); |
94 | char tmp; |
95 | unsigned int k; |
96 | k = (xcol / 16); |
97 | if (k < 10) { |
98 | tmp = (char)('0' + k); |
99 | } else { |
100 | tmp = (char)('a' + k - 10); |
101 | } |
102 | xret->append(c: tmp); |
103 | k = (xcol % 16); |
104 | if (k < 10) { |
105 | tmp = (char)('0' + k); |
106 | } else { |
107 | tmp = (char)('a' + k - 10); |
108 | } |
109 | xret->append(c: tmp); |
110 | return xret; |
111 | } |
112 | |
113 | GooString *HtmlFontColor::toString() const |
114 | { |
115 | GooString *tmp = new GooString("#" ); |
116 | GooString *tmpr = convtoX(xcol: r); |
117 | GooString *tmpg = convtoX(xcol: g); |
118 | GooString *tmpb = convtoX(xcol: b); |
119 | tmp->append(str: tmpr); |
120 | tmp->append(str: tmpg); |
121 | tmp->append(str: tmpb); |
122 | delete tmpr; |
123 | delete tmpg; |
124 | delete tmpb; |
125 | return tmp; |
126 | } |
127 | |
128 | HtmlFont::HtmlFont(const GfxFont &font, int _size, GfxRGB rgb, double opacity) |
129 | { |
130 | color = HtmlFontColor(rgb, opacity); |
131 | |
132 | lineSize = -1; |
133 | |
134 | size = _size; |
135 | italic = false; |
136 | bold = false; |
137 | rotOrSkewed = false; |
138 | |
139 | if (font.isBold() || font.getWeight() >= GfxFont::W700) { |
140 | bold = true; |
141 | } |
142 | if (font.isItalic()) { |
143 | italic = true; |
144 | } |
145 | |
146 | if (const std::optional<std::string> &fontname = font.getName()) { |
147 | FontName = new GooString(*fontname); |
148 | |
149 | GooString fontnameLower(*fontname); |
150 | fontnameLower.lowerCase(); |
151 | |
152 | if (!bold && strstr(haystack: fontnameLower.c_str(), needle: "bold" )) { |
153 | bold = true; |
154 | } |
155 | |
156 | if (!italic && (strstr(haystack: fontnameLower.c_str(), needle: "italic" ) || strstr(haystack: fontnameLower.c_str(), needle: "oblique" ))) { |
157 | italic = true; |
158 | } |
159 | |
160 | familyName = fontname->c_str(); |
161 | removeStyleSuffix(familyName); |
162 | } else { |
163 | FontName = new GooString(defaultFamilyName); |
164 | familyName = defaultFamilyName; |
165 | } |
166 | |
167 | rotSkewMat[0] = rotSkewMat[1] = rotSkewMat[2] = rotSkewMat[3] = 0; |
168 | } |
169 | |
170 | HtmlFont::HtmlFont(const HtmlFont &x) |
171 | { |
172 | size = x.size; |
173 | lineSize = x.lineSize; |
174 | italic = x.italic; |
175 | bold = x.bold; |
176 | familyName = x.familyName; |
177 | color = x.color; |
178 | FontName = new GooString(x.FontName); |
179 | rotOrSkewed = x.rotOrSkewed; |
180 | memcpy(dest: rotSkewMat, src: x.rotSkewMat, n: sizeof(rotSkewMat)); |
181 | } |
182 | |
183 | HtmlFont::~HtmlFont() |
184 | { |
185 | delete FontName; |
186 | } |
187 | |
188 | HtmlFont &HtmlFont::operator=(const HtmlFont &x) |
189 | { |
190 | if (this == &x) { |
191 | return *this; |
192 | } |
193 | size = x.size; |
194 | lineSize = x.lineSize; |
195 | italic = x.italic; |
196 | bold = x.bold; |
197 | familyName = x.familyName; |
198 | color = x.color; |
199 | delete FontName; |
200 | FontName = new GooString(x.FontName); |
201 | return *this; |
202 | } |
203 | |
204 | /* |
205 | This function is used to compare font uniquely for insertion into |
206 | the list of all encountered fonts |
207 | */ |
208 | bool HtmlFont::isEqual(const HtmlFont &x) const |
209 | { |
210 | return (size == x.size) && (lineSize == x.lineSize) && (FontName->cmp(str: x.FontName) == 0) && (bold == x.bold) && (italic == x.italic) && (color.isEqual(col: x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed() |
211 | && (!isRotOrSkewed() || rot_matrices_equal(mat0: getRotMat(), mat1: x.getRotMat())); |
212 | } |
213 | |
214 | /* |
215 | This one is used to decide whether two pieces of text can be joined together |
216 | and therefore we don't care about bold/italics properties |
217 | */ |
218 | bool HtmlFont::isEqualIgnoreBold(const HtmlFont &x) const |
219 | { |
220 | return ((size == x.size) && (familyName == x.familyName) && (color.isEqual(col: x.getColor()))); |
221 | } |
222 | |
223 | GooString *HtmlFont::getFontName() |
224 | { |
225 | return new GooString(familyName); |
226 | } |
227 | |
228 | GooString *HtmlFont::getFullName() |
229 | { |
230 | return new GooString(FontName); |
231 | } |
232 | |
233 | // this method if plain wrong todo |
234 | std::unique_ptr<GooString> HtmlFont::HtmlFilter(const Unicode *u, int uLen) |
235 | { |
236 | auto tmp = std::make_unique<GooString>(); |
237 | const UnicodeMap *uMap; |
238 | char buf[8]; |
239 | int n; |
240 | |
241 | // get the output encoding |
242 | if (!(uMap = globalParams->getTextEncoding())) { |
243 | return tmp; |
244 | } |
245 | |
246 | for (int i = 0; i < uLen; ++i) { |
247 | // skip control characters. W3C disallows them and they cause a warning |
248 | // with PHP. |
249 | if (u[i] <= 31 && u[i] != '\t') { |
250 | continue; |
251 | } |
252 | |
253 | switch (u[i]) { |
254 | case '"': |
255 | tmp->append(str: """ ); |
256 | break; |
257 | case '&': |
258 | tmp->append(str: "&" ); |
259 | break; |
260 | case '<': |
261 | tmp->append(str: "<" ); |
262 | break; |
263 | case '>': |
264 | tmp->append(str: ">" ); |
265 | break; |
266 | case ' ': |
267 | case '\t': |
268 | tmp->append(str: !xml && (i + 1 >= uLen || !tmp->getLength() || tmp->getChar(i: tmp->getLength() - 1) == ' ') ? " " : " " ); |
269 | break; |
270 | default: { |
271 | // convert unicode to string |
272 | if ((n = uMap->mapUnicode(u: u[i], buf, bufSize: sizeof(buf))) > 0) { |
273 | tmp->append(str: buf, lengthA: n); |
274 | } |
275 | } |
276 | } |
277 | } |
278 | |
279 | return tmp; |
280 | } |
281 | |
282 | HtmlFontAccu::HtmlFontAccu() { } |
283 | |
284 | HtmlFontAccu::~HtmlFontAccu() { } |
285 | |
286 | int HtmlFontAccu::AddFont(const HtmlFont &font) |
287 | { |
288 | std::vector<HtmlFont>::iterator i; |
289 | for (i = accu.begin(); i != accu.end(); ++i) { |
290 | if (font.isEqual(x: *i)) { |
291 | return (int)(i - (accu.begin())); |
292 | } |
293 | } |
294 | |
295 | accu.push_back(x: font); |
296 | return (accu.size() - 1); |
297 | } |
298 | |
299 | // get CSS font definition for font #i |
300 | GooString *HtmlFontAccu::CSStyle(int i, int j) |
301 | { |
302 | GooString *tmp = new GooString(); |
303 | |
304 | std::vector<HtmlFont>::iterator g = accu.begin(); |
305 | g += i; |
306 | HtmlFont font = *g; |
307 | GooString *colorStr = font.getColor().toString(); |
308 | GooString *fontName = (fontFullName ? font.getFullName() : font.getFontName()); |
309 | |
310 | if (!xml) { |
311 | tmp->append(str: ".ft" ); |
312 | tmp->append(str: std::to_string(val: j)); |
313 | tmp->append(str: std::to_string(val: i)); |
314 | tmp->append(str: "{font-size:" ); |
315 | tmp->append(str: std::to_string(val: font.getSize())); |
316 | if (font.getLineSize() != -1 && font.getLineSize() != 0) { |
317 | tmp->append(str: "px;line-height:" ); |
318 | tmp->append(str: std::to_string(val: font.getLineSize())); |
319 | } |
320 | tmp->append(str: "px;font-family:" ); |
321 | tmp->append(str: fontName); // font.getFontName()); |
322 | tmp->append(str: ";color:" ); |
323 | tmp->append(str: colorStr); |
324 | if (font.getColor().getOpacity() != 1.0) { |
325 | tmp->append(str: ";opacity:" ); |
326 | tmp->append(str: std::to_string(val: font.getColor().getOpacity())); |
327 | } |
328 | // if there is rotation or skew, include the matrix |
329 | if (font.isRotOrSkewed()) { |
330 | const double *const text_mat = font.getRotMat(); |
331 | GooString matrix_str(" matrix(" ); |
332 | matrix_str.appendf(fmt: "{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)" , text_mat[0], text_mat[1], text_mat[2], text_mat[3]); |
333 | tmp->append(str: ";-moz-transform:" ); |
334 | tmp->append(str: &matrix_str); |
335 | tmp->append(str: ";-webkit-transform:" ); |
336 | tmp->append(str: &matrix_str); |
337 | tmp->append(str: ";-o-transform:" ); |
338 | tmp->append(str: &matrix_str); |
339 | tmp->append(str: ";-ms-transform:" ); |
340 | tmp->append(str: &matrix_str); |
341 | // Todo: 75% is a wild guess that seems to work pretty well; |
342 | // We probably need to calculate the real percentage |
343 | // Based on the characteristic baseline and bounding box of current font |
344 | // PDF origin is at baseline |
345 | tmp->append(str: ";-moz-transform-origin: left 75%" ); |
346 | tmp->append(str: ";-webkit-transform-origin: left 75%" ); |
347 | tmp->append(str: ";-o-transform-origin: left 75%" ); |
348 | tmp->append(str: ";-ms-transform-origin: left 75%" ); |
349 | } |
350 | tmp->append(str: ";}" ); |
351 | } |
352 | if (xml) { |
353 | tmp->append(str: "<fontspec id=\"" ); |
354 | tmp->append(str: std::to_string(val: i)); |
355 | tmp->append(str: "\" size=\"" ); |
356 | tmp->append(str: std::to_string(val: font.getSize())); |
357 | tmp->append(str: "\" family=\"" ); |
358 | tmp->append(str: fontName); |
359 | tmp->append(str: "\" color=\"" ); |
360 | tmp->append(str: colorStr); |
361 | if (font.getColor().getOpacity() != 1.0) { |
362 | tmp->append(str: "\" opacity=\"" ); |
363 | tmp->append(str: std::to_string(val: font.getColor().getOpacity())); |
364 | } |
365 | tmp->append(str: "\"/>" ); |
366 | } |
367 | |
368 | delete fontName; |
369 | delete colorStr; |
370 | return tmp; |
371 | } |
372 | |