1//========================================================================
2//
3// This file comes from pdftohtml project
4// http://pdftohtml.sourceforge.net
5//
6// Copyright from:
7// Gueorgui Ovtcharov
8// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
9// Mikhail Kruk <meshko@cs.brandeis.edu>
10//
11//========================================================================
12
13//========================================================================
14//
15// Modified under the Poppler project - http://poppler.freedesktop.org
16//
17// All changes made under the Poppler project to this file are licensed
18// under GPL version 2 or later
19//
20// Copyright (C) 2007, 2010, 2012, 2018, 2020 Albert Astals Cid <aacid@kde.org>
21// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
22// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
23// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
24// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
25// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
26// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
27// Copyright (C) 2012 Luis Parravicini <lparravi@gmail.com>
28// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
29// Copyright (C) 2017 Jason Crain <jason@inspiresomeone.us>
30// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
31// Copyright (C) 2018 Steven Boswell <ulatekh@yahoo.com>
32// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
33// Copyright (C) 2019, 2022 Oliver Sander <oliver.sander@tu-dresden.de>
34// Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
35// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
36//
37// To see a description of the changes please see the Changelog file that
38// came with your tarball or type make ChangeLog if you are building from git
39//
40//========================================================================
41
#include "HtmlFonts.h"
#include "HtmlUtils.h"
#include "GlobalParams.h"
#include "UnicodeMap.h"
#include "GfxFont.h"
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
48
namespace {

// Family name used when the PDF font does not provide a name.
const char *const defaultFamilyName = "Times";

// Style markers that separate a family name from its style designation.
// They are tried in this order; the first marker found wins.
const char *const styleSuffixes[] = {
    "-Regular", "-Bold", "-BoldOblique", "-BoldItalic", "-Oblique", "-Italic", "-Roman",
};

// Truncates familyName in place at the last occurrence of the first style
// marker (in styleSuffixes order) that it contains. Everything from the marker
// onwards is dropped, so trailing decorations after the marker go too
// (e.g. "Arial-BoldMT" becomes "Arial"). Names without any marker are left
// untouched.
void removeStyleSuffix(std::string &familyName)
{
    for (const char *const styleSuffix : styleSuffixes) {
        const std::string::size_type pos = familyName.rfind(styleSuffix);
        if (pos != std::string::npos) {
            familyName.resize(pos);
            return;
        }
    }
}

}
69
// Rounds x by adding 0.5 and truncating to int. Because the cast truncates
// toward zero, this is round-to-nearest only for non-negative values.
// The argument is parenthesised so that expressions containing operators of
// lower precedence than '+' (e.g. a conditional expression) are rounded as a
// whole instead of having 0.5 added to their last operand.
#define xoutRound(x) ((int)((x) + 0.5))

// Output-mode flags defined in another translation unit.
// xml: emit <fontspec> elements instead of CSS rules (see HtmlFontAccu::CSStyle).
extern bool xml;
// fontFullName: report the full font name rather than the stripped family name.
extern bool fontFullName;
73
74HtmlFontColor::HtmlFontColor(GfxRGB rgb, double opacity_)
75{
76 r = static_cast<int>(rgb.r / 65535.0 * 255.0);
77 g = static_cast<int>(rgb.g / 65535.0 * 255.0);
78 b = static_cast<int>(rgb.b / 65535.0 * 255.0);
79 opacity = static_cast<int>(opacity_ * 255.999);
80 if (!(Ok(xcol: r) && Ok(xcol: b) && Ok(xcol: g) && Ok(xcol: opacity))) {
81 if (!globalParams->getErrQuiet()) {
82 fprintf(stderr, format: "Error : Bad color (%d,%d,%d,%d) reset to (0,0,0,255)\n", r, g, b, opacity);
83 }
84 r = 0;
85 g = 0;
86 b = 0;
87 opacity = 255;
88 }
89}
90
91GooString *HtmlFontColor::convtoX(unsigned int xcol) const
92{
93 GooString *xret = new GooString();
94 char tmp;
95 unsigned int k;
96 k = (xcol / 16);
97 if (k < 10) {
98 tmp = (char)('0' + k);
99 } else {
100 tmp = (char)('a' + k - 10);
101 }
102 xret->append(c: tmp);
103 k = (xcol % 16);
104 if (k < 10) {
105 tmp = (char)('0' + k);
106 } else {
107 tmp = (char)('a' + k - 10);
108 }
109 xret->append(c: tmp);
110 return xret;
111}
112
113GooString *HtmlFontColor::toString() const
114{
115 GooString *tmp = new GooString("#");
116 GooString *tmpr = convtoX(xcol: r);
117 GooString *tmpg = convtoX(xcol: g);
118 GooString *tmpb = convtoX(xcol: b);
119 tmp->append(str: tmpr);
120 tmp->append(str: tmpg);
121 tmp->append(str: tmpb);
122 delete tmpr;
123 delete tmpg;
124 delete tmpb;
125 return tmp;
126}
127
128HtmlFont::HtmlFont(const GfxFont &font, int _size, GfxRGB rgb, double opacity)
129{
130 color = HtmlFontColor(rgb, opacity);
131
132 lineSize = -1;
133
134 size = _size;
135 italic = false;
136 bold = false;
137 rotOrSkewed = false;
138
139 if (font.isBold() || font.getWeight() >= GfxFont::W700) {
140 bold = true;
141 }
142 if (font.isItalic()) {
143 italic = true;
144 }
145
146 if (const std::optional<std::string> &fontname = font.getName()) {
147 FontName = new GooString(*fontname);
148
149 GooString fontnameLower(*fontname);
150 fontnameLower.lowerCase();
151
152 if (!bold && strstr(haystack: fontnameLower.c_str(), needle: "bold")) {
153 bold = true;
154 }
155
156 if (!italic && (strstr(haystack: fontnameLower.c_str(), needle: "italic") || strstr(haystack: fontnameLower.c_str(), needle: "oblique"))) {
157 italic = true;
158 }
159
160 familyName = fontname->c_str();
161 removeStyleSuffix(familyName);
162 } else {
163 FontName = new GooString(defaultFamilyName);
164 familyName = defaultFamilyName;
165 }
166
167 rotSkewMat[0] = rotSkewMat[1] = rotSkewMat[2] = rotSkewMat[3] = 0;
168}
169
170HtmlFont::HtmlFont(const HtmlFont &x)
171{
172 size = x.size;
173 lineSize = x.lineSize;
174 italic = x.italic;
175 bold = x.bold;
176 familyName = x.familyName;
177 color = x.color;
178 FontName = new GooString(x.FontName);
179 rotOrSkewed = x.rotOrSkewed;
180 memcpy(dest: rotSkewMat, src: x.rotSkewMat, n: sizeof(rotSkewMat));
181}
182
183HtmlFont::~HtmlFont()
184{
185 delete FontName;
186}
187
188HtmlFont &HtmlFont::operator=(const HtmlFont &x)
189{
190 if (this == &x) {
191 return *this;
192 }
193 size = x.size;
194 lineSize = x.lineSize;
195 italic = x.italic;
196 bold = x.bold;
197 familyName = x.familyName;
198 color = x.color;
199 delete FontName;
200 FontName = new GooString(x.FontName);
201 return *this;
202}
203
204/*
205 This function is used to compare font uniquely for insertion into
206 the list of all encountered fonts
207*/
208bool HtmlFont::isEqual(const HtmlFont &x) const
209{
210 return (size == x.size) && (lineSize == x.lineSize) && (FontName->cmp(str: x.FontName) == 0) && (bold == x.bold) && (italic == x.italic) && (color.isEqual(col: x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed()
211 && (!isRotOrSkewed() || rot_matrices_equal(mat0: getRotMat(), mat1: x.getRotMat()));
212}
213
214/*
215 This one is used to decide whether two pieces of text can be joined together
216 and therefore we don't care about bold/italics properties
217*/
218bool HtmlFont::isEqualIgnoreBold(const HtmlFont &x) const
219{
220 return ((size == x.size) && (familyName == x.familyName) && (color.isEqual(col: x.getColor())));
221}
222
223GooString *HtmlFont::getFontName()
224{
225 return new GooString(familyName);
226}
227
228GooString *HtmlFont::getFullName()
229{
230 return new GooString(FontName);
231}
232
233// this method if plain wrong todo
234std::unique_ptr<GooString> HtmlFont::HtmlFilter(const Unicode *u, int uLen)
235{
236 auto tmp = std::make_unique<GooString>();
237 const UnicodeMap *uMap;
238 char buf[8];
239 int n;
240
241 // get the output encoding
242 if (!(uMap = globalParams->getTextEncoding())) {
243 return tmp;
244 }
245
246 for (int i = 0; i < uLen; ++i) {
247 // skip control characters. W3C disallows them and they cause a warning
248 // with PHP.
249 if (u[i] <= 31 && u[i] != '\t') {
250 continue;
251 }
252
253 switch (u[i]) {
254 case '"':
255 tmp->append(str: "&#34;");
256 break;
257 case '&':
258 tmp->append(str: "&amp;");
259 break;
260 case '<':
261 tmp->append(str: "&lt;");
262 break;
263 case '>':
264 tmp->append(str: "&gt;");
265 break;
266 case ' ':
267 case '\t':
268 tmp->append(str: !xml && (i + 1 >= uLen || !tmp->getLength() || tmp->getChar(i: tmp->getLength() - 1) == ' ') ? "&#160;" : " ");
269 break;
270 default: {
271 // convert unicode to string
272 if ((n = uMap->mapUnicode(u: u[i], buf, bufSize: sizeof(buf))) > 0) {
273 tmp->append(str: buf, lengthA: n);
274 }
275 }
276 }
277 }
278
279 return tmp;
280}
281
282HtmlFontAccu::HtmlFontAccu() { }
283
284HtmlFontAccu::~HtmlFontAccu() { }
285
286int HtmlFontAccu::AddFont(const HtmlFont &font)
287{
288 std::vector<HtmlFont>::iterator i;
289 for (i = accu.begin(); i != accu.end(); ++i) {
290 if (font.isEqual(x: *i)) {
291 return (int)(i - (accu.begin()));
292 }
293 }
294
295 accu.push_back(x: font);
296 return (accu.size() - 1);
297}
298
299// get CSS font definition for font #i
300GooString *HtmlFontAccu::CSStyle(int i, int j)
301{
302 GooString *tmp = new GooString();
303
304 std::vector<HtmlFont>::iterator g = accu.begin();
305 g += i;
306 HtmlFont font = *g;
307 GooString *colorStr = font.getColor().toString();
308 GooString *fontName = (fontFullName ? font.getFullName() : font.getFontName());
309
310 if (!xml) {
311 tmp->append(str: ".ft");
312 tmp->append(str: std::to_string(val: j));
313 tmp->append(str: std::to_string(val: i));
314 tmp->append(str: "{font-size:");
315 tmp->append(str: std::to_string(val: font.getSize()));
316 if (font.getLineSize() != -1 && font.getLineSize() != 0) {
317 tmp->append(str: "px;line-height:");
318 tmp->append(str: std::to_string(val: font.getLineSize()));
319 }
320 tmp->append(str: "px;font-family:");
321 tmp->append(str: fontName); // font.getFontName());
322 tmp->append(str: ";color:");
323 tmp->append(str: colorStr);
324 if (font.getColor().getOpacity() != 1.0) {
325 tmp->append(str: ";opacity:");
326 tmp->append(str: std::to_string(val: font.getColor().getOpacity()));
327 }
328 // if there is rotation or skew, include the matrix
329 if (font.isRotOrSkewed()) {
330 const double *const text_mat = font.getRotMat();
331 GooString matrix_str(" matrix(");
332 matrix_str.appendf(fmt: "{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)", text_mat[0], text_mat[1], text_mat[2], text_mat[3]);
333 tmp->append(str: ";-moz-transform:");
334 tmp->append(str: &matrix_str);
335 tmp->append(str: ";-webkit-transform:");
336 tmp->append(str: &matrix_str);
337 tmp->append(str: ";-o-transform:");
338 tmp->append(str: &matrix_str);
339 tmp->append(str: ";-ms-transform:");
340 tmp->append(str: &matrix_str);
341 // Todo: 75% is a wild guess that seems to work pretty well;
342 // We probably need to calculate the real percentage
343 // Based on the characteristic baseline and bounding box of current font
344 // PDF origin is at baseline
345 tmp->append(str: ";-moz-transform-origin: left 75%");
346 tmp->append(str: ";-webkit-transform-origin: left 75%");
347 tmp->append(str: ";-o-transform-origin: left 75%");
348 tmp->append(str: ";-ms-transform-origin: left 75%");
349 }
350 tmp->append(str: ";}");
351 }
352 if (xml) {
353 tmp->append(str: "<fontspec id=\"");
354 tmp->append(str: std::to_string(val: i));
355 tmp->append(str: "\" size=\"");
356 tmp->append(str: std::to_string(val: font.getSize()));
357 tmp->append(str: "\" family=\"");
358 tmp->append(str: fontName);
359 tmp->append(str: "\" color=\"");
360 tmp->append(str: colorStr);
361 if (font.getColor().getOpacity() != 1.0) {
362 tmp->append(str: "\" opacity=\"");
363 tmp->append(str: std::to_string(val: font.getColor().getOpacity()));
364 }
365 tmp->append(str: "\"/>");
366 }
367
368 delete fontName;
369 delete colorStr;
370 return tmp;
371}
372

// Source code of poppler/utils/HtmlFonts.cc