1//========================================================================
2//
3// HtmlOutputDev.cc
4//
5// Copyright 1997-2002 Glyph & Cog, LLC
6//
7// Changed 1999-2000 by G.Ovtcharov
8//
9// Changed 2002 by Mikhail Kruk
10//
11//========================================================================
12
13//========================================================================
14//
15// Modified under the Poppler project - http://poppler.freedesktop.org
16//
17// All changes made under the Poppler project to this file are licensed
18// under GPL version 2 or later
19//
20// Copyright (C) 2005-2013, 2016-2022 Albert Astals Cid <aacid@kde.org>
21// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
22// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
23// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
24// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
25// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
26// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
27// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
28// Copyright (C) 2010, 2012, 2013, 2022 Adrian Johnson <ajohnson@redneon.com>
29// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
30// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
31// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
32// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
33// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
34// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
35// Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com>
36// Copyright (C) 2012 Pino Toscano <pino@kde.org>
37// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
38// Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr>
39// Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com>
40// Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it>
41// Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com>
42// Copyright (C) 2017 Caolán McNamara <caolanm@redhat.com>
43// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
44// Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com>
45// Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de>
46// Copyright (C) 2019, 2020, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
47// Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com>
48// Copyright (C) 2021 Christopher Hasse <hasse.christopher@gmail.com>
49// Copyright (C) 2022 Brian Rosenfield <brosenfi@yahoo.com>
50// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
51//
52// To see a description of the changes please see the Changelog file that
53// came with your tarball or type make ChangeLog if you are building from git
54//
55//========================================================================
56
57#include "config.h"
58#include <cstdio>
59#include <cstdlib>
60#include <cstdarg>
61#include <cstddef>
62#include <cctype>
63#include <cmath>
64#include <iostream>
65#include "goo/GooString.h"
66#include "goo/gbasename.h"
67#include "goo/gbase64.h"
68#include "goo/gbasename.h"
69#include "UnicodeMap.h"
70#include "goo/gmem.h"
71#include "Error.h"
72#include "GfxState.h"
73#include "Page.h"
74#include "Annot.h"
75#include "PNGWriter.h"
76#include "GlobalParams.h"
77#include "HtmlOutputDev.h"
78#include "HtmlFonts.h"
79#include "HtmlUtils.h"
80#include "InMemoryFile.h"
81#include "Outline.h"
82#include "PDFDoc.h"
83
84#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
85
86class HtmlImage
87{
88public:
89 HtmlImage(std::unique_ptr<GooString> &&_fName, GfxState *state) : fName(std::move(_fName))
90 {
91 state->transform(x1: 0, y1: 0, x2: &xMin, y2: &yMax);
92 state->transform(x1: 1, y1: 1, x2: &xMax, y2: &yMin);
93 }
94 ~HtmlImage() = default;
95 HtmlImage(const HtmlImage &) = delete;
96 HtmlImage &operator=(const HtmlImage &) = delete;
97
98 double xMin, xMax; // image x coordinates
99 double yMin, yMax; // image y coordinates
100 std::unique_ptr<GooString> fName; // image file name
101};
102
// Tells whether x lies strictly nearer to y than to z.  Equal distances
// count as "not closer".
static inline bool IS_CLOSER(double x, double y, double z)
{
    const double distanceToY = std::fabs(x - y);
    const double distanceToZ = std::fabs(x - z);
    return distanceToY < distanceToZ;
}
108
109extern bool complexMode;
110extern bool singleHtml;
111extern bool dataUrls;
112extern bool ignore;
113extern bool printCommands;
114extern bool printHtml;
115extern bool noframes;
116extern bool stout;
117extern bool xml;
118extern bool noRoundedCoordinates;
119extern bool showHidden;
120extern bool noMerge;
121
122extern double wordBreakThreshold;
123
124static bool debug = false;
125
126#if 0
127static GooString* Dirname(GooString* str){
128
129 char *p=str->c_str();
130 int len=str->getLength();
131 for (int i=len-1;i>=0;i--)
132 if (*(p+i)==SLASH)
133 return new GooString(p,i+1);
134 return new GooString();
135}
136#endif
137
138static std::unique_ptr<GooString> print_matrix(const double *mat)
139{
140 return GooString::format(fmt: "[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
141}
142
143static std::unique_ptr<GooString> print_uni_str(const Unicode *u, const unsigned uLen)
144{
145 if (!uLen) {
146 return std::make_unique<GooString>(args: "");
147 }
148 std::unique_ptr<GooString> gstr_buff0 = GooString::format(fmt: "{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
149 for (unsigned i = 1; i < uLen; i++) {
150 if (u[i] < 0x7F) {
151 gstr_buff0->append(c: static_cast<char>(u[i]) & 0xFF);
152 }
153 }
154
155 return gstr_buff0;
156}
157
158//------------------------------------------------------------------------
159// HtmlString
160//------------------------------------------------------------------------
161
162HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) : fonts(_fonts)
163{
164 double x, y;
165
166 state->transform(x1: state->getCurX(), y1: state->getCurY(), x2: &x, y2: &y);
167 if (std::shared_ptr<const GfxFont> font = state->getFont()) {
168 double ascent = font->getAscent();
169 double descent = font->getDescent();
170 if (ascent > 1.05) {
171 // printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent );
172 ascent = 1.05;
173 }
174 if (descent < -0.4) {
175 // printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent );
176 descent = -0.4;
177 }
178 yMin = y - ascent * fontSize;
179 yMax = y - descent * fontSize;
180 GfxRGB rgb;
181 state->getFillRGB(rgb: &rgb);
182 HtmlFont hfont = HtmlFont(*font, std::lround(x: fontSize), rgb, state->getFillOpacity());
183 if (isMatRotOrSkew(mat: state->getTextMat())) {
184 double normalizedMatrix[4];
185 memcpy(dest: normalizedMatrix, src: state->getTextMat(), n: sizeof(normalizedMatrix));
186 // browser rotates the opposite way
187 // so flip the sign of the angle -> sin() components change sign
188 if (debug) {
189 std::cerr << DEBUG << "before transform: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl;
190 }
191 normalizedMatrix[1] *= -1;
192 normalizedMatrix[2] *= -1;
193 if (debug) {
194 std::cerr << DEBUG << "after reflecting angle: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl;
195 }
196 normalizeRotMat(mat: normalizedMatrix);
197 if (debug) {
198 std::cerr << DEBUG << "after norm: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl;
199 }
200 hfont.setRotMat(normalizedMatrix);
201 }
202 fontpos = fonts->AddFont(font: hfont);
203 } else {
204 // this means that the PDF file draws text without a current font,
205 // which should never happen
206 yMin = y - 0.95 * fontSize;
207 yMax = y + 0.35 * fontSize;
208 fontpos = 0;
209 }
210 if (yMin == yMax) {
211 // this is a sanity check for a case that shouldn't happen -- but
212 // if it does happen, we want to avoid dividing by zero later
213 yMin = y;
214 yMax = y + 1;
215 }
216 col = 0;
217 text = nullptr;
218 xRight = nullptr;
219 link = nullptr;
220 len = size = 0;
221 yxNext = nullptr;
222 xyNext = nullptr;
223 htext = std::make_unique<GooString>();
224 dir = textDirUnknown;
225}
226
227HtmlString::~HtmlString()
228{
229 gfree(p: text);
230 gfree(p: xRight);
231}
232
233void HtmlString::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u)
234{
235 if (dir == textDirUnknown) {
236 // dir = UnicodeMap::getDirection(u);
237 dir = textDirLeftRight;
238 }
239
240 if (len == size) {
241 size += 16;
242 text = (Unicode *)grealloc(p: text, size: size * sizeof(Unicode));
243 xRight = (double *)grealloc(p: xRight, size: size * sizeof(double));
244 }
245 text[len] = u;
246 if (len == 0) {
247 xMin = x;
248 }
249 xMax = xRight[len] = x + dx;
250 // printf("added char: %f %f xright = %f\n", x, dx, x+dx);
251 ++len;
252}
253
254void HtmlString::endString()
255{
256 if (dir == textDirRightLeft && len > 1) {
257 // printf("will reverse!\n");
258 for (int i = 0; i < len / 2; i++) {
259 Unicode ch = text[i];
260 text[i] = text[len - i - 1];
261 text[len - i - 1] = ch;
262 }
263 }
264}
265
266//------------------------------------------------------------------------
267// HtmlPage
268//------------------------------------------------------------------------
269
270HtmlPage::HtmlPage(bool rawOrderA)
271{
272 rawOrder = rawOrderA;
273 curStr = nullptr;
274 yxStrings = nullptr;
275 xyStrings = nullptr;
276 yxCur1 = yxCur2 = nullptr;
277 fonts = new HtmlFontAccu();
278 links = new HtmlLinks();
279 pageWidth = 0;
280 pageHeight = 0;
281 fontsPageMarker = 0;
282 DocName = nullptr;
283 firstPage = -1;
284}
285
286HtmlPage::~HtmlPage()
287{
288 clear();
289 delete DocName;
290 delete fonts;
291 delete links;
292 for (auto entry : imgList) {
293 delete entry;
294 }
295}
296
297void HtmlPage::updateFont(GfxState *state)
298{
299 const char *name;
300 int code;
301 double dimLength;
302
303 // adjust the font size
304 fontSize = state->getTransformedFontSize();
305 const GfxFont *const font = state->getFont().get();
306 if (font && font->getType() == fontType3) {
307 // Grab the font size from the font bounding box if possible - remember to
308 // scale from the glyph coordinate system.
309 const double *fontBBox = font->getFontBBox();
310 const double *fontMat = font->getFontMatrix();
311 dimLength = (fontBBox[3] - fontBBox[1]) * fontMat[3];
312 if (dimLength > 0) {
313 fontSize *= dimLength;
314 } else {
315 // This is a hack which makes it possible to deal with some Type 3
316 // fonts. The problem is that it's impossible to know what the
317 // base coordinate system used in the font is without actually
318 // rendering the font. This code tries to guess by looking at the
319 // width of the character 'm' (which breaks if the font is a
320 // subset that doesn't contain 'm').
321 for (code = 0; code < 256; ++code) {
322 if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') {
323 break;
324 }
325 }
326 if (code < 256) {
327 dimLength = ((Gfx8BitFont *)font)->getWidth(c: code);
328 if (dimLength != 0) {
329 // 600 is a generic average 'm' width -- yes, this is a hack
330 fontSize *= dimLength / 0.6;
331 }
332 }
333 if (fontMat[0] != 0) {
334 fontSize *= fabs(x: fontMat[3] / fontMat[0]);
335 }
336 }
337 }
338}
339
340void HtmlPage::beginString(GfxState *state, const GooString *s)
341{
342 curStr = new HtmlString(state, fontSize, fonts);
343}
344
345void HtmlPage::conv()
346{
347 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
348 tmp->htext = HtmlFont::HtmlFilter(u: tmp->text, uLen: tmp->len);
349
350 size_t linkIndex = 0;
351 if (links->inLink(xmin: tmp->xMin, ymin: tmp->yMin, xmax: tmp->xMax, ymax: tmp->yMax, p&: linkIndex)) {
352 tmp->link = links->getLink(i: linkIndex);
353 }
354 }
355}
356
357void HtmlPage::addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen)
358{
359 double x1, y1, w1, h1, dx2, dy2;
360 int n, i;
361 state->transform(x1: x, y1: y, x2: &x1, y2: &y1);
362 n = curStr->len;
363
364 // check that new character is in the same direction as current string
365 // and is not too far away from it before adding
366 // if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
367 // XXX
368 if (debug) {
369 const double *text_mat = state->getTextMat();
370 // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
371 // sin q is zero iff there is no rotation, or 180 deg. rotation;
372 // for 180 rotation, cos q will be negative
373 if (text_mat[0] < 0 || !is_within(a: text_mat[1], thresh: .1, b: 0)) {
374 std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen)->c_str() << '"' << std::endl;
375 std::cerr << "text " << print_matrix(mat: state->getTextMat())->c_str();
376 }
377 }
378 if (n > 0 && // don't start a new string, unless there is already a string
379 // TODO: the following line assumes that text is flowing left to
380 // right, which will not necessarily be the case, e.g. if rotated;
381 // It assesses whether or not two characters are close enough to
382 // be part of the same string
383 fabs(x: x1 - curStr->xRight[n - 1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) &&
384 // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
385 // sin q is zero iff there is no rotation, or 180 deg. rotation;
386 // for 180 rotation, cos q will be negative
387 !rot_matrices_equal(mat0: curStr->getFont().getRotMat(), mat1: state->getTextMat())) {
388 endString();
389 beginString(state, s: nullptr);
390 }
391 state->textTransformDelta(x1: state->getCharSpace() * state->getHorizScaling(), y1: 0, x2: &dx2, y2: &dy2);
392 dx -= dx2;
393 dy -= dy2;
394 state->transformDelta(x1: dx, y1: dy, x2: &w1, y2: &h1);
395 if (uLen != 0) {
396 w1 /= uLen;
397 h1 /= uLen;
398 }
399 for (i = 0; i < uLen; ++i) {
400 curStr->addChar(state, x: x1 + i * w1, y: y1 + i * h1, dx: w1, dy: h1, u: u[i]);
401 }
402}
403
404void HtmlPage::endString()
405{
406 HtmlString *p1, *p2;
407 double h, y1, y2;
408
409 // throw away zero-length strings -- they don't have valid xMin/xMax
410 // values, and they're useless anyway
411 if (curStr->len == 0) {
412 delete curStr;
413 curStr = nullptr;
414 return;
415 }
416
417 curStr->endString();
418
419#if 0 //~tmp
420 if (curStr->yMax - curStr->yMin > 20) {
421 delete curStr;
422 curStr = NULL;
423 return;
424 }
425#endif
426
427 // insert string in y-major list
428 h = curStr->yMax - curStr->yMin;
429 y1 = curStr->yMin + 0.5 * h;
430 y2 = curStr->yMin + 0.8 * h;
431 if (rawOrder) {
432 p1 = yxCur1;
433 p2 = nullptr;
434 } else if ((!yxCur1 || (y1 >= yxCur1->yMin && (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && (!yxCur2 || (y1 < yxCur2->yMin || (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
435 p1 = yxCur1;
436 p2 = yxCur2;
437 } else {
438 for (p1 = nullptr, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
439 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) {
440 break;
441 }
442 }
443 yxCur2 = p2;
444 }
445 yxCur1 = curStr;
446 if (p1) {
447 p1->yxNext = curStr;
448 } else {
449 yxStrings = curStr;
450 }
451 curStr->yxNext = p2;
452 curStr = nullptr;
453}
454
// Returns a pointer to the last occurrence of `ss` in `s`, or nullptr when
// `ss` does not occur.  Overlapping occurrences are considered.
// An empty `ss` matches at every offset, so the last match is the string
// terminator; this case is handled up front because stepping strstr() one
// past a match at the terminator would read beyond the end of `s`.
static const char *strrstr(const char *s, const char *ss)
{
    if (*ss == '\0') {
        const char *end = s;
        while (*end != '\0') {
            ++end;
        }
        return end;
    }

    const char *last = nullptr;
    for (const char *hit = strstr(s, ss); hit != nullptr; hit = strstr(hit + 1, ss)) {
        last = hit;
    }
    return last;
}
463
464static void CloseTags(GooString *htext, bool &finish_a, bool &finish_italic, bool &finish_bold)
465{
466 const char *last_italic = finish_italic && (finish_bold || finish_a) ? strrstr(s: htext->c_str(), ss: "<i>") : nullptr;
467 const char *last_bold = finish_bold && (finish_italic || finish_a) ? strrstr(s: htext->c_str(), ss: "<b>") : nullptr;
468 const char *last_a = finish_a && (finish_italic || finish_bold) ? strrstr(s: htext->c_str(), ss: "<a ") : nullptr;
469 if (finish_a && (finish_italic || finish_bold) && last_a > (last_italic > last_bold ? last_italic : last_bold)) {
470 htext->append(str: "</a>", lengthA: 4);
471 finish_a = false;
472 }
473 if (finish_italic && finish_bold && last_italic > last_bold) {
474 htext->append(str: "</i>", lengthA: 4);
475 finish_italic = false;
476 }
477 if (finish_bold) {
478 htext->append(str: "</b>", lengthA: 4);
479 }
480 if (finish_italic) {
481 htext->append(str: "</i>", lengthA: 4);
482 }
483 if (finish_a) {
484 htext->append(str: "</a>");
485 }
486}
487
488// Strings are lines of text;
489// This function aims to combine strings into lines and paragraphs if !noMerge
490// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect
491void HtmlPage::coalesce()
492{
493 HtmlString *str1, *str2;
494 double space, horSpace, vertSpace, vertOverlap;
495 bool addSpace, addLineBreak;
496 int n, i;
497 double curX, curY;
498
499#if 0 //~ for debugging
500 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
501 printf("x=%f..%f y=%f..%f size=%2d '",
502 str1->xMin, str1->xMax, str1->yMin, str1->yMax,
503 (int)(str1->yMax - str1->yMin));
504 for (i = 0; i < str1->len; ++i) {
505 fputc(str1->text[i] & 0xff, stdout);
506 }
507 printf("'\n");
508 }
509 printf("\n------------------------------------------------------------\n\n");
510#endif
511 str1 = yxStrings;
512
513 if (!str1) {
514 return;
515 }
516
517 //----- discard duplicated text (fake boldface, drop shadows)
518 if (!complexMode) { /* if not in complex mode get rid of duplicate strings */
519 HtmlString *str3;
520 bool found;
521 while (str1) {
522 double size = str1->yMax - str1->yMin;
523 double xLimit = str1->xMin + size;
524 found = false;
525 for (str2 = str1, str3 = str1->yxNext; str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) {
526 if (str3->len == str1->len && !memcmp(s1: str3->text, s2: str1->text, n: str1->len * sizeof(Unicode)) && fabs(x: str3->yMin - str1->yMin) < size * 0.2 && fabs(x: str3->yMax - str1->yMax) < size * 0.2
527 && fabs(x: str3->xMax - str1->xMax) < size * 0.1) {
528 found = true;
529 // printf("found duplicate!\n");
530 break;
531 }
532 }
533 if (found) {
534 str2->xyNext = str3->xyNext;
535 str2->yxNext = str3->yxNext;
536 delete str3;
537 } else {
538 str1 = str1->yxNext;
539 }
540 }
541 } /*- !complexMode */
542
543 str1 = yxStrings;
544
545 const HtmlFont *hfont1 = getFont(hStr: str1);
546 if (hfont1->isBold()) {
547 str1->htext->insert(i: 0, str: "<b>", lengthA: 3);
548 }
549 if (hfont1->isItalic()) {
550 str1->htext->insert(i: 0, str: "<i>", lengthA: 3);
551 }
552 if (str1->getLink() != nullptr) {
553 GooString *ls = str1->getLink()->getLinkStart();
554 str1->htext->insert(i: 0, str: ls);
555 delete ls;
556 }
557 curX = str1->xMin;
558 curY = str1->yMin;
559
560 while (str1 && (str2 = str1->yxNext)) {
561 const HtmlFont *hfont2 = getFont(hStr: str2);
562 space = str1->yMax - str1->yMin; // the height of the font's bounding box
563 horSpace = str2->xMin - str1->xMax;
564 // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
565 addLineBreak = !noMerge && (fabs(x: str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(x: str2->yMax, y: str1->yMax + space, z: str1->yMax);
566 vertSpace = str2->yMin - str1->yMax;
567
568 // printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
569
570 if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) {
571 vertOverlap = str1->yMax - str2->yMin;
572 } else if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) {
573 vertOverlap = str2->yMax - str1->yMin;
574 } else {
575 vertOverlap = 0;
576 }
577
578 // Combine strings if:
579 // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
580 // 1. They appear to be part of the same line of text
581 // 2. They appear to be subsequent lines of a paragraph
582 // We assume (1) or (2) above, respectively, based on:
583 // (1) strings overlap vertically AND
584 // horizontal space between end of str1 and start of str2 is consistent with a single space or less;
585 // when rawOrder, the strings have to overlap vertically by at least 50%
586 // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left
587 if (((((rawOrder && vertOverlap > 0.5 * space) || (!rawOrder && str2->yMin < str1->yMax)) && (horSpace > -0.5 * space && horSpace < space)) || (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak))
588 && (!complexMode || (hfont1->isEqualIgnoreBold(x: *hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter
589 str1->dir == str2->dir // text direction the same
590 ) {
591 // printf("yes\n");
592 n = str1->len + str2->len;
593 if ((addSpace = horSpace > wordBreakThreshold * space)) {
594 ++n;
595 }
596 if (addLineBreak) {
597 ++n;
598 }
599
600 str1->size = (n + 15) & ~15;
601 str1->text = (Unicode *)grealloc(p: str1->text, size: str1->size * sizeof(Unicode));
602 str1->xRight = (double *)grealloc(p: str1->xRight, size: str1->size * sizeof(double));
603 if (addSpace) {
604 str1->text[str1->len] = 0x20;
605 str1->htext->append(str: xml ? " " : "&#160;");
606 str1->xRight[str1->len] = str2->xMin;
607 ++str1->len;
608 }
609 if (addLineBreak) {
610 str1->text[str1->len] = '\n';
611 str1->htext->append(str: "<br/>");
612 str1->xRight[str1->len] = str2->xMin;
613 ++str1->len;
614 str1->yMin = str2->yMin;
615 str1->yMax = str2->yMax;
616 str1->xMax = str2->xMax;
617 int fontLineSize = hfont1->getLineSize();
618 int curLineSize = (int)(vertSpace + space);
619 if (curLineSize != fontLineSize) {
620 HtmlFont *newfnt = new HtmlFont(*hfont1);
621 newfnt->setLineSize(curLineSize);
622 str1->fontpos = fonts->AddFont(font: *newfnt);
623 delete newfnt;
624 hfont1 = getFont(hStr: str1);
625 // we have to reget hfont2 because it's location could have
626 // changed on resize
627 hfont2 = getFont(hStr: str2);
628 }
629 }
630 for (i = 0; i < str2->len; ++i) {
631 str1->text[str1->len] = str2->text[i];
632 str1->xRight[str1->len] = str2->xRight[i];
633 ++str1->len;
634 }
635
636 /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
637 const HtmlLink *hlink1 = str1->getLink();
638 const HtmlLink *hlink2 = str2->getLink();
639 bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(x: *hlink2);
640 bool finish_a = switch_links && hlink1 != nullptr;
641 bool finish_italic = hfont1->isItalic() && (!hfont2->isItalic() || finish_a);
642 bool finish_bold = hfont1->isBold() && (!hfont2->isBold() || finish_a || finish_italic);
643 CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold);
644 if (switch_links && hlink2 != nullptr) {
645 GooString *ls = hlink2->getLinkStart();
646 str1->htext->append(str: ls);
647 delete ls;
648 }
649 if ((!hfont1->isItalic() || finish_italic) && hfont2->isItalic()) {
650 str1->htext->append(str: "<i>", lengthA: 3);
651 }
652 if ((!hfont1->isBold() || finish_bold) && hfont2->isBold()) {
653 str1->htext->append(str: "<b>", lengthA: 3);
654 }
655
656 str1->htext->append(str: str2->htext.get());
657 // str1 now contains href for link of str2 (if it is defined)
658 str1->link = str2->link;
659 hfont1 = hfont2;
660 if (str2->xMax > str1->xMax) {
661 str1->xMax = str2->xMax;
662 }
663 if (str2->yMax > str1->yMax) {
664 str1->yMax = str2->yMax;
665 }
666 str1->yxNext = str2->yxNext;
667 delete str2;
668 } else { // keep strings separate
669 // printf("no\n");
670 bool finish_a = str1->getLink() != nullptr;
671 bool finish_bold = hfont1->isBold();
672 bool finish_italic = hfont1->isItalic();
673 CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold);
674
675 str1->xMin = curX;
676 str1->yMin = curY;
677 str1 = str2;
678 curX = str1->xMin;
679 curY = str1->yMin;
680 hfont1 = hfont2;
681 if (hfont1->isBold()) {
682 str1->htext->insert(i: 0, str: "<b>", lengthA: 3);
683 }
684 if (hfont1->isItalic()) {
685 str1->htext->insert(i: 0, str: "<i>", lengthA: 3);
686 }
687 if (str1->getLink() != nullptr) {
688 GooString *ls = str1->getLink()->getLinkStart();
689 str1->htext->insert(i: 0, str: ls);
690 delete ls;
691 }
692 }
693 }
694 str1->xMin = curX;
695 str1->yMin = curY;
696
697 bool finish_bold = hfont1->isBold();
698 bool finish_italic = hfont1->isItalic();
699 bool finish_a = str1->getLink() != nullptr;
700 CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold);
701
702#if 0 //~ for debugging
703 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
704 printf("x=%3d..%3d y=%3d..%3d size=%2d ",
705 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
706 (int)(str1->yMax - str1->yMin));
707 printf("'%s'\n", str1->htext->c_str());
708 }
709 printf("\n------------------------------------------------------------\n\n");
710#endif
711}
712
713void HtmlPage::dumpAsXML(FILE *f, int page)
714{
715 fprintf(stream: f, format: "<page number=\"%d\" position=\"absolute\"", page);
716 fprintf(stream: f, format: " top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight, pageWidth);
717
718 for (int i = fontsPageMarker; i < fonts->size(); i++) {
719 GooString *fontCSStyle = fonts->CSStyle(i);
720 fprintf(stream: f, format: "\t%s\n", fontCSStyle->c_str());
721 delete fontCSStyle;
722 }
723
724 for (auto ptr : imgList) {
725 auto img = static_cast<HtmlImage *>(ptr);
726 if (!noRoundedCoordinates) {
727 fprintf(stream: f, format: "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin));
728 fprintf(stream: f, format: "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin));
729 } else {
730 fprintf(stream: f, format: "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin);
731 fprintf(stream: f, format: "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin);
732 }
733 fprintf(stream: f, format: "src=\"%s\"/>\n", img->fName->c_str());
734 delete img;
735 }
736 imgList.clear();
737
738 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
739 if (tmp->htext) {
740 if (!noRoundedCoordinates) {
741 fprintf(stream: f, format: "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin));
742 fprintf(stream: f, format: "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin));
743 } else {
744 fprintf(stream: f, format: "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin);
745 fprintf(stream: f, format: "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin);
746 }
747 fprintf(stream: f, format: "font=\"%d\">", tmp->fontpos);
748 fputs(s: tmp->htext->c_str(), stream: f);
749 fputs(s: "</text>\n", stream: f);
750 }
751 }
752 fputs(s: "</page>\n", stream: f);
753}
754
// Writes a <style> element to `f` that defines the classes xflip, yflip and
// xyflip, used to mirror images horizontally, vertically or both.
//
// Image flip/flop CSS
// Source:
// http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
// tested in Chrome, Fx (Linux) and IE9 (W7)
static void printCSS(FILE *f)
{
    static const char css[] = "<style type=\"text/css\">\n"
                              "<!--\n"
                              ".xflip {\n"
                              " -moz-transform: scaleX(-1);\n"
                              " -webkit-transform: scaleX(-1);\n"
                              " -o-transform: scaleX(-1);\n"
                              " transform: scaleX(-1);\n"
                              " filter: fliph;\n"
                              "}\n"
                              ".yflip {\n"
                              " -moz-transform: scaleY(-1);\n"
                              " -webkit-transform: scaleY(-1);\n"
                              " -o-transform: scaleY(-1);\n"
                              " transform: scaleY(-1);\n"
                              " filter: flipv;\n"
                              "}\n"
                              ".xyflip {\n"
                              " -moz-transform: scaleX(-1) scaleY(-1);\n"
                              " -webkit-transform: scaleX(-1) scaleY(-1);\n"
                              " -o-transform: scaleX(-1) scaleY(-1);\n"
                              " transform: scaleX(-1) scaleY(-1);\n"
                              " filter: fliph + flipv;\n"
                              "}\n"
                              "-->\n"
                              "</style>\n";

    fputs(css, f);
}
814
815int HtmlPage::dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page)
816{
817
818 if (!noframes) {
819 const std::string pgNum = std::to_string(val: page);
820 std::string pageFileName(DocName->toStr());
821 if (!singleHtml) {
822 pageFileName += '-' + pgNum + ".html";
823 pageFile = fopen(filename: pageFileName.c_str(), modes: "w");
824 } else {
825 pageFileName += "-html.html";
826 pageFile = fopen(filename: pageFileName.c_str(), modes: "a");
827 }
828
829 if (!pageFile) {
830 error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:s}'", pageFileName.c_str());
831 return 1;
832 }
833
834 if (!singleHtml) {
835 fprintf(stream: pageFile, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page);
836 } else {
837 fprintf(stream: pageFile, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, pageFileName.c_str());
838 }
839
840 const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(encoding: globalParams->getTextEncodingName());
841 if (!singleHtml) {
842 fprintf(stream: pageFile, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
843 } else {
844 fprintf(stream: pageFile, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding.c_str());
845 }
846 } else {
847 pageFile = file;
848 fprintf(stream: pageFile, format: "<!-- Page %d -->\n", page);
849 fprintf(stream: pageFile, format: "<a name=\"%d\"></a>\n", page);
850 }
851
852 return 0;
853}
854
855void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string> &backgroundImages)
856{
857 FILE *pageFile;
858
859 if (firstPage == -1) {
860 firstPage = page;
861 }
862
863 if (dumpComplexHeaders(file, pageFile, page)) {
864 error(category: errIO, pos: -1, msg: "Couldn't write headers.");
865 return;
866 }
867
868 fputs(s: "<style type=\"text/css\">\n<!--\n", stream: pageFile);
869 fputs(s: "\tp {margin: 0; padding: 0;}", stream: pageFile);
870 for (int i = fontsPageMarker; i != fonts->size(); i++) {
871 GooString *fontCSStyle;
872 if (!singleHtml) {
873 fontCSStyle = fonts->CSStyle(i);
874 } else {
875 fontCSStyle = fonts->CSStyle(i, j: page);
876 }
877 fprintf(stream: pageFile, format: "\t%s\n", fontCSStyle->c_str());
878 delete fontCSStyle;
879 }
880
881 fputs(s: "-->\n</style>\n", stream: pageFile);
882
883 if (!noframes) {
884 fputs(s: "</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n", stream: pageFile);
885 }
886
887 fprintf(stream: pageFile, format: "<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", page, pageWidth, pageHeight);
888
889 if (!ignore && (size_t)(page - firstPage) < backgroundImages.size()) {
890 fprintf(stream: pageFile, format: "<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n", pageWidth, pageHeight, backgroundImages[page - firstPage].c_str());
891 }
892
893 for (HtmlString *tmp1 = yxStrings; tmp1; tmp1 = tmp1->yxNext) {
894 if (tmp1->htext) {
895 fprintf(stream: pageFile, format: "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", xoutRound(tmp1->yMin), xoutRound(tmp1->xMin));
896 if (!singleHtml) {
897 fputc(c: '0', stream: pageFile);
898 } else {
899 fprintf(stream: pageFile, format: "%d", page);
900 }
901 fprintf(stream: pageFile, format: "%d\">", tmp1->fontpos);
902 fputs(s: tmp1->htext->c_str(), stream: pageFile);
903 fputs(s: "</p>\n", stream: pageFile);
904 }
905 }
906
907 fputs(s: "</div>\n", stream: pageFile);
908
909 if (!noframes) {
910 fputs(s: "</body>\n</html>\n", stream: pageFile);
911 fclose(stream: pageFile);
912 }
913}
914
915void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages)
916{
917 if (complexMode || singleHtml) {
918 if (xml) {
919 dumpAsXML(f, page: pageNum);
920 }
921 if (!xml) {
922 dumpComplex(file: f, page: pageNum, backgroundImages);
923 }
924 } else {
925 fprintf(stream: f, format: "<a name=%d></a>", pageNum);
926 // Loop over the list of image names on this page
927 for (auto ptr : imgList) {
928 auto img = static_cast<HtmlImage *>(ptr);
929
930 // see printCSS() for class names
931 const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
932 int style_index = 0;
933 if (img->xMin > img->xMax) {
934 style_index += 1; // xFlip
935 }
936 if (img->yMin > img->yMax) {
937 style_index += 2; // yFlip
938 }
939
940 fprintf(stream: f, format: "<img%s src=\"%s\"/><br/>\n", styles[style_index], img->fName->c_str());
941 delete img;
942 }
943 imgList.clear();
944
945 for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) {
946 if (tmp->htext) {
947 fputs(s: tmp->htext->c_str(), stream: f);
948 fputs(s: "<br/>\n", stream: f);
949 }
950 }
951 fputs(s: "<hr/>\n", stream: f);
952 }
953}
954
955void HtmlPage::clear()
956{
957 HtmlString *p1, *p2;
958
959 if (curStr) {
960 delete curStr;
961 curStr = nullptr;
962 }
963 for (p1 = yxStrings; p1; p1 = p2) {
964 p2 = p1->yxNext;
965 delete p1;
966 }
967 yxStrings = nullptr;
968 xyStrings = nullptr;
969 yxCur1 = yxCur2 = nullptr;
970
971 if (!noframes) {
972 delete fonts;
973 fonts = new HtmlFontAccu();
974 fontsPageMarker = 0;
975 } else {
976 fontsPageMarker = fonts->size();
977 }
978
979 delete links;
980 links = new HtmlLinks();
981}
982
983void HtmlPage::setDocName(const char *fname)
984{
985 DocName = new GooString(fname);
986}
987
988void HtmlPage::addImage(std::unique_ptr<GooString> &&fname, GfxState *state)
989{
990 HtmlImage *img = new HtmlImage(std::move(fname), state);
991 imgList.push_back(x: img);
992}
993
994//------------------------------------------------------------------------
995// HtmlMetaVar
996//------------------------------------------------------------------------
997
998HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
999{
1000 name = new GooString(_name);
1001 content = new GooString(_content);
1002}
1003
1004HtmlMetaVar::~HtmlMetaVar()
1005{
1006 delete name;
1007 delete content;
1008}
1009
1010GooString *HtmlMetaVar::toString() const
1011{
1012 GooString *result = new GooString("<meta name=\"");
1013 result->append(str: name);
1014 result->append(str: "\" content=\"");
1015 result->append(str: content);
1016 result->append(str: "\"/>");
1017 return result;
1018}
1019
1020//------------------------------------------------------------------------
1021// HtmlOutputDev
1022//------------------------------------------------------------------------
1023
1024static const char *HtmlEncodings[][2] = { { "Latin1", "ISO-8859-1" }, { nullptr, nullptr } };
1025
1026std::string HtmlOutputDev::mapEncodingToHtml(const std::string &encoding)
1027{
1028 for (int i = 0; HtmlEncodings[i][0] != nullptr; i++) {
1029 if (encoding == HtmlEncodings[i][0]) {
1030 return HtmlEncodings[i][1];
1031 }
1032 }
1033 return encoding;
1034}
1035
1036void HtmlOutputDev::doFrame(int firstPage)
1037{
1038 GooString *fName = new GooString(Docname);
1039 fName->append(str: ".html");
1040
1041 if (!(fContentsFrame = fopen(filename: fName->c_str(), modes: "w"))) {
1042 error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'", fName);
1043 delete fName;
1044 return;
1045 }
1046
1047 delete fName;
1048
1049 const std::string baseName = gbasename(filename: Docname->c_str());
1050 fputs(DOCTYPE, stream: fContentsFrame);
1051 fputs(s: "\n<html>", stream: fContentsFrame);
1052 fputs(s: "\n<head>", stream: fContentsFrame);
1053 fprintf(stream: fContentsFrame, format: "\n<title>%s</title>", docTitle->c_str());
1054 const std::string htmlEncoding = mapEncodingToHtml(encoding: globalParams->getTextEncodingName());
1055 fprintf(stream: fContentsFrame, format: "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
1056 dumpMetaVars(fContentsFrame);
1057 fprintf(stream: fContentsFrame, format: "</head>\n");
1058 fputs(s: "<frameset cols=\"100,*\">\n", stream: fContentsFrame);
1059 fprintf(stream: fContentsFrame, format: "<frame name=\"links\" src=\"%s_ind.html\"/>\n", baseName.c_str());
1060 fputs(s: "<frame name=\"contents\" src=", stream: fContentsFrame);
1061 if (complexMode) {
1062 fprintf(stream: fContentsFrame, format: "\"%s-%d.html\"", baseName.c_str(), firstPage);
1063 } else {
1064 fprintf(stream: fContentsFrame, format: "\"%ss.html\"", baseName.c_str());
1065 }
1066
1067 fputs(s: "/>\n</frameset>\n</html>\n", stream: fContentsFrame);
1068
1069 fclose(stream: fContentsFrame);
1070}
1071
1072HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrderA, int firstPage, bool outline)
1073{
1074 catalog = catalogA;
1075 fContentsFrame = nullptr;
1076 page = nullptr;
1077 docTitle = new GooString(title);
1078 pages = nullptr;
1079 dumpJPEG = true;
1080 // write = true;
1081 rawOrder = rawOrderA;
1082 this->doOutline = outline;
1083 ok = false;
1084 // this->firstPage = firstPage;
1085 // pageNum=firstPage;
1086 // open file
1087 needClose = false;
1088 pages = new HtmlPage(rawOrder);
1089
1090 glMetaVars.push_back(x: new HtmlMetaVar("generator", "pdftohtml 0.36"));
1091 if (author) {
1092 glMetaVars.push_back(x: new HtmlMetaVar("author", author));
1093 }
1094 if (keywords) {
1095 glMetaVars.push_back(x: new HtmlMetaVar("keywords", keywords));
1096 }
1097 if (date) {
1098 glMetaVars.push_back(x: new HtmlMetaVar("date", date));
1099 }
1100 if (subject) {
1101 glMetaVars.push_back(x: new HtmlMetaVar("subject", subject));
1102 }
1103
1104 maxPageWidth = 0;
1105 maxPageHeight = 0;
1106
1107 pages->setDocName(fileName);
1108 Docname = new GooString(fileName);
1109
1110 // for non-xml output (complex or simple) with frames generate the left frame
1111 if (!xml && !noframes) {
1112 if (!singleHtml) {
1113 GooString *left = new GooString(fileName);
1114 left->append(str: "_ind.html");
1115
1116 doFrame(firstPage);
1117
1118 if (!(fContentsFrame = fopen(filename: left->c_str(), modes: "w"))) {
1119 error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'", left);
1120 delete left;
1121 return;
1122 }
1123 delete left;
1124 fputs(DOCTYPE, stream: fContentsFrame);
1125 fputs(s: "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", stream: fContentsFrame);
1126
1127 if (doOutline) {
1128 fprintf(stream: fContentsFrame, format: "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", gbasename(filename: Docname->c_str()).c_str(), complexMode ? "-outline.html" : "s.html#outline");
1129 }
1130 }
1131 if (!complexMode) { /* not in complex mode */
1132
1133 GooString *right = new GooString(fileName);
1134 right->append(str: "s.html");
1135
1136 if (!(page = fopen(filename: right->c_str(), modes: "w"))) {
1137 error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'", right);
1138 delete right;
1139 return;
1140 }
1141 delete right;
1142 fputs(DOCTYPE, stream: page);
1143 fputs(s: "<html>\n<head>\n<title></title>\n", stream: page);
1144 printCSS(f: page);
1145 fputs(s: "</head>\n<body>\n", stream: page);
1146 }
1147 }
1148
1149 if (noframes) {
1150 if (stout) {
1151 page = stdout;
1152 } else {
1153 GooString *right = new GooString(fileName);
1154 if (!xml) {
1155 right->append(str: ".html");
1156 }
1157 if (xml) {
1158 right->append(str: ".xml");
1159 }
1160 if (!(page = fopen(filename: right->c_str(), modes: "w"))) {
1161 error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'", right);
1162 delete right;
1163 return;
1164 }
1165 delete right;
1166 }
1167
1168 const std::string htmlEncoding = mapEncodingToHtml(encoding: globalParams->getTextEncodingName());
1169 if (xml) {
1170 fprintf(stream: page, format: "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding.c_str());
1171 fputs(s: "<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", stream: page);
1172 fprintf(stream: page, format: "<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
1173 } else {
1174 fprintf(stream: page, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->c_str());
1175
1176 fprintf(stream: page, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
1177
1178 dumpMetaVars(page);
1179 printCSS(f: page);
1180 fprintf(stream: page, format: "</head>\n");
1181 fprintf(stream: page, format: "<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
1182 }
1183 }
1184 ok = true;
1185}
1186
1187HtmlOutputDev::~HtmlOutputDev()
1188{
1189 delete Docname;
1190 delete docTitle;
1191
1192 for (auto entry : glMetaVars) {
1193 delete entry;
1194 }
1195
1196 if (fContentsFrame) {
1197 fputs(s: "</body>\n</html>\n", stream: fContentsFrame);
1198 fclose(stream: fContentsFrame);
1199 }
1200 if (page != nullptr) {
1201 if (xml) {
1202 fputs(s: "</pdf2xml>\n", stream: page);
1203 fclose(stream: page);
1204 } else if (!complexMode || xml || noframes) {
1205 fputs(s: "</body>\n</html>\n", stream: page);
1206 fclose(stream: page);
1207 }
1208 }
1209 if (pages) {
1210 delete pages;
1211 }
1212}
1213
1214void HtmlOutputDev::startPage(int pageNumA, GfxState *state, XRef *xref)
1215{
1216#if 0
1217 if (mode&&!xml){
1218 if (write){
1219 write=false;
1220 GooString* fname=Dirname(Docname);
1221 fname->append("image.log");
1222 if((tin=fopen(getFileNameFromPath(fname->c_str(),fname->getLength()),"w"))==NULL){
1223 printf("Error : can not open %s",fname);
1224 exit(1);
1225 }
1226 delete fname;
1227 // if(state->getRotation()!=0)
1228 // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
1229 // else
1230 fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
1231 }
1232 }
1233#endif
1234
1235 pageNum = pageNumA;
1236 const std::string str = gbasename(filename: Docname->c_str());
1237 pages->clear();
1238 if (!noframes) {
1239 if (fContentsFrame) {
1240 if (complexMode) {
1241 fprintf(stream: fContentsFrame, format: "<a href=\"%s-%d.html\"", str.c_str(), pageNum);
1242 } else {
1243 fprintf(stream: fContentsFrame, format: "<a href=\"%ss.html#%d\"", str.c_str(), pageNum);
1244 }
1245 fprintf(stream: fContentsFrame, format: " target=\"contents\" >Page %d</a><br/>\n", pageNum);
1246 }
1247 }
1248
1249 pages->pageWidth = static_cast<int>(state->getPageWidth());
1250 pages->pageHeight = static_cast<int>(state->getPageHeight());
1251}
1252
1253void HtmlOutputDev::endPage()
1254{
1255 std::unique_ptr<Links> linksList = docPage->getLinks();
1256 for (AnnotLink *link : linksList->getLinks()) {
1257 doProcessLink(link);
1258 }
1259
1260 pages->conv();
1261 pages->coalesce();
1262 pages->dump(f: page, pageNum, backgroundImages);
1263
1264 // I don't yet know what to do in the case when there are pages of different
1265 // sizes and we want complex output: running ghostscript many times
1266 // seems very inefficient. So for now I'll just use last page's size
1267 maxPageWidth = pages->pageWidth;
1268 maxPageHeight = pages->pageHeight;
1269
1270 // if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame);
1271 if (!stout && !globalParams->getErrQuiet()) {
1272 printf(format: "Page-%d\n", (pageNum));
1273 }
1274}
1275
1276void HtmlOutputDev::addBackgroundImage(const std::string &img)
1277{
1278 backgroundImages.push_back(x: img);
1279}
1280
1281void HtmlOutputDev::updateFont(GfxState *state)
1282{
1283 pages->updateFont(state);
1284}
1285
1286void HtmlOutputDev::beginString(GfxState *state, const GooString *s)
1287{
1288 pages->beginString(state, s);
1289}
1290
1291void HtmlOutputDev::endString(GfxState *state)
1292{
1293 pages->endString();
1294}
1295
1296void HtmlOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int /*nBytes*/, const Unicode *u, int uLen)
1297{
1298 if (!showHidden && (state->getRender() & 3) == 3) {
1299 return;
1300 }
1301 pages->addChar(state, x, y, dx, dy, ox: originX, oy: originY, u, uLen);
1302}
1303
1304void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
1305{
1306 InMemoryFile ims;
1307 FILE *f1 = nullptr;
1308 int c;
1309
1310 // open the image file
1311 std::unique_ptr<GooString> fName = createImageFileName(ext: "jpg");
1312 f1 = dataUrls ? ims.open(mode: "wb") : fopen(filename: fName->c_str(), modes: "wb");
1313 if (!f1) {
1314 error(category: errIO, pos: -1, msg: "Couldn't open image file '{0:t}'", fName.get());
1315 return;
1316 }
1317
1318 // initialize stream
1319 str = str->getNextStream();
1320 str->reset();
1321
1322 // copy the stream
1323 while ((c = str->getChar()) != EOF) {
1324 fputc(c: c, stream: f1);
1325 }
1326
1327 fclose(stream: f1);
1328
1329 if (dataUrls) {
1330 fName = std::make_unique<GooString>(args: std::string("data:image/jpeg;base64,") + gbase64Encode(input: ims.getBuffer()));
1331 }
1332 pages->addImage(fname: std::move(fName), state);
1333}
1334
1335void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask)
1336{
1337#ifdef ENABLE_LIBPNG
1338 FILE *f1;
1339 InMemoryFile ims;
1340
1341 if (!colorMap && !isMask) {
1342 error(category: errInternal, pos: -1, msg: "Can't have color image without a color map");
1343 return;
1344 }
1345
1346 // open the image file
1347 std::unique_ptr<GooString> fName = createImageFileName(ext: "png");
1348 f1 = dataUrls ? ims.open(mode: "wb") : fopen(filename: fName->c_str(), modes: "wb");
1349 if (!f1) {
1350 error(category: errIO, pos: -1, msg: "Couldn't open image file '{0:t}'", fName.get());
1351 return;
1352 }
1353
1354 PNGWriter *writer = new PNGWriter(isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB);
1355 // TODO can we calculate the resolution of the image?
1356 if (!writer->init(f: f1, width, height, hDPI: 72, vDPI: 72)) {
1357 error(category: errInternal, pos: -1, msg: "Can't init PNG for image '{0:t}'", fName.get());
1358 delete writer;
1359 fclose(stream: f1);
1360 return;
1361 }
1362
1363 if (!isMask) {
1364 unsigned char *p;
1365 GfxRGB rgb;
1366 unsigned char *row = (unsigned char *)gmalloc(size: 3 * width); // 3 bytes/pixel: RGB
1367 unsigned char **row_pointer = &row;
1368
1369 // Initialize the image stream
1370 ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits());
1371 imgStr->reset();
1372
1373 // For each line...
1374 for (int y = 0; y < height; y++) {
1375
1376 // Convert into a PNG row
1377 p = imgStr->getLine();
1378 if (!p) {
1379 error(category: errIO, pos: -1, msg: "Failed to read PNG. '{0:t}' will be incorrect", fName.get());
1380 gfree(p: row);
1381 delete writer;
1382 delete imgStr;
1383 fclose(stream: f1);
1384 return;
1385 }
1386 for (int x = 0; x < width; x++) {
1387 colorMap->getRGB(x: p, rgb: &rgb);
1388 // Write the RGB pixels into the row
1389 row[3 * x] = colToByte(x: rgb.r);
1390 row[3 * x + 1] = colToByte(x: rgb.g);
1391 row[3 * x + 2] = colToByte(x: rgb.b);
1392 p += colorMap->getNumPixelComps();
1393 }
1394
1395 if (!writer->writeRow(row: row_pointer)) {
1396 error(category: errIO, pos: -1, msg: "Failed to write into PNG '{0:t}'", fName.get());
1397 delete writer;
1398 delete imgStr;
1399 fclose(stream: f1);
1400 return;
1401 }
1402 }
1403 gfree(p: row);
1404 imgStr->close();
1405 delete imgStr;
1406 } else { // isMask == true
1407 int size = (width + 7) / 8;
1408
1409 // PDF masks use 0 = draw current color, 1 = leave unchanged.
1410 // We invert this to provide the standard interpretation of alpha
1411 // (0 = transparent, 1 = opaque). If the colorMap already inverts
1412 // the mask we leave the data unchanged.
1413 int invert_bits = 0xff;
1414 if (colorMap) {
1415 GfxGray gray;
1416 unsigned char zero[gfxColorMaxComps];
1417 memset(s: zero, c: 0, n: sizeof(zero));
1418 colorMap->getGray(x: zero, gray: &gray);
1419 if (colToByte(x: gray) == 0) {
1420 invert_bits = 0x00;
1421 }
1422 }
1423
1424 str->reset();
1425 unsigned char *png_row = (unsigned char *)gmalloc(size);
1426
1427 for (int ri = 0; ri < height; ++ri) {
1428 for (int i = 0; i < size; i++) {
1429 png_row[i] = str->getChar() ^ invert_bits;
1430 }
1431
1432 if (!writer->writeRow(row: &png_row)) {
1433 error(category: errIO, pos: -1, msg: "Failed to write into PNG '{0:t}'", fName.get());
1434 delete writer;
1435 fclose(stream: f1);
1436 gfree(p: png_row);
1437 return;
1438 }
1439 }
1440 str->close();
1441 gfree(p: png_row);
1442 }
1443
1444 str->close();
1445
1446 writer->close();
1447 delete writer;
1448 fclose(stream: f1);
1449
1450 if (dataUrls) {
1451 fName = std::make_unique<GooString>(args: std::string("data:image/png;base64,") + gbase64Encode(input: ims.getBuffer()));
1452 }
1453 pages->addImage(fname: std::move(fName), state);
1454#else
1455 return;
1456#endif
1457}
1458
1459std::unique_ptr<GooString> HtmlOutputDev::createImageFileName(const char *ext)
1460{
1461 return GooString::format(fmt: "{0:s}-{1:d}_{2:d}.{3:s}", Docname->c_str(), pageNum, pages->getNumImages() + 1, ext);
1462}
1463
1464void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
1465{
1466
1467 if (ignore || (complexMode && !xml)) {
1468 OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1469 return;
1470 }
1471
1472 // dump JPEG file
1473 if (dumpJPEG && str->getKind() == strDCT) {
1474 drawJpegImage(state, str);
1475 } else {
1476#ifdef ENABLE_LIBPNG
1477 drawPngImage(state, str, width, height, colorMap: nullptr, isMask: true);
1478#else
1479 OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
1480#endif
1481 }
1482}
1483
1484void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg)
1485{
1486
1487 if (ignore || (complexMode && !xml)) {
1488 OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
1489 return;
1490 }
1491
1492 /*if( !globalParams->getErrQuiet() )
1493 printf("image stream of kind %d\n", str->getKind());*/
1494 // dump JPEG file
1495 if (dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 || colorMap->getNumPixelComps() == 3) && !inlineImg) {
1496 drawJpegImage(state, str);
1497 } else {
1498#ifdef ENABLE_LIBPNG
1499 drawPngImage(state, str, width, height, colorMap);
1500#else
1501 OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
1502#endif
1503 }
1504}
1505
1506void HtmlOutputDev::doProcessLink(AnnotLink *link)
1507{
1508 double _x1, _y1, _x2, _y2;
1509 int x1, y1, x2, y2;
1510
1511 link->getRect(x1: &_x1, y1: &_y1, x2: &_x2, y2: &_y2);
1512 cvtUserToDev(ux: _x1, uy: _y1, dx: &x1, dy: &y1);
1513
1514 cvtUserToDev(ux: _x2, uy: _y2, dx: &x2, dy: &y2);
1515
1516 GooString *_dest = getLinkDest(link);
1517 HtmlLink t((double)x1, (double)y2, (double)x2, (double)y1, _dest);
1518 pages->AddLink(x: t);
1519 delete _dest;
1520}
1521
1522GooString *HtmlOutputDev::getLinkDest(AnnotLink *link)
1523{
1524 if (!link->getAction()) {
1525 return new GooString();
1526 }
1527 switch (link->getAction()->getKind()) {
1528 case actionGoTo: {
1529 int destPage = 1;
1530 LinkGoTo *ha = (LinkGoTo *)link->getAction();
1531 std::unique_ptr<LinkDest> dest;
1532 if (ha->getDest() != nullptr) {
1533 dest = std::make_unique<LinkDest>(args: *ha->getDest());
1534 } else if (ha->getNamedDest() != nullptr) {
1535 dest = catalog->findDest(name: ha->getNamedDest());
1536 }
1537
1538 if (dest) {
1539 GooString *file = new GooString(gbasename(filename: Docname->c_str()));
1540
1541 if (dest->isPageRef()) {
1542 const Ref pageref = dest->getPageRef();
1543 destPage = catalog->findPage(pageRef: pageref);
1544 } else {
1545 destPage = dest->getPageNum();
1546 }
1547
1548 /* complex simple
1549 frames file-4.html files.html#4
1550 noframes file.html#4 file.html#4
1551 */
1552 if (noframes) {
1553 file->append(str: ".html#");
1554 file->append(str: std::to_string(val: destPage));
1555 } else {
1556 if (complexMode) {
1557 file->append(str: "-");
1558 file->append(str: std::to_string(val: destPage));
1559 file->append(str: ".html");
1560 } else {
1561 file->append(str: "s.html#");
1562 file->append(str: std::to_string(val: destPage));
1563 }
1564 }
1565
1566 if (printCommands) {
1567 printf(format: " link to page %d ", destPage);
1568 }
1569 return file;
1570 } else {
1571 return new GooString();
1572 }
1573 }
1574 case actionGoToR: {
1575 LinkGoToR *ha = (LinkGoToR *)link->getAction();
1576 LinkDest *dest = nullptr;
1577 int destPage = 1;
1578 GooString *file = new GooString();
1579 if (ha->getFileName()) {
1580 delete file;
1581 file = new GooString(ha->getFileName()->c_str());
1582 }
1583 if (ha->getDest() != nullptr) {
1584 dest = new LinkDest(*ha->getDest());
1585 }
1586 if (dest && file) {
1587 if (!(dest->isPageRef())) {
1588 destPage = dest->getPageNum();
1589 }
1590 delete dest;
1591
1592 if (printCommands) {
1593 printf(format: " link to page %d ", destPage);
1594 }
1595 if (printHtml) {
1596 const char *p = file->c_str() + file->getLength() - 4;
1597 if (!strcmp(s1: p, s2: ".pdf") || !strcmp(s1: p, s2: ".PDF")) {
1598 file->del(i: file->getLength() - 4, n: 4);
1599 file->append(str: ".html");
1600 }
1601 file->append(c: '#');
1602 file->append(str: std::to_string(val: destPage));
1603 }
1604 }
1605 if (printCommands && file) {
1606 printf(format: "filename %s\n", file->c_str());
1607 }
1608 return file;
1609 }
1610 case actionURI: {
1611 LinkURI *ha = (LinkURI *)link->getAction();
1612 GooString *file = new GooString(ha->getURI());
1613 // printf("uri : %s\n",file->c_str());
1614 return file;
1615 }
1616 case actionLaunch:
1617 if (printHtml) {
1618 LinkLaunch *ha = (LinkLaunch *)link->getAction();
1619 GooString *file = new GooString(ha->getFileName()->c_str());
1620 const char *p = file->c_str() + file->getLength() - 4;
1621 if (!strcmp(s1: p, s2: ".pdf") || !strcmp(s1: p, s2: ".PDF")) {
1622 file->del(i: file->getLength() - 4, n: 4);
1623 file->append(str: ".html");
1624 }
1625 if (printCommands) {
1626 printf(format: "filename %s", file->c_str());
1627 }
1628
1629 return file;
1630 }
1631 // fallthrough
1632 default:
1633 return new GooString();
1634 }
1635}
1636
1637void HtmlOutputDev::dumpMetaVars(FILE *file)
1638{
1639 GooString *var;
1640
1641 for (const HtmlMetaVar *t : glMetaVars) {
1642 var = t->toString();
1643 fprintf(stream: file, format: "%s\n", var->c_str());
1644 delete var;
1645 }
1646}
1647
1648bool HtmlOutputDev::dumpDocOutline(PDFDoc *doc)
1649{
1650 FILE *output = nullptr;
1651 bool bClose = false;
1652
1653 if (!ok) {
1654 return false;
1655 }
1656
1657 Outline *outline = doc->getOutline();
1658 if (!outline) {
1659 return false;
1660 }
1661
1662 const std::vector<OutlineItem *> *outlines = outline->getItems();
1663 if (!outlines) {
1664 return false;
1665 }
1666
1667 if (!complexMode || xml) {
1668 output = page;
1669 } else if (complexMode && !xml) {
1670 if (noframes) {
1671 output = page;
1672 fputs(s: "<hr/>\n", stream: output);
1673 } else {
1674 GooString *str = Docname->copy();
1675 str->append(str: "-outline.html");
1676 output = fopen(filename: str->c_str(), modes: "w");
1677 delete str;
1678 if (output == nullptr) {
1679 return false;
1680 }
1681 bClose = true;
1682
1683 const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(encoding: globalParams->getTextEncodingName());
1684
1685 fprintf(stream: output,
1686 format: "<html xmlns=\"http://www.w3.org/1999/xhtml\" "
1687 "lang=\"\" xml:lang=\"\">\n"
1688 "<head>\n"
1689 "<title>Document Outline</title>\n"
1690 "<meta http-equiv=\"Content-Type\" content=\"text/html; "
1691 "charset=%s\"/>\n"
1692 "</head>\n<body>\n",
1693 htmlEncoding.c_str());
1694 }
1695 }
1696
1697 if (!xml) {
1698 bool done = newHtmlOutlineLevel(output, outlines);
1699 if (done && !complexMode) {
1700 fputs(s: "<hr/>\n", stream: output);
1701 }
1702
1703 if (bClose) {
1704 fputs(s: "</body>\n</html>\n", stream: output);
1705 fclose(stream: output);
1706 }
1707 } else {
1708 newXmlOutlineLevel(output, outlines);
1709 }
1710
1711 return true;
1712}
1713
1714bool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level)
1715{
1716 bool atLeastOne = false;
1717
1718 if (level == 1) {
1719 fputs(s: "<a name=\"outline\"></a>", stream: output);
1720 fputs(s: "<h1>Document Outline</h1>\n", stream: output);
1721 }
1722 fputs(s: "<ul>\n", stream: output);
1723
1724 for (OutlineItem *item : *outlines) {
1725 const auto &title = item->getTitle();
1726 std::unique_ptr<GooString> titleStr = HtmlFont::HtmlFilter(u: title.data(), uLen: title.size());
1727
1728 GooString *linkName = nullptr;
1729
1730 const int itemPage = getOutlinePageNum(item);
1731 if (itemPage > 0) {
1732 /* complex simple
1733 frames file-4.html files.html#4
1734 noframes file.html#4 file.html#4
1735 */
1736 linkName = new GooString(gbasename(filename: Docname->c_str()));
1737 if (noframes) {
1738 linkName->append(str: ".html#");
1739 linkName->append(str: std::to_string(val: itemPage));
1740 } else {
1741 if (complexMode) {
1742 linkName->append(str: "-");
1743 linkName->append(str: std::to_string(val: itemPage));
1744 linkName->append(str: ".html");
1745 } else {
1746 linkName->append(str: "s.html#");
1747 linkName->append(str: std::to_string(val: itemPage));
1748 }
1749 }
1750 }
1751
1752 fputs(s: "<li>", stream: output);
1753 if (linkName) {
1754 fprintf(stream: output, format: "<a href=\"%s\">", linkName->c_str());
1755 }
1756 if (titleStr) {
1757 fputs(s: titleStr->c_str(), stream: output);
1758 }
1759 if (linkName) {
1760 fputs(s: "</a>", stream: output);
1761 delete linkName;
1762 }
1763 atLeastOne = true;
1764
1765 item->open();
1766 if (item->hasKids() && item->getKids()) {
1767 fputs(s: "\n", stream: output);
1768 newHtmlOutlineLevel(output, outlines: item->getKids(), level: level + 1);
1769 }
1770 fputs(s: "</li>\n", stream: output);
1771 }
1772 fputs(s: "</ul>\n", stream: output);
1773
1774 return atLeastOne;
1775}
1776
1777void HtmlOutputDev::newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines)
1778{
1779 fputs(s: "<outline>\n", stream: output);
1780
1781 for (OutlineItem *item : *outlines) {
1782 const std::vector<Unicode> &title = item->getTitle();
1783 auto titleStr = HtmlFont::HtmlFilter(u: title.data(), uLen: title.size());
1784 const int itemPage = getOutlinePageNum(item);
1785 if (itemPage > 0) {
1786 fprintf(stream: output, format: "<item page=\"%d\">%s</item>\n", itemPage, titleStr->c_str());
1787 } else {
1788 fprintf(stream: output, format: "<item>%s</item>\n", titleStr->c_str());
1789 }
1790
1791 item->open();
1792 if (item->hasKids() && item->getKids()) {
1793 newXmlOutlineLevel(output, outlines: item->getKids());
1794 }
1795 }
1796
1797 fputs(s: "</outline>\n", stream: output);
1798}
1799
1800int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
1801{
1802 const LinkAction *action = item->getAction();
1803 const LinkGoTo *link = nullptr;
1804 std::unique_ptr<LinkDest> linkdest;
1805 int pagenum = -1;
1806
1807 if (!action || action->getKind() != actionGoTo) {
1808 return pagenum;
1809 }
1810
1811 link = static_cast<const LinkGoTo *>(action);
1812
1813 if (!link || !link->isOk()) {
1814 return pagenum;
1815 }
1816
1817 if (link->getDest()) {
1818 linkdest = std::make_unique<LinkDest>(args: *link->getDest());
1819 } else if (link->getNamedDest()) {
1820 linkdest = catalog->findDest(name: link->getNamedDest());
1821 }
1822
1823 if (!linkdest) {
1824 return pagenum;
1825 }
1826
1827 if (linkdest->isPageRef()) {
1828 const Ref pageref = linkdest->getPageRef();
1829 pagenum = catalog->findPage(pageRef: pageref);
1830 } else {
1831 pagenum = linkdest->getPageNum();
1832 }
1833
1834 return pagenum;
1835}
1836

source code of poppler/utils/HtmlOutputDev.cc