1 | //======================================================================== |
2 | // |
3 | // HtmlOutputDev.cc |
4 | // |
5 | // Copyright 1997-2002 Glyph & Cog, LLC |
6 | // |
7 | // Changed 1999-2000 by G.Ovtcharov |
8 | // |
9 | // Changed 2002 by Mikhail Kruk |
10 | // |
11 | //======================================================================== |
12 | |
13 | //======================================================================== |
14 | // |
15 | // Modified under the Poppler project - http://poppler.freedesktop.org |
16 | // |
17 | // All changes made under the Poppler project to this file are licensed |
18 | // under GPL version 2 or later |
19 | // |
20 | // Copyright (C) 2005-2013, 2016-2022 Albert Astals Cid <aacid@kde.org> |
21 | // Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org> |
22 | // Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> |
23 | // Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp> |
24 | // Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com> |
25 | // Copyright (C) 2009 Warren Toomey <wkt@tuhs.org> |
26 | // Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org> |
27 | // Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com> |
28 | // Copyright (C) 2010, 2012, 2013, 2022 Adrian Johnson <ajohnson@redneon.com> |
29 | // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> |
30 | // Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) |
31 | // Copyright (C) 2011 Joshua Richardson <jric@chegg.com> |
32 | // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> |
33 | // Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com> |
34 | // Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com> |
35 | // Copyright (C) 2012 Gerald Schmidt <solahcin@gmail.com> |
36 | // Copyright (C) 2012 Pino Toscano <pino@kde.org> |
37 | // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de> |
38 | // Copyright (C) 2013 Julien Nabet <serval2412@yahoo.fr> |
39 | // Copyright (C) 2013 Johannes Brandstätter <jbrandstaetter@gmail.com> |
40 | // Copyright (C) 2014 Fabio D'Urso <fabiodurso@hotmail.it> |
41 | // Copyright (C) 2016 Vincent Le Garrec <legarrec.vincent@gmail.com> |
42 | // Copyright (C) 2017 Caolán McNamara <caolanm@redhat.com> |
43 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
44 | // Copyright (C) 2018 Thibaut Brard <thibaut.brard@gmail.com> |
45 | // Copyright (C) 2018-2020 Adam Reichold <adam.reichold@t-online.de> |
46 | // Copyright (C) 2019, 2020, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de> |
47 | // Copyright (C) 2020 Eddie Kohler <ekohler@gmail.com> |
48 | // Copyright (C) 2021 Christopher Hasse <hasse.christopher@gmail.com> |
49 | // Copyright (C) 2022 Brian Rosenfield <brosenfi@yahoo.com> |
50 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
51 | // |
52 | // To see a description of the changes please see the Changelog file that |
53 | // came with your tarball or type make ChangeLog if you are building from git |
54 | // |
55 | //======================================================================== |
56 | |
57 | #include "config.h" |
58 | #include <cstdio> |
59 | #include <cstdlib> |
60 | #include <cstdarg> |
61 | #include <cstddef> |
62 | #include <cctype> |
63 | #include <cmath> |
64 | #include <iostream> |
65 | #include "goo/GooString.h" |
66 | #include "goo/gbasename.h" |
67 | #include "goo/gbase64.h" |
68 | #include "goo/gbasename.h" |
69 | #include "UnicodeMap.h" |
70 | #include "goo/gmem.h" |
71 | #include "Error.h" |
72 | #include "GfxState.h" |
73 | #include "Page.h" |
74 | #include "Annot.h" |
75 | #include "PNGWriter.h" |
76 | #include "GlobalParams.h" |
77 | #include "HtmlOutputDev.h" |
78 | #include "HtmlFonts.h" |
79 | #include "HtmlUtils.h" |
80 | #include "InMemoryFile.h" |
81 | #include "Outline.h" |
82 | #include "PDFDoc.h" |
83 | |
// Prefix for diagnostic output: streams "<file>: <line>: DEBUG: " ahead of the
// message, so it must be used as the first operand after an ostream (see the
// `std::cerr << DEBUG << ...` call sites in this file).
#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
85 | |
86 | class HtmlImage |
87 | { |
88 | public: |
89 | HtmlImage(std::unique_ptr<GooString> &&_fName, GfxState *state) : fName(std::move(_fName)) |
90 | { |
91 | state->transform(x1: 0, y1: 0, x2: &xMin, y2: &yMax); |
92 | state->transform(x1: 1, y1: 1, x2: &xMax, y2: &yMin); |
93 | } |
94 | ~HtmlImage() = default; |
95 | HtmlImage(const HtmlImage &) = delete; |
96 | HtmlImage &operator=(const HtmlImage &) = delete; |
97 | |
98 | double xMin, xMax; // image x coordinates |
99 | double yMin, yMax; // image y coordinates |
100 | std::unique_ptr<GooString> fName; // image file name |
101 | }; |
102 | |
// Tells whether the distance from x to y is strictly smaller than the
// distance from x to z (ties count as "not closer").
static inline bool IS_CLOSER(double x, double y, double z)
{
    const double distanceToY = std::fabs(x - y);
    const double distanceToZ = std::fabs(x - z);
    return distanceToY < distanceToZ;
}
108 | |
// Conversion options. They are only declared here (extern); their definitions
// are not part of the visible portion of this file.
extern bool complexMode; // coalesce(): duplicates are dropped only when false; when true, merging requires matching fonts
extern bool singleHtml; // selects the "-html.html" page file (opened for append) instead of one file per page
extern bool dataUrls;
extern bool ignore;
extern bool printCommands;
extern bool printHtml;
extern bool noframes; // when true, page output goes to the caller's file instead of a separate page file
extern bool stout;
extern bool xml; // consulted by coalesce() when choosing the separator inserted between merged strings
extern bool noRoundedCoordinates; // dumpAsXML(): print coordinates with %f instead of rounding them
extern bool showHidden;
extern bool noMerge; // when true, coalesce() never inserts a line break between strings

// Scale factor applied to a string's height to decide whether a horizontal
// gap separates words (see HtmlPage::addChar and HtmlPage::coalesce).
extern double wordBreakThreshold;

// Gates the diagnostic output written to std::cerr in this file.
static bool debug = false;
125 | |
// Compiled out (#if 0). Kept for reference: returns a new GooString holding
// everything up to and including the last SLASH in str, or an empty string
// when str contains no SLASH.
#if 0
static GooString* Dirname(GooString* str){

  char *p=str->c_str();
  int len=str->getLength();
  for (int i=len-1;i>=0;i--)
    if (*(p+i)==SLASH)
      return new GooString(p,i+1);
  return new GooString();
}
#endif
137 | |
138 | static std::unique_ptr<GooString> print_matrix(const double *mat) |
139 | { |
140 | return GooString::format(fmt: "[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]" , *mat, mat[1], mat[2], mat[3], mat[4], mat[5]); |
141 | } |
142 | |
143 | static std::unique_ptr<GooString> print_uni_str(const Unicode *u, const unsigned uLen) |
144 | { |
145 | if (!uLen) { |
146 | return std::make_unique<GooString>(args: "" ); |
147 | } |
148 | std::unique_ptr<GooString> gstr_buff0 = GooString::format(fmt: "{0:c}" , (*u < 0x7F ? *u & 0xFF : '?')); |
149 | for (unsigned i = 1; i < uLen; i++) { |
150 | if (u[i] < 0x7F) { |
151 | gstr_buff0->append(c: static_cast<char>(u[i]) & 0xFF); |
152 | } |
153 | } |
154 | |
155 | return gstr_buff0; |
156 | } |
157 | |
158 | //------------------------------------------------------------------------ |
159 | // HtmlString |
160 | //------------------------------------------------------------------------ |
161 | |
162 | HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) : fonts(_fonts) |
163 | { |
164 | double x, y; |
165 | |
166 | state->transform(x1: state->getCurX(), y1: state->getCurY(), x2: &x, y2: &y); |
167 | if (std::shared_ptr<const GfxFont> font = state->getFont()) { |
168 | double ascent = font->getAscent(); |
169 | double descent = font->getDescent(); |
170 | if (ascent > 1.05) { |
171 | // printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent ); |
172 | ascent = 1.05; |
173 | } |
174 | if (descent < -0.4) { |
175 | // printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent ); |
176 | descent = -0.4; |
177 | } |
178 | yMin = y - ascent * fontSize; |
179 | yMax = y - descent * fontSize; |
180 | GfxRGB rgb; |
181 | state->getFillRGB(rgb: &rgb); |
182 | HtmlFont hfont = HtmlFont(*font, std::lround(x: fontSize), rgb, state->getFillOpacity()); |
183 | if (isMatRotOrSkew(mat: state->getTextMat())) { |
184 | double normalizedMatrix[4]; |
185 | memcpy(dest: normalizedMatrix, src: state->getTextMat(), n: sizeof(normalizedMatrix)); |
186 | // browser rotates the opposite way |
187 | // so flip the sign of the angle -> sin() components change sign |
188 | if (debug) { |
189 | std::cerr << DEBUG << "before transform: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl; |
190 | } |
191 | normalizedMatrix[1] *= -1; |
192 | normalizedMatrix[2] *= -1; |
193 | if (debug) { |
194 | std::cerr << DEBUG << "after reflecting angle: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl; |
195 | } |
196 | normalizeRotMat(mat: normalizedMatrix); |
197 | if (debug) { |
198 | std::cerr << DEBUG << "after norm: " << print_matrix(mat: normalizedMatrix)->c_str() << std::endl; |
199 | } |
200 | hfont.setRotMat(normalizedMatrix); |
201 | } |
202 | fontpos = fonts->AddFont(font: hfont); |
203 | } else { |
204 | // this means that the PDF file draws text without a current font, |
205 | // which should never happen |
206 | yMin = y - 0.95 * fontSize; |
207 | yMax = y + 0.35 * fontSize; |
208 | fontpos = 0; |
209 | } |
210 | if (yMin == yMax) { |
211 | // this is a sanity check for a case that shouldn't happen -- but |
212 | // if it does happen, we want to avoid dividing by zero later |
213 | yMin = y; |
214 | yMax = y + 1; |
215 | } |
216 | col = 0; |
217 | text = nullptr; |
218 | xRight = nullptr; |
219 | link = nullptr; |
220 | len = size = 0; |
221 | yxNext = nullptr; |
222 | xyNext = nullptr; |
223 | htext = std::make_unique<GooString>(); |
224 | dir = textDirUnknown; |
225 | } |
226 | |
227 | HtmlString::~HtmlString() |
228 | { |
229 | gfree(p: text); |
230 | gfree(p: xRight); |
231 | } |
232 | |
233 | void HtmlString::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u) |
234 | { |
235 | if (dir == textDirUnknown) { |
236 | // dir = UnicodeMap::getDirection(u); |
237 | dir = textDirLeftRight; |
238 | } |
239 | |
240 | if (len == size) { |
241 | size += 16; |
242 | text = (Unicode *)grealloc(p: text, size: size * sizeof(Unicode)); |
243 | xRight = (double *)grealloc(p: xRight, size: size * sizeof(double)); |
244 | } |
245 | text[len] = u; |
246 | if (len == 0) { |
247 | xMin = x; |
248 | } |
249 | xMax = xRight[len] = x + dx; |
250 | // printf("added char: %f %f xright = %f\n", x, dx, x+dx); |
251 | ++len; |
252 | } |
253 | |
254 | void HtmlString::endString() |
255 | { |
256 | if (dir == textDirRightLeft && len > 1) { |
257 | // printf("will reverse!\n"); |
258 | for (int i = 0; i < len / 2; i++) { |
259 | Unicode ch = text[i]; |
260 | text[i] = text[len - i - 1]; |
261 | text[len - i - 1] = ch; |
262 | } |
263 | } |
264 | } |
265 | |
266 | //------------------------------------------------------------------------ |
267 | // HtmlPage |
268 | //------------------------------------------------------------------------ |
269 | |
270 | HtmlPage::HtmlPage(bool rawOrderA) |
271 | { |
272 | rawOrder = rawOrderA; |
273 | curStr = nullptr; |
274 | yxStrings = nullptr; |
275 | xyStrings = nullptr; |
276 | yxCur1 = yxCur2 = nullptr; |
277 | fonts = new HtmlFontAccu(); |
278 | links = new HtmlLinks(); |
279 | pageWidth = 0; |
280 | pageHeight = 0; |
281 | fontsPageMarker = 0; |
282 | DocName = nullptr; |
283 | firstPage = -1; |
284 | } |
285 | |
286 | HtmlPage::~HtmlPage() |
287 | { |
288 | clear(); |
289 | delete DocName; |
290 | delete fonts; |
291 | delete links; |
292 | for (auto entry : imgList) { |
293 | delete entry; |
294 | } |
295 | } |
296 | |
297 | void HtmlPage::updateFont(GfxState *state) |
298 | { |
299 | const char *name; |
300 | int code; |
301 | double dimLength; |
302 | |
303 | // adjust the font size |
304 | fontSize = state->getTransformedFontSize(); |
305 | const GfxFont *const font = state->getFont().get(); |
306 | if (font && font->getType() == fontType3) { |
307 | // Grab the font size from the font bounding box if possible - remember to |
308 | // scale from the glyph coordinate system. |
309 | const double *fontBBox = font->getFontBBox(); |
310 | const double *fontMat = font->getFontMatrix(); |
311 | dimLength = (fontBBox[3] - fontBBox[1]) * fontMat[3]; |
312 | if (dimLength > 0) { |
313 | fontSize *= dimLength; |
314 | } else { |
315 | // This is a hack which makes it possible to deal with some Type 3 |
316 | // fonts. The problem is that it's impossible to know what the |
317 | // base coordinate system used in the font is without actually |
318 | // rendering the font. This code tries to guess by looking at the |
319 | // width of the character 'm' (which breaks if the font is a |
320 | // subset that doesn't contain 'm'). |
321 | for (code = 0; code < 256; ++code) { |
322 | if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && name[0] == 'm' && name[1] == '\0') { |
323 | break; |
324 | } |
325 | } |
326 | if (code < 256) { |
327 | dimLength = ((Gfx8BitFont *)font)->getWidth(c: code); |
328 | if (dimLength != 0) { |
329 | // 600 is a generic average 'm' width -- yes, this is a hack |
330 | fontSize *= dimLength / 0.6; |
331 | } |
332 | } |
333 | if (fontMat[0] != 0) { |
334 | fontSize *= fabs(x: fontMat[3] / fontMat[0]); |
335 | } |
336 | } |
337 | } |
338 | } |
339 | |
340 | void HtmlPage::beginString(GfxState *state, const GooString *s) |
341 | { |
342 | curStr = new HtmlString(state, fontSize, fonts); |
343 | } |
344 | |
345 | void HtmlPage::conv() |
346 | { |
347 | for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { |
348 | tmp->htext = HtmlFont::HtmlFilter(u: tmp->text, uLen: tmp->len); |
349 | |
350 | size_t linkIndex = 0; |
351 | if (links->inLink(xmin: tmp->xMin, ymin: tmp->yMin, xmax: tmp->xMax, ymax: tmp->yMax, p&: linkIndex)) { |
352 | tmp->link = links->getLink(i: linkIndex); |
353 | } |
354 | } |
355 | } |
356 | |
357 | void HtmlPage::addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen) |
358 | { |
359 | double x1, y1, w1, h1, dx2, dy2; |
360 | int n, i; |
361 | state->transform(x1: x, y1: y, x2: &x1, y2: &y1); |
362 | n = curStr->len; |
363 | |
364 | // check that new character is in the same direction as current string |
365 | // and is not too far away from it before adding |
366 | // if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || |
367 | // XXX |
368 | if (debug) { |
369 | const double *text_mat = state->getTextMat(); |
370 | // rotation is (cos q, sin q, -sin q, cos q, 0, 0) |
371 | // sin q is zero iff there is no rotation, or 180 deg. rotation; |
372 | // for 180 rotation, cos q will be negative |
373 | if (text_mat[0] < 0 || !is_within(a: text_mat[1], thresh: .1, b: 0)) { |
374 | std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen)->c_str() << '"' << std::endl; |
375 | std::cerr << "text " << print_matrix(mat: state->getTextMat())->c_str(); |
376 | } |
377 | } |
378 | if (n > 0 && // don't start a new string, unless there is already a string |
379 | // TODO: the following line assumes that text is flowing left to |
380 | // right, which will not necessarily be the case, e.g. if rotated; |
381 | // It assesses whether or not two characters are close enough to |
382 | // be part of the same string |
383 | fabs(x: x1 - curStr->xRight[n - 1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) && |
384 | // rotation is (cos q, sin q, -sin q, cos q, 0, 0) |
385 | // sin q is zero iff there is no rotation, or 180 deg. rotation; |
386 | // for 180 rotation, cos q will be negative |
387 | !rot_matrices_equal(mat0: curStr->getFont().getRotMat(), mat1: state->getTextMat())) { |
388 | endString(); |
389 | beginString(state, s: nullptr); |
390 | } |
391 | state->textTransformDelta(x1: state->getCharSpace() * state->getHorizScaling(), y1: 0, x2: &dx2, y2: &dy2); |
392 | dx -= dx2; |
393 | dy -= dy2; |
394 | state->transformDelta(x1: dx, y1: dy, x2: &w1, y2: &h1); |
395 | if (uLen != 0) { |
396 | w1 /= uLen; |
397 | h1 /= uLen; |
398 | } |
399 | for (i = 0; i < uLen; ++i) { |
400 | curStr->addChar(state, x: x1 + i * w1, y: y1 + i * h1, dx: w1, dy: h1, u: u[i]); |
401 | } |
402 | } |
403 | |
404 | void HtmlPage::endString() |
405 | { |
406 | HtmlString *p1, *p2; |
407 | double h, y1, y2; |
408 | |
409 | // throw away zero-length strings -- they don't have valid xMin/xMax |
410 | // values, and they're useless anyway |
411 | if (curStr->len == 0) { |
412 | delete curStr; |
413 | curStr = nullptr; |
414 | return; |
415 | } |
416 | |
417 | curStr->endString(); |
418 | |
419 | #if 0 //~tmp |
420 | if (curStr->yMax - curStr->yMin > 20) { |
421 | delete curStr; |
422 | curStr = NULL; |
423 | return; |
424 | } |
425 | #endif |
426 | |
427 | // insert string in y-major list |
428 | h = curStr->yMax - curStr->yMin; |
429 | y1 = curStr->yMin + 0.5 * h; |
430 | y2 = curStr->yMin + 0.8 * h; |
431 | if (rawOrder) { |
432 | p1 = yxCur1; |
433 | p2 = nullptr; |
434 | } else if ((!yxCur1 || (y1 >= yxCur1->yMin && (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && (!yxCur2 || (y1 < yxCur2->yMin || (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { |
435 | p1 = yxCur1; |
436 | p2 = yxCur2; |
437 | } else { |
438 | for (p1 = nullptr, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) { |
439 | if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) { |
440 | break; |
441 | } |
442 | } |
443 | yxCur2 = p2; |
444 | } |
445 | yxCur1 = curStr; |
446 | if (p1) { |
447 | p1->yxNext = curStr; |
448 | } else { |
449 | yxStrings = curStr; |
450 | } |
451 | curStr->yxNext = p2; |
452 | curStr = nullptr; |
453 | } |
454 | |
// Returns a pointer to the last occurrence of ss inside s, or nullptr when ss
// does not occur. Occurrences may overlap (the search resumes one character
// after each hit).
static const char *strrstr(const char *s, const char *ss)
{
    const char *last = nullptr;
    for (const char *hit = strstr(s, ss); hit != nullptr; hit = strstr(hit + 1, ss)) {
        last = hit;
    }
    return last;
}
463 | |
464 | static void CloseTags(GooString *htext, bool &finish_a, bool &finish_italic, bool &finish_bold) |
465 | { |
466 | const char *last_italic = finish_italic && (finish_bold || finish_a) ? strrstr(s: htext->c_str(), ss: "<i>" ) : nullptr; |
467 | const char *last_bold = finish_bold && (finish_italic || finish_a) ? strrstr(s: htext->c_str(), ss: "<b>" ) : nullptr; |
468 | const char *last_a = finish_a && (finish_italic || finish_bold) ? strrstr(s: htext->c_str(), ss: "<a " ) : nullptr; |
469 | if (finish_a && (finish_italic || finish_bold) && last_a > (last_italic > last_bold ? last_italic : last_bold)) { |
470 | htext->append(str: "</a>" , lengthA: 4); |
471 | finish_a = false; |
472 | } |
473 | if (finish_italic && finish_bold && last_italic > last_bold) { |
474 | htext->append(str: "</i>" , lengthA: 4); |
475 | finish_italic = false; |
476 | } |
477 | if (finish_bold) { |
478 | htext->append(str: "</b>" , lengthA: 4); |
479 | } |
480 | if (finish_italic) { |
481 | htext->append(str: "</i>" , lengthA: 4); |
482 | } |
483 | if (finish_a) { |
484 | htext->append(str: "</a>" ); |
485 | } |
486 | } |
487 | |
488 | // Strings are lines of text; |
489 | // This function aims to combine strings into lines and paragraphs if !noMerge |
490 | // It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect |
491 | void HtmlPage::coalesce() |
492 | { |
493 | HtmlString *str1, *str2; |
494 | double space, horSpace, vertSpace, vertOverlap; |
495 | bool addSpace, addLineBreak; |
496 | int n, i; |
497 | double curX, curY; |
498 | |
499 | #if 0 //~ for debugging |
500 | for (str1 = yxStrings; str1; str1 = str1->yxNext) { |
501 | printf("x=%f..%f y=%f..%f size=%2d '" , |
502 | str1->xMin, str1->xMax, str1->yMin, str1->yMax, |
503 | (int)(str1->yMax - str1->yMin)); |
504 | for (i = 0; i < str1->len; ++i) { |
505 | fputc(str1->text[i] & 0xff, stdout); |
506 | } |
507 | printf("'\n" ); |
508 | } |
509 | printf("\n------------------------------------------------------------\n\n" ); |
510 | #endif |
511 | str1 = yxStrings; |
512 | |
513 | if (!str1) { |
514 | return; |
515 | } |
516 | |
517 | //----- discard duplicated text (fake boldface, drop shadows) |
518 | if (!complexMode) { /* if not in complex mode get rid of duplicate strings */ |
519 | HtmlString *str3; |
520 | bool found; |
521 | while (str1) { |
522 | double size = str1->yMax - str1->yMin; |
523 | double xLimit = str1->xMin + size; |
524 | found = false; |
525 | for (str2 = str1, str3 = str1->yxNext; str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) { |
526 | if (str3->len == str1->len && !memcmp(s1: str3->text, s2: str1->text, n: str1->len * sizeof(Unicode)) && fabs(x: str3->yMin - str1->yMin) < size * 0.2 && fabs(x: str3->yMax - str1->yMax) < size * 0.2 |
527 | && fabs(x: str3->xMax - str1->xMax) < size * 0.1) { |
528 | found = true; |
529 | // printf("found duplicate!\n"); |
530 | break; |
531 | } |
532 | } |
533 | if (found) { |
534 | str2->xyNext = str3->xyNext; |
535 | str2->yxNext = str3->yxNext; |
536 | delete str3; |
537 | } else { |
538 | str1 = str1->yxNext; |
539 | } |
540 | } |
541 | } /*- !complexMode */ |
542 | |
543 | str1 = yxStrings; |
544 | |
545 | const HtmlFont *hfont1 = getFont(hStr: str1); |
546 | if (hfont1->isBold()) { |
547 | str1->htext->insert(i: 0, str: "<b>" , lengthA: 3); |
548 | } |
549 | if (hfont1->isItalic()) { |
550 | str1->htext->insert(i: 0, str: "<i>" , lengthA: 3); |
551 | } |
552 | if (str1->getLink() != nullptr) { |
553 | GooString *ls = str1->getLink()->getLinkStart(); |
554 | str1->htext->insert(i: 0, str: ls); |
555 | delete ls; |
556 | } |
557 | curX = str1->xMin; |
558 | curY = str1->yMin; |
559 | |
560 | while (str1 && (str2 = str1->yxNext)) { |
561 | const HtmlFont *hfont2 = getFont(hStr: str2); |
562 | space = str1->yMax - str1->yMin; // the height of the font's bounding box |
563 | horSpace = str2->xMin - str1->xMax; |
564 | // if strings line up on left-hand side AND they are on subsequent lines, we need a line break |
565 | addLineBreak = !noMerge && (fabs(x: str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(x: str2->yMax, y: str1->yMax + space, z: str1->yMax); |
566 | vertSpace = str2->yMin - str1->yMax; |
567 | |
568 | // printf("coalesce %d %d %f? ", str1->dir, str2->dir, d); |
569 | |
570 | if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) { |
571 | vertOverlap = str1->yMax - str2->yMin; |
572 | } else if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) { |
573 | vertOverlap = str2->yMax - str1->yMin; |
574 | } else { |
575 | vertOverlap = 0; |
576 | } |
577 | |
578 | // Combine strings if: |
579 | // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following: |
580 | // 1. They appear to be part of the same line of text |
581 | // 2. They appear to be subsequent lines of a paragraph |
582 | // We assume (1) or (2) above, respectively, based on: |
583 | // (1) strings overlap vertically AND |
584 | // horizontal space between end of str1 and start of str2 is consistent with a single space or less; |
585 | // when rawOrder, the strings have to overlap vertically by at least 50% |
586 | // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left |
587 | if (((((rawOrder && vertOverlap > 0.5 * space) || (!rawOrder && str2->yMin < str1->yMax)) && (horSpace > -0.5 * space && horSpace < space)) || (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)) |
588 | && (!complexMode || (hfont1->isEqualIgnoreBold(x: *hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter |
589 | str1->dir == str2->dir // text direction the same |
590 | ) { |
591 | // printf("yes\n"); |
592 | n = str1->len + str2->len; |
593 | if ((addSpace = horSpace > wordBreakThreshold * space)) { |
594 | ++n; |
595 | } |
596 | if (addLineBreak) { |
597 | ++n; |
598 | } |
599 | |
600 | str1->size = (n + 15) & ~15; |
601 | str1->text = (Unicode *)grealloc(p: str1->text, size: str1->size * sizeof(Unicode)); |
602 | str1->xRight = (double *)grealloc(p: str1->xRight, size: str1->size * sizeof(double)); |
603 | if (addSpace) { |
604 | str1->text[str1->len] = 0x20; |
605 | str1->htext->append(str: xml ? " " : " " ); |
606 | str1->xRight[str1->len] = str2->xMin; |
607 | ++str1->len; |
608 | } |
609 | if (addLineBreak) { |
610 | str1->text[str1->len] = '\n'; |
611 | str1->htext->append(str: "<br/>" ); |
612 | str1->xRight[str1->len] = str2->xMin; |
613 | ++str1->len; |
614 | str1->yMin = str2->yMin; |
615 | str1->yMax = str2->yMax; |
616 | str1->xMax = str2->xMax; |
617 | int fontLineSize = hfont1->getLineSize(); |
618 | int curLineSize = (int)(vertSpace + space); |
619 | if (curLineSize != fontLineSize) { |
620 | HtmlFont *newfnt = new HtmlFont(*hfont1); |
621 | newfnt->setLineSize(curLineSize); |
622 | str1->fontpos = fonts->AddFont(font: *newfnt); |
623 | delete newfnt; |
624 | hfont1 = getFont(hStr: str1); |
625 | // we have to reget hfont2 because it's location could have |
626 | // changed on resize |
627 | hfont2 = getFont(hStr: str2); |
628 | } |
629 | } |
630 | for (i = 0; i < str2->len; ++i) { |
631 | str1->text[str1->len] = str2->text[i]; |
632 | str1->xRight[str1->len] = str2->xRight[i]; |
633 | ++str1->len; |
634 | } |
635 | |
636 | /* fix <i>, <b> if str1 and str2 differ and handle switch of links */ |
637 | const HtmlLink *hlink1 = str1->getLink(); |
638 | const HtmlLink *hlink2 = str2->getLink(); |
639 | bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(x: *hlink2); |
640 | bool finish_a = switch_links && hlink1 != nullptr; |
641 | bool finish_italic = hfont1->isItalic() && (!hfont2->isItalic() || finish_a); |
642 | bool finish_bold = hfont1->isBold() && (!hfont2->isBold() || finish_a || finish_italic); |
643 | CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold); |
644 | if (switch_links && hlink2 != nullptr) { |
645 | GooString *ls = hlink2->getLinkStart(); |
646 | str1->htext->append(str: ls); |
647 | delete ls; |
648 | } |
649 | if ((!hfont1->isItalic() || finish_italic) && hfont2->isItalic()) { |
650 | str1->htext->append(str: "<i>" , lengthA: 3); |
651 | } |
652 | if ((!hfont1->isBold() || finish_bold) && hfont2->isBold()) { |
653 | str1->htext->append(str: "<b>" , lengthA: 3); |
654 | } |
655 | |
656 | str1->htext->append(str: str2->htext.get()); |
657 | // str1 now contains href for link of str2 (if it is defined) |
658 | str1->link = str2->link; |
659 | hfont1 = hfont2; |
660 | if (str2->xMax > str1->xMax) { |
661 | str1->xMax = str2->xMax; |
662 | } |
663 | if (str2->yMax > str1->yMax) { |
664 | str1->yMax = str2->yMax; |
665 | } |
666 | str1->yxNext = str2->yxNext; |
667 | delete str2; |
668 | } else { // keep strings separate |
669 | // printf("no\n"); |
670 | bool finish_a = str1->getLink() != nullptr; |
671 | bool finish_bold = hfont1->isBold(); |
672 | bool finish_italic = hfont1->isItalic(); |
673 | CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold); |
674 | |
675 | str1->xMin = curX; |
676 | str1->yMin = curY; |
677 | str1 = str2; |
678 | curX = str1->xMin; |
679 | curY = str1->yMin; |
680 | hfont1 = hfont2; |
681 | if (hfont1->isBold()) { |
682 | str1->htext->insert(i: 0, str: "<b>" , lengthA: 3); |
683 | } |
684 | if (hfont1->isItalic()) { |
685 | str1->htext->insert(i: 0, str: "<i>" , lengthA: 3); |
686 | } |
687 | if (str1->getLink() != nullptr) { |
688 | GooString *ls = str1->getLink()->getLinkStart(); |
689 | str1->htext->insert(i: 0, str: ls); |
690 | delete ls; |
691 | } |
692 | } |
693 | } |
694 | str1->xMin = curX; |
695 | str1->yMin = curY; |
696 | |
697 | bool finish_bold = hfont1->isBold(); |
698 | bool finish_italic = hfont1->isItalic(); |
699 | bool finish_a = str1->getLink() != nullptr; |
700 | CloseTags(htext: str1->htext.get(), finish_a, finish_italic, finish_bold); |
701 | |
702 | #if 0 //~ for debugging |
703 | for (str1 = yxStrings; str1; str1 = str1->yxNext) { |
704 | printf("x=%3d..%3d y=%3d..%3d size=%2d " , |
705 | (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax, |
706 | (int)(str1->yMax - str1->yMin)); |
707 | printf("'%s'\n" , str1->htext->c_str()); |
708 | } |
709 | printf("\n------------------------------------------------------------\n\n" ); |
710 | #endif |
711 | } |
712 | |
713 | void HtmlPage::dumpAsXML(FILE *f, int page) |
714 | { |
715 | fprintf(stream: f, format: "<page number=\"%d\" position=\"absolute\"" , page); |
716 | fprintf(stream: f, format: " top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n" , pageHeight, pageWidth); |
717 | |
718 | for (int i = fontsPageMarker; i < fonts->size(); i++) { |
719 | GooString *fontCSStyle = fonts->CSStyle(i); |
720 | fprintf(stream: f, format: "\t%s\n" , fontCSStyle->c_str()); |
721 | delete fontCSStyle; |
722 | } |
723 | |
724 | for (auto ptr : imgList) { |
725 | auto img = static_cast<HtmlImage *>(ptr); |
726 | if (!noRoundedCoordinates) { |
727 | fprintf(stream: f, format: "<image top=\"%d\" left=\"%d\" " , xoutRound(img->yMin), xoutRound(img->xMin)); |
728 | fprintf(stream: f, format: "width=\"%d\" height=\"%d\" " , xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin)); |
729 | } else { |
730 | fprintf(stream: f, format: "<image top=\"%f\" left=\"%f\" " , img->yMin, img->xMin); |
731 | fprintf(stream: f, format: "width=\"%f\" height=\"%f\" " , img->xMax - img->xMin, img->yMax - img->yMin); |
732 | } |
733 | fprintf(stream: f, format: "src=\"%s\"/>\n" , img->fName->c_str()); |
734 | delete img; |
735 | } |
736 | imgList.clear(); |
737 | |
738 | for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { |
739 | if (tmp->htext) { |
740 | if (!noRoundedCoordinates) { |
741 | fprintf(stream: f, format: "<text top=\"%d\" left=\"%d\" " , xoutRound(tmp->yMin), xoutRound(tmp->xMin)); |
742 | fprintf(stream: f, format: "width=\"%d\" height=\"%d\" " , xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin)); |
743 | } else { |
744 | fprintf(stream: f, format: "<text top=\"%f\" left=\"%f\" " , tmp->yMin, tmp->xMin); |
745 | fprintf(stream: f, format: "width=\"%f\" height=\"%f\" " , tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin); |
746 | } |
747 | fprintf(stream: f, format: "font=\"%d\">" , tmp->fontpos); |
748 | fputs(s: tmp->htext->c_str(), stream: f); |
749 | fputs(s: "</text>\n" , stream: f); |
750 | } |
751 | } |
752 | fputs(s: "</page>\n" , stream: f); |
753 | } |
754 | |
// Writes a <style> element to f that defines the .xflip, .yflip and .xyflip
// classes used to mirror images.
static void printCSS(FILE *f)
{
    // Image flip/flop CSS
    // Source:
    // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
    // tested in Chrome, Fx (Linux) and IE9 (W7)
    static const char css[] = "<style type=\"text/css\">\n"
                              "<!--\n"
                              ".xflip {\n"
                              " -moz-transform: scaleX(-1);\n"
                              " -webkit-transform: scaleX(-1);\n"
                              " -o-transform: scaleX(-1);\n"
                              " transform: scaleX(-1);\n"
                              " filter: fliph;\n"
                              "}\n"
                              ".yflip {\n"
                              " -moz-transform: scaleY(-1);\n"
                              " -webkit-transform: scaleY(-1);\n"
                              " -o-transform: scaleY(-1);\n"
                              " transform: scaleY(-1);\n"
                              " filter: flipv;\n"
                              "}\n"
                              ".xyflip {\n"
                              " -moz-transform: scaleX(-1) scaleY(-1);\n"
                              " -webkit-transform: scaleX(-1) scaleY(-1);\n"
                              " -o-transform: scaleX(-1) scaleY(-1);\n"
                              " transform: scaleX(-1) scaleY(-1);\n"
                              " filter: fliph + flipv;\n"
                              "}\n"
                              "-->\n"
                              "</style>\n";

    // sizeof includes the terminating NUL, which must not be written
    fwrite(css, sizeof(css) - 1, 1, f);
}
814 | |
815 | int HtmlPage::(FILE *const file, FILE *&pageFile, int page) |
816 | { |
817 | |
818 | if (!noframes) { |
819 | const std::string pgNum = std::to_string(val: page); |
820 | std::string pageFileName(DocName->toStr()); |
821 | if (!singleHtml) { |
822 | pageFileName += '-' + pgNum + ".html" ; |
823 | pageFile = fopen(filename: pageFileName.c_str(), modes: "w" ); |
824 | } else { |
825 | pageFileName += "-html.html" ; |
826 | pageFile = fopen(filename: pageFileName.c_str(), modes: "a" ); |
827 | } |
828 | |
829 | if (!pageFile) { |
830 | error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:s}'" , pageFileName.c_str()); |
831 | return 1; |
832 | } |
833 | |
834 | if (!singleHtml) { |
835 | fprintf(stream: pageFile, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n" , DOCTYPE, page); |
836 | } else { |
837 | fprintf(stream: pageFile, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n" , DOCTYPE, pageFileName.c_str()); |
838 | } |
839 | |
840 | const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(encoding: globalParams->getTextEncodingName()); |
841 | if (!singleHtml) { |
842 | fprintf(stream: pageFile, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n" , htmlEncoding.c_str()); |
843 | } else { |
844 | fprintf(stream: pageFile, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n" , htmlEncoding.c_str()); |
845 | } |
846 | } else { |
847 | pageFile = file; |
848 | fprintf(stream: pageFile, format: "<!-- Page %d -->\n" , page); |
849 | fprintf(stream: pageFile, format: "<a name=\"%d\"></a>\n" , page); |
850 | } |
851 | |
852 | return 0; |
853 | } |
854 | |
855 | void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string> &backgroundImages) |
856 | { |
857 | FILE *pageFile; |
858 | |
859 | if (firstPage == -1) { |
860 | firstPage = page; |
861 | } |
862 | |
863 | if (dumpComplexHeaders(file, pageFile, page)) { |
864 | error(category: errIO, pos: -1, msg: "Couldn't write headers." ); |
865 | return; |
866 | } |
867 | |
868 | fputs(s: "<style type=\"text/css\">\n<!--\n" , stream: pageFile); |
869 | fputs(s: "\tp {margin: 0; padding: 0;}" , stream: pageFile); |
870 | for (int i = fontsPageMarker; i != fonts->size(); i++) { |
871 | GooString *fontCSStyle; |
872 | if (!singleHtml) { |
873 | fontCSStyle = fonts->CSStyle(i); |
874 | } else { |
875 | fontCSStyle = fonts->CSStyle(i, j: page); |
876 | } |
877 | fprintf(stream: pageFile, format: "\t%s\n" , fontCSStyle->c_str()); |
878 | delete fontCSStyle; |
879 | } |
880 | |
881 | fputs(s: "-->\n</style>\n" , stream: pageFile); |
882 | |
883 | if (!noframes) { |
884 | fputs(s: "</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n" , stream: pageFile); |
885 | } |
886 | |
887 | fprintf(stream: pageFile, format: "<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n" , page, pageWidth, pageHeight); |
888 | |
889 | if (!ignore && (size_t)(page - firstPage) < backgroundImages.size()) { |
890 | fprintf(stream: pageFile, format: "<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n" , pageWidth, pageHeight, backgroundImages[page - firstPage].c_str()); |
891 | } |
892 | |
893 | for (HtmlString *tmp1 = yxStrings; tmp1; tmp1 = tmp1->yxNext) { |
894 | if (tmp1->htext) { |
895 | fprintf(stream: pageFile, format: "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft" , xoutRound(tmp1->yMin), xoutRound(tmp1->xMin)); |
896 | if (!singleHtml) { |
897 | fputc(c: '0', stream: pageFile); |
898 | } else { |
899 | fprintf(stream: pageFile, format: "%d" , page); |
900 | } |
901 | fprintf(stream: pageFile, format: "%d\">" , tmp1->fontpos); |
902 | fputs(s: tmp1->htext->c_str(), stream: pageFile); |
903 | fputs(s: "</p>\n" , stream: pageFile); |
904 | } |
905 | } |
906 | |
907 | fputs(s: "</div>\n" , stream: pageFile); |
908 | |
909 | if (!noframes) { |
910 | fputs(s: "</body>\n</html>\n" , stream: pageFile); |
911 | fclose(stream: pageFile); |
912 | } |
913 | } |
914 | |
915 | void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages) |
916 | { |
917 | if (complexMode || singleHtml) { |
918 | if (xml) { |
919 | dumpAsXML(f, page: pageNum); |
920 | } |
921 | if (!xml) { |
922 | dumpComplex(file: f, page: pageNum, backgroundImages); |
923 | } |
924 | } else { |
925 | fprintf(stream: f, format: "<a name=%d></a>" , pageNum); |
926 | // Loop over the list of image names on this page |
927 | for (auto ptr : imgList) { |
928 | auto img = static_cast<HtmlImage *>(ptr); |
929 | |
930 | // see printCSS() for class names |
931 | const char *styles[4] = { "" , " class=\"xflip\"" , " class=\"yflip\"" , " class=\"xyflip\"" }; |
932 | int style_index = 0; |
933 | if (img->xMin > img->xMax) { |
934 | style_index += 1; // xFlip |
935 | } |
936 | if (img->yMin > img->yMax) { |
937 | style_index += 2; // yFlip |
938 | } |
939 | |
940 | fprintf(stream: f, format: "<img%s src=\"%s\"/><br/>\n" , styles[style_index], img->fName->c_str()); |
941 | delete img; |
942 | } |
943 | imgList.clear(); |
944 | |
945 | for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { |
946 | if (tmp->htext) { |
947 | fputs(s: tmp->htext->c_str(), stream: f); |
948 | fputs(s: "<br/>\n" , stream: f); |
949 | } |
950 | } |
951 | fputs(s: "<hr/>\n" , stream: f); |
952 | } |
953 | } |
954 | |
955 | void HtmlPage::clear() |
956 | { |
957 | HtmlString *p1, *p2; |
958 | |
959 | if (curStr) { |
960 | delete curStr; |
961 | curStr = nullptr; |
962 | } |
963 | for (p1 = yxStrings; p1; p1 = p2) { |
964 | p2 = p1->yxNext; |
965 | delete p1; |
966 | } |
967 | yxStrings = nullptr; |
968 | xyStrings = nullptr; |
969 | yxCur1 = yxCur2 = nullptr; |
970 | |
971 | if (!noframes) { |
972 | delete fonts; |
973 | fonts = new HtmlFontAccu(); |
974 | fontsPageMarker = 0; |
975 | } else { |
976 | fontsPageMarker = fonts->size(); |
977 | } |
978 | |
979 | delete links; |
980 | links = new HtmlLinks(); |
981 | } |
982 | |
// Stores a newly allocated GooString built from `fname` in DocName.
// NOTE(review): the previous DocName value is not released here — confirm
// this is only called once per HtmlPage.
void HtmlPage::setDocName(const char *fname)
{
    DocName = new GooString(fname);
}
987 | |
988 | void HtmlPage::addImage(std::unique_ptr<GooString> &&fname, GfxState *state) |
989 | { |
990 | HtmlImage *img = new HtmlImage(std::move(fname), state); |
991 | imgList.push_back(x: img); |
992 | } |
993 | |
994 | //------------------------------------------------------------------------ |
995 | // HtmlMetaVar |
996 | //------------------------------------------------------------------------ |
997 | |
998 | HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content) |
999 | { |
1000 | name = new GooString(_name); |
1001 | content = new GooString(_content); |
1002 | } |
1003 | |
// Releases the name and content strings allocated by the constructor.
HtmlMetaVar::~HtmlMetaVar()
{
    delete name;
    delete content;
}
1009 | |
1010 | GooString *HtmlMetaVar::toString() const |
1011 | { |
1012 | GooString *result = new GooString("<meta name=\"" ); |
1013 | result->append(str: name); |
1014 | result->append(str: "\" content=\"" ); |
1015 | result->append(str: content); |
1016 | result->append(str: "\"/>" ); |
1017 | return result; |
1018 | } |
1019 | |
1020 | //------------------------------------------------------------------------ |
1021 | // HtmlOutputDev |
1022 | //------------------------------------------------------------------------ |
1023 | |
1024 | static const char *HtmlEncodings[][2] = { { "Latin1" , "ISO-8859-1" }, { nullptr, nullptr } }; |
1025 | |
1026 | std::string HtmlOutputDev::mapEncodingToHtml(const std::string &encoding) |
1027 | { |
1028 | for (int i = 0; HtmlEncodings[i][0] != nullptr; i++) { |
1029 | if (encoding == HtmlEncodings[i][0]) { |
1030 | return HtmlEncodings[i][1]; |
1031 | } |
1032 | } |
1033 | return encoding; |
1034 | } |
1035 | |
1036 | void HtmlOutputDev::doFrame(int firstPage) |
1037 | { |
1038 | GooString *fName = new GooString(Docname); |
1039 | fName->append(str: ".html" ); |
1040 | |
1041 | if (!(fContentsFrame = fopen(filename: fName->c_str(), modes: "w" ))) { |
1042 | error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'" , fName); |
1043 | delete fName; |
1044 | return; |
1045 | } |
1046 | |
1047 | delete fName; |
1048 | |
1049 | const std::string baseName = gbasename(filename: Docname->c_str()); |
1050 | fputs(DOCTYPE, stream: fContentsFrame); |
1051 | fputs(s: "\n<html>" , stream: fContentsFrame); |
1052 | fputs(s: "\n<head>" , stream: fContentsFrame); |
1053 | fprintf(stream: fContentsFrame, format: "\n<title>%s</title>" , docTitle->c_str()); |
1054 | const std::string htmlEncoding = mapEncodingToHtml(encoding: globalParams->getTextEncodingName()); |
1055 | fprintf(stream: fContentsFrame, format: "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n" , htmlEncoding.c_str()); |
1056 | dumpMetaVars(fContentsFrame); |
1057 | fprintf(stream: fContentsFrame, format: "</head>\n" ); |
1058 | fputs(s: "<frameset cols=\"100,*\">\n" , stream: fContentsFrame); |
1059 | fprintf(stream: fContentsFrame, format: "<frame name=\"links\" src=\"%s_ind.html\"/>\n" , baseName.c_str()); |
1060 | fputs(s: "<frame name=\"contents\" src=" , stream: fContentsFrame); |
1061 | if (complexMode) { |
1062 | fprintf(stream: fContentsFrame, format: "\"%s-%d.html\"" , baseName.c_str(), firstPage); |
1063 | } else { |
1064 | fprintf(stream: fContentsFrame, format: "\"%ss.html\"" , baseName.c_str()); |
1065 | } |
1066 | |
1067 | fputs(s: "/>\n</frameset>\n</html>\n" , stream: fContentsFrame); |
1068 | |
1069 | fclose(stream: fContentsFrame); |
1070 | } |
1071 | |
1072 | HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrderA, int firstPage, bool outline) |
1073 | { |
1074 | catalog = catalogA; |
1075 | fContentsFrame = nullptr; |
1076 | page = nullptr; |
1077 | docTitle = new GooString(title); |
1078 | pages = nullptr; |
1079 | dumpJPEG = true; |
1080 | // write = true; |
1081 | rawOrder = rawOrderA; |
1082 | this->doOutline = outline; |
1083 | ok = false; |
1084 | // this->firstPage = firstPage; |
1085 | // pageNum=firstPage; |
1086 | // open file |
1087 | needClose = false; |
1088 | pages = new HtmlPage(rawOrder); |
1089 | |
1090 | glMetaVars.push_back(x: new HtmlMetaVar("generator" , "pdftohtml 0.36" )); |
1091 | if (author) { |
1092 | glMetaVars.push_back(x: new HtmlMetaVar("author" , author)); |
1093 | } |
1094 | if (keywords) { |
1095 | glMetaVars.push_back(x: new HtmlMetaVar("keywords" , keywords)); |
1096 | } |
1097 | if (date) { |
1098 | glMetaVars.push_back(x: new HtmlMetaVar("date" , date)); |
1099 | } |
1100 | if (subject) { |
1101 | glMetaVars.push_back(x: new HtmlMetaVar("subject" , subject)); |
1102 | } |
1103 | |
1104 | maxPageWidth = 0; |
1105 | maxPageHeight = 0; |
1106 | |
1107 | pages->setDocName(fileName); |
1108 | Docname = new GooString(fileName); |
1109 | |
1110 | // for non-xml output (complex or simple) with frames generate the left frame |
1111 | if (!xml && !noframes) { |
1112 | if (!singleHtml) { |
1113 | GooString *left = new GooString(fileName); |
1114 | left->append(str: "_ind.html" ); |
1115 | |
1116 | doFrame(firstPage); |
1117 | |
1118 | if (!(fContentsFrame = fopen(filename: left->c_str(), modes: "w" ))) { |
1119 | error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'" , left); |
1120 | delete left; |
1121 | return; |
1122 | } |
1123 | delete left; |
1124 | fputs(DOCTYPE, stream: fContentsFrame); |
1125 | fputs(s: "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n" , stream: fContentsFrame); |
1126 | |
1127 | if (doOutline) { |
1128 | fprintf(stream: fContentsFrame, format: "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>" , gbasename(filename: Docname->c_str()).c_str(), complexMode ? "-outline.html" : "s.html#outline" ); |
1129 | } |
1130 | } |
1131 | if (!complexMode) { /* not in complex mode */ |
1132 | |
1133 | GooString *right = new GooString(fileName); |
1134 | right->append(str: "s.html" ); |
1135 | |
1136 | if (!(page = fopen(filename: right->c_str(), modes: "w" ))) { |
1137 | error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'" , right); |
1138 | delete right; |
1139 | return; |
1140 | } |
1141 | delete right; |
1142 | fputs(DOCTYPE, stream: page); |
1143 | fputs(s: "<html>\n<head>\n<title></title>\n" , stream: page); |
1144 | printCSS(f: page); |
1145 | fputs(s: "</head>\n<body>\n" , stream: page); |
1146 | } |
1147 | } |
1148 | |
1149 | if (noframes) { |
1150 | if (stout) { |
1151 | page = stdout; |
1152 | } else { |
1153 | GooString *right = new GooString(fileName); |
1154 | if (!xml) { |
1155 | right->append(str: ".html" ); |
1156 | } |
1157 | if (xml) { |
1158 | right->append(str: ".xml" ); |
1159 | } |
1160 | if (!(page = fopen(filename: right->c_str(), modes: "w" ))) { |
1161 | error(category: errIO, pos: -1, msg: "Couldn't open html file '{0:t}'" , right); |
1162 | delete right; |
1163 | return; |
1164 | } |
1165 | delete right; |
1166 | } |
1167 | |
1168 | const std::string htmlEncoding = mapEncodingToHtml(encoding: globalParams->getTextEncodingName()); |
1169 | if (xml) { |
1170 | fprintf(stream: page, format: "<?xml version=\"1.0\" encoding=\"%s\"?>\n" , htmlEncoding.c_str()); |
1171 | fputs(s: "<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n" , stream: page); |
1172 | fprintf(stream: page, format: "<pdf2xml producer=\"%s\" version=\"%s\">\n" , PACKAGE_NAME, PACKAGE_VERSION); |
1173 | } else { |
1174 | fprintf(stream: page, format: "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n" , DOCTYPE, docTitle->c_str()); |
1175 | |
1176 | fprintf(stream: page, format: "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n" , htmlEncoding.c_str()); |
1177 | |
1178 | dumpMetaVars(page); |
1179 | printCSS(f: page); |
1180 | fprintf(stream: page, format: "</head>\n" ); |
1181 | fprintf(stream: page, format: "<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n" ); |
1182 | } |
1183 | } |
1184 | ok = true; |
1185 | } |
1186 | |
1187 | HtmlOutputDev::~HtmlOutputDev() |
1188 | { |
1189 | delete Docname; |
1190 | delete docTitle; |
1191 | |
1192 | for (auto entry : glMetaVars) { |
1193 | delete entry; |
1194 | } |
1195 | |
1196 | if (fContentsFrame) { |
1197 | fputs(s: "</body>\n</html>\n" , stream: fContentsFrame); |
1198 | fclose(stream: fContentsFrame); |
1199 | } |
1200 | if (page != nullptr) { |
1201 | if (xml) { |
1202 | fputs(s: "</pdf2xml>\n" , stream: page); |
1203 | fclose(stream: page); |
1204 | } else if (!complexMode || xml || noframes) { |
1205 | fputs(s: "</body>\n</html>\n" , stream: page); |
1206 | fclose(stream: page); |
1207 | } |
1208 | } |
1209 | if (pages) { |
1210 | delete pages; |
1211 | } |
1212 | } |
1213 | |
1214 | void HtmlOutputDev::startPage(int pageNumA, GfxState *state, XRef *xref) |
1215 | { |
1216 | #if 0 |
1217 | if (mode&&!xml){ |
1218 | if (write){ |
1219 | write=false; |
1220 | GooString* fname=Dirname(Docname); |
1221 | fname->append("image.log" ); |
1222 | if((tin=fopen(getFileNameFromPath(fname->c_str(),fname->getLength()),"w" ))==NULL){ |
1223 | printf("Error : can not open %s" ,fname); |
1224 | exit(1); |
1225 | } |
1226 | delete fname; |
1227 | // if(state->getRotation()!=0) |
1228 | // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1()); |
1229 | // else |
1230 | fprintf(tin,"ROTATE=%d neg %d neg translate\n" ,state->getX1(),state->getY1()); |
1231 | } |
1232 | } |
1233 | #endif |
1234 | |
1235 | pageNum = pageNumA; |
1236 | const std::string str = gbasename(filename: Docname->c_str()); |
1237 | pages->clear(); |
1238 | if (!noframes) { |
1239 | if (fContentsFrame) { |
1240 | if (complexMode) { |
1241 | fprintf(stream: fContentsFrame, format: "<a href=\"%s-%d.html\"" , str.c_str(), pageNum); |
1242 | } else { |
1243 | fprintf(stream: fContentsFrame, format: "<a href=\"%ss.html#%d\"" , str.c_str(), pageNum); |
1244 | } |
1245 | fprintf(stream: fContentsFrame, format: " target=\"contents\" >Page %d</a><br/>\n" , pageNum); |
1246 | } |
1247 | } |
1248 | |
1249 | pages->pageWidth = static_cast<int>(state->getPageWidth()); |
1250 | pages->pageHeight = static_cast<int>(state->getPageHeight()); |
1251 | } |
1252 | |
1253 | void HtmlOutputDev::endPage() |
1254 | { |
1255 | std::unique_ptr<Links> linksList = docPage->getLinks(); |
1256 | for (AnnotLink *link : linksList->getLinks()) { |
1257 | doProcessLink(link); |
1258 | } |
1259 | |
1260 | pages->conv(); |
1261 | pages->coalesce(); |
1262 | pages->dump(f: page, pageNum, backgroundImages); |
1263 | |
1264 | // I don't yet know what to do in the case when there are pages of different |
1265 | // sizes and we want complex output: running ghostscript many times |
1266 | // seems very inefficient. So for now I'll just use last page's size |
1267 | maxPageWidth = pages->pageWidth; |
1268 | maxPageHeight = pages->pageHeight; |
1269 | |
1270 | // if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame); |
1271 | if (!stout && !globalParams->getErrQuiet()) { |
1272 | printf(format: "Page-%d\n" , (pageNum)); |
1273 | } |
1274 | } |
1275 | |
1276 | void HtmlOutputDev::addBackgroundImage(const std::string &img) |
1277 | { |
1278 | backgroundImages.push_back(x: img); |
1279 | } |
1280 | |
// Forwards the font change to the page collector.
void HtmlOutputDev::updateFont(GfxState *state)
{
    pages->updateFont(state);
}
1285 | |
// Forwards the start of a text string to the page collector.
void HtmlOutputDev::beginString(GfxState *state, const GooString *s)
{
    pages->beginString(state, s);
}
1290 | |
// Forwards the end of a text string to the page collector; `state` is unused.
void HtmlOutputDev::endString(GfxState *state)
{
    pages->endString();
}
1295 | |
1296 | void HtmlOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int /*nBytes*/, const Unicode *u, int uLen) |
1297 | { |
1298 | if (!showHidden && (state->getRender() & 3) == 3) { |
1299 | return; |
1300 | } |
1301 | pages->addChar(state, x, y, dx, dy, ox: originX, oy: originY, u, uLen); |
1302 | } |
1303 | |
1304 | void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str) |
1305 | { |
1306 | InMemoryFile ims; |
1307 | FILE *f1 = nullptr; |
1308 | int c; |
1309 | |
1310 | // open the image file |
1311 | std::unique_ptr<GooString> fName = createImageFileName(ext: "jpg" ); |
1312 | f1 = dataUrls ? ims.open(mode: "wb" ) : fopen(filename: fName->c_str(), modes: "wb" ); |
1313 | if (!f1) { |
1314 | error(category: errIO, pos: -1, msg: "Couldn't open image file '{0:t}'" , fName.get()); |
1315 | return; |
1316 | } |
1317 | |
1318 | // initialize stream |
1319 | str = str->getNextStream(); |
1320 | str->reset(); |
1321 | |
1322 | // copy the stream |
1323 | while ((c = str->getChar()) != EOF) { |
1324 | fputc(c: c, stream: f1); |
1325 | } |
1326 | |
1327 | fclose(stream: f1); |
1328 | |
1329 | if (dataUrls) { |
1330 | fName = std::make_unique<GooString>(args: std::string("data:image/jpeg;base64," ) + gbase64Encode(input: ims.getBuffer())); |
1331 | } |
1332 | pages->addImage(fname: std::move(fName), state); |
1333 | } |
1334 | |
1335 | void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask) |
1336 | { |
1337 | #ifdef ENABLE_LIBPNG |
1338 | FILE *f1; |
1339 | InMemoryFile ims; |
1340 | |
1341 | if (!colorMap && !isMask) { |
1342 | error(category: errInternal, pos: -1, msg: "Can't have color image without a color map" ); |
1343 | return; |
1344 | } |
1345 | |
1346 | // open the image file |
1347 | std::unique_ptr<GooString> fName = createImageFileName(ext: "png" ); |
1348 | f1 = dataUrls ? ims.open(mode: "wb" ) : fopen(filename: fName->c_str(), modes: "wb" ); |
1349 | if (!f1) { |
1350 | error(category: errIO, pos: -1, msg: "Couldn't open image file '{0:t}'" , fName.get()); |
1351 | return; |
1352 | } |
1353 | |
1354 | PNGWriter *writer = new PNGWriter(isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB); |
1355 | // TODO can we calculate the resolution of the image? |
1356 | if (!writer->init(f: f1, width, height, hDPI: 72, vDPI: 72)) { |
1357 | error(category: errInternal, pos: -1, msg: "Can't init PNG for image '{0:t}'" , fName.get()); |
1358 | delete writer; |
1359 | fclose(stream: f1); |
1360 | return; |
1361 | } |
1362 | |
1363 | if (!isMask) { |
1364 | unsigned char *p; |
1365 | GfxRGB rgb; |
1366 | unsigned char *row = (unsigned char *)gmalloc(size: 3 * width); // 3 bytes/pixel: RGB |
1367 | unsigned char **row_pointer = &row; |
1368 | |
1369 | // Initialize the image stream |
1370 | ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits()); |
1371 | imgStr->reset(); |
1372 | |
1373 | // For each line... |
1374 | for (int y = 0; y < height; y++) { |
1375 | |
1376 | // Convert into a PNG row |
1377 | p = imgStr->getLine(); |
1378 | if (!p) { |
1379 | error(category: errIO, pos: -1, msg: "Failed to read PNG. '{0:t}' will be incorrect" , fName.get()); |
1380 | gfree(p: row); |
1381 | delete writer; |
1382 | delete imgStr; |
1383 | fclose(stream: f1); |
1384 | return; |
1385 | } |
1386 | for (int x = 0; x < width; x++) { |
1387 | colorMap->getRGB(x: p, rgb: &rgb); |
1388 | // Write the RGB pixels into the row |
1389 | row[3 * x] = colToByte(x: rgb.r); |
1390 | row[3 * x + 1] = colToByte(x: rgb.g); |
1391 | row[3 * x + 2] = colToByte(x: rgb.b); |
1392 | p += colorMap->getNumPixelComps(); |
1393 | } |
1394 | |
1395 | if (!writer->writeRow(row: row_pointer)) { |
1396 | error(category: errIO, pos: -1, msg: "Failed to write into PNG '{0:t}'" , fName.get()); |
1397 | delete writer; |
1398 | delete imgStr; |
1399 | fclose(stream: f1); |
1400 | return; |
1401 | } |
1402 | } |
1403 | gfree(p: row); |
1404 | imgStr->close(); |
1405 | delete imgStr; |
1406 | } else { // isMask == true |
1407 | int size = (width + 7) / 8; |
1408 | |
1409 | // PDF masks use 0 = draw current color, 1 = leave unchanged. |
1410 | // We invert this to provide the standard interpretation of alpha |
1411 | // (0 = transparent, 1 = opaque). If the colorMap already inverts |
1412 | // the mask we leave the data unchanged. |
1413 | int invert_bits = 0xff; |
1414 | if (colorMap) { |
1415 | GfxGray gray; |
1416 | unsigned char zero[gfxColorMaxComps]; |
1417 | memset(s: zero, c: 0, n: sizeof(zero)); |
1418 | colorMap->getGray(x: zero, gray: &gray); |
1419 | if (colToByte(x: gray) == 0) { |
1420 | invert_bits = 0x00; |
1421 | } |
1422 | } |
1423 | |
1424 | str->reset(); |
1425 | unsigned char *png_row = (unsigned char *)gmalloc(size); |
1426 | |
1427 | for (int ri = 0; ri < height; ++ri) { |
1428 | for (int i = 0; i < size; i++) { |
1429 | png_row[i] = str->getChar() ^ invert_bits; |
1430 | } |
1431 | |
1432 | if (!writer->writeRow(row: &png_row)) { |
1433 | error(category: errIO, pos: -1, msg: "Failed to write into PNG '{0:t}'" , fName.get()); |
1434 | delete writer; |
1435 | fclose(stream: f1); |
1436 | gfree(p: png_row); |
1437 | return; |
1438 | } |
1439 | } |
1440 | str->close(); |
1441 | gfree(p: png_row); |
1442 | } |
1443 | |
1444 | str->close(); |
1445 | |
1446 | writer->close(); |
1447 | delete writer; |
1448 | fclose(stream: f1); |
1449 | |
1450 | if (dataUrls) { |
1451 | fName = std::make_unique<GooString>(args: std::string("data:image/png;base64," ) + gbase64Encode(input: ims.getBuffer())); |
1452 | } |
1453 | pages->addImage(fname: std::move(fName), state); |
1454 | #else |
1455 | return; |
1456 | #endif |
1457 | } |
1458 | |
1459 | std::unique_ptr<GooString> HtmlOutputDev::createImageFileName(const char *ext) |
1460 | { |
1461 | return GooString::format(fmt: "{0:s}-{1:d}_{2:d}.{3:s}" , Docname->c_str(), pageNum, pages->getNumImages() + 1, ext); |
1462 | } |
1463 | |
1464 | void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg) |
1465 | { |
1466 | |
1467 | if (ignore || (complexMode && !xml)) { |
1468 | OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); |
1469 | return; |
1470 | } |
1471 | |
1472 | // dump JPEG file |
1473 | if (dumpJPEG && str->getKind() == strDCT) { |
1474 | drawJpegImage(state, str); |
1475 | } else { |
1476 | #ifdef ENABLE_LIBPNG |
1477 | drawPngImage(state, str, width, height, colorMap: nullptr, isMask: true); |
1478 | #else |
1479 | OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); |
1480 | #endif |
1481 | } |
1482 | } |
1483 | |
1484 | void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg) |
1485 | { |
1486 | |
1487 | if (ignore || (complexMode && !xml)) { |
1488 | OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); |
1489 | return; |
1490 | } |
1491 | |
1492 | /*if( !globalParams->getErrQuiet() ) |
1493 | printf("image stream of kind %d\n", str->getKind());*/ |
1494 | // dump JPEG file |
1495 | if (dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 || colorMap->getNumPixelComps() == 3) && !inlineImg) { |
1496 | drawJpegImage(state, str); |
1497 | } else { |
1498 | #ifdef ENABLE_LIBPNG |
1499 | drawPngImage(state, str, width, height, colorMap); |
1500 | #else |
1501 | OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); |
1502 | #endif |
1503 | } |
1504 | } |
1505 | |
1506 | void HtmlOutputDev::doProcessLink(AnnotLink *link) |
1507 | { |
1508 | double _x1, _y1, _x2, _y2; |
1509 | int x1, y1, x2, y2; |
1510 | |
1511 | link->getRect(x1: &_x1, y1: &_y1, x2: &_x2, y2: &_y2); |
1512 | cvtUserToDev(ux: _x1, uy: _y1, dx: &x1, dy: &y1); |
1513 | |
1514 | cvtUserToDev(ux: _x2, uy: _y2, dx: &x2, dy: &y2); |
1515 | |
1516 | GooString *_dest = getLinkDest(link); |
1517 | HtmlLink t((double)x1, (double)y2, (double)x2, (double)y1, _dest); |
1518 | pages->AddLink(x: t); |
1519 | delete _dest; |
1520 | } |
1521 | |
1522 | GooString *HtmlOutputDev::getLinkDest(AnnotLink *link) |
1523 | { |
1524 | if (!link->getAction()) { |
1525 | return new GooString(); |
1526 | } |
1527 | switch (link->getAction()->getKind()) { |
1528 | case actionGoTo: { |
1529 | int destPage = 1; |
1530 | LinkGoTo *ha = (LinkGoTo *)link->getAction(); |
1531 | std::unique_ptr<LinkDest> dest; |
1532 | if (ha->getDest() != nullptr) { |
1533 | dest = std::make_unique<LinkDest>(args: *ha->getDest()); |
1534 | } else if (ha->getNamedDest() != nullptr) { |
1535 | dest = catalog->findDest(name: ha->getNamedDest()); |
1536 | } |
1537 | |
1538 | if (dest) { |
1539 | GooString *file = new GooString(gbasename(filename: Docname->c_str())); |
1540 | |
1541 | if (dest->isPageRef()) { |
1542 | const Ref = dest->getPageRef(); |
1543 | destPage = catalog->findPage(pageRef: pageref); |
1544 | } else { |
1545 | destPage = dest->getPageNum(); |
1546 | } |
1547 | |
1548 | /* complex simple |
1549 | frames file-4.html files.html#4 |
1550 | noframes file.html#4 file.html#4 |
1551 | */ |
1552 | if (noframes) { |
1553 | file->append(str: ".html#" ); |
1554 | file->append(str: std::to_string(val: destPage)); |
1555 | } else { |
1556 | if (complexMode) { |
1557 | file->append(str: "-" ); |
1558 | file->append(str: std::to_string(val: destPage)); |
1559 | file->append(str: ".html" ); |
1560 | } else { |
1561 | file->append(str: "s.html#" ); |
1562 | file->append(str: std::to_string(val: destPage)); |
1563 | } |
1564 | } |
1565 | |
1566 | if (printCommands) { |
1567 | printf(format: " link to page %d " , destPage); |
1568 | } |
1569 | return file; |
1570 | } else { |
1571 | return new GooString(); |
1572 | } |
1573 | } |
1574 | case actionGoToR: { |
1575 | LinkGoToR *ha = (LinkGoToR *)link->getAction(); |
1576 | LinkDest *dest = nullptr; |
1577 | int destPage = 1; |
1578 | GooString *file = new GooString(); |
1579 | if (ha->getFileName()) { |
1580 | delete file; |
1581 | file = new GooString(ha->getFileName()->c_str()); |
1582 | } |
1583 | if (ha->getDest() != nullptr) { |
1584 | dest = new LinkDest(*ha->getDest()); |
1585 | } |
1586 | if (dest && file) { |
1587 | if (!(dest->isPageRef())) { |
1588 | destPage = dest->getPageNum(); |
1589 | } |
1590 | delete dest; |
1591 | |
1592 | if (printCommands) { |
1593 | printf(format: " link to page %d " , destPage); |
1594 | } |
1595 | if (printHtml) { |
1596 | const char *p = file->c_str() + file->getLength() - 4; |
1597 | if (!strcmp(s1: p, s2: ".pdf" ) || !strcmp(s1: p, s2: ".PDF" )) { |
1598 | file->del(i: file->getLength() - 4, n: 4); |
1599 | file->append(str: ".html" ); |
1600 | } |
1601 | file->append(c: '#'); |
1602 | file->append(str: std::to_string(val: destPage)); |
1603 | } |
1604 | } |
1605 | if (printCommands && file) { |
1606 | printf(format: "filename %s\n" , file->c_str()); |
1607 | } |
1608 | return file; |
1609 | } |
1610 | case actionURI: { |
1611 | LinkURI *ha = (LinkURI *)link->getAction(); |
1612 | GooString *file = new GooString(ha->getURI()); |
1613 | // printf("uri : %s\n",file->c_str()); |
1614 | return file; |
1615 | } |
1616 | case actionLaunch: |
1617 | if (printHtml) { |
1618 | LinkLaunch *ha = (LinkLaunch *)link->getAction(); |
1619 | GooString *file = new GooString(ha->getFileName()->c_str()); |
1620 | const char *p = file->c_str() + file->getLength() - 4; |
1621 | if (!strcmp(s1: p, s2: ".pdf" ) || !strcmp(s1: p, s2: ".PDF" )) { |
1622 | file->del(i: file->getLength() - 4, n: 4); |
1623 | file->append(str: ".html" ); |
1624 | } |
1625 | if (printCommands) { |
1626 | printf(format: "filename %s" , file->c_str()); |
1627 | } |
1628 | |
1629 | return file; |
1630 | } |
1631 | // fallthrough |
1632 | default: |
1633 | return new GooString(); |
1634 | } |
1635 | } |
1636 | |
1637 | void HtmlOutputDev::dumpMetaVars(FILE *file) |
1638 | { |
1639 | GooString *var; |
1640 | |
1641 | for (const HtmlMetaVar *t : glMetaVars) { |
1642 | var = t->toString(); |
1643 | fprintf(stream: file, format: "%s\n" , var->c_str()); |
1644 | delete var; |
1645 | } |
1646 | } |
1647 | |
1648 | bool HtmlOutputDev::dumpDocOutline(PDFDoc *doc) |
1649 | { |
1650 | FILE *output = nullptr; |
1651 | bool bClose = false; |
1652 | |
1653 | if (!ok) { |
1654 | return false; |
1655 | } |
1656 | |
1657 | Outline *outline = doc->getOutline(); |
1658 | if (!outline) { |
1659 | return false; |
1660 | } |
1661 | |
1662 | const std::vector<OutlineItem *> *outlines = outline->getItems(); |
1663 | if (!outlines) { |
1664 | return false; |
1665 | } |
1666 | |
1667 | if (!complexMode || xml) { |
1668 | output = page; |
1669 | } else if (complexMode && !xml) { |
1670 | if (noframes) { |
1671 | output = page; |
1672 | fputs(s: "<hr/>\n" , stream: output); |
1673 | } else { |
1674 | GooString *str = Docname->copy(); |
1675 | str->append(str: "-outline.html" ); |
1676 | output = fopen(filename: str->c_str(), modes: "w" ); |
1677 | delete str; |
1678 | if (output == nullptr) { |
1679 | return false; |
1680 | } |
1681 | bClose = true; |
1682 | |
1683 | const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(encoding: globalParams->getTextEncodingName()); |
1684 | |
1685 | fprintf(stream: output, |
1686 | format: "<html xmlns=\"http://www.w3.org/1999/xhtml\" " |
1687 | "lang=\"\" xml:lang=\"\">\n" |
1688 | "<head>\n" |
1689 | "<title>Document Outline</title>\n" |
1690 | "<meta http-equiv=\"Content-Type\" content=\"text/html; " |
1691 | "charset=%s\"/>\n" |
1692 | "</head>\n<body>\n" , |
1693 | htmlEncoding.c_str()); |
1694 | } |
1695 | } |
1696 | |
1697 | if (!xml) { |
1698 | bool done = newHtmlOutlineLevel(output, outlines); |
1699 | if (done && !complexMode) { |
1700 | fputs(s: "<hr/>\n" , stream: output); |
1701 | } |
1702 | |
1703 | if (bClose) { |
1704 | fputs(s: "</body>\n</html>\n" , stream: output); |
1705 | fclose(stream: output); |
1706 | } |
1707 | } else { |
1708 | newXmlOutlineLevel(output, outlines); |
1709 | } |
1710 | |
1711 | return true; |
1712 | } |
1713 | |
1714 | bool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level) |
1715 | { |
1716 | bool atLeastOne = false; |
1717 | |
1718 | if (level == 1) { |
1719 | fputs(s: "<a name=\"outline\"></a>" , stream: output); |
1720 | fputs(s: "<h1>Document Outline</h1>\n" , stream: output); |
1721 | } |
1722 | fputs(s: "<ul>\n" , stream: output); |
1723 | |
1724 | for (OutlineItem *item : *outlines) { |
1725 | const auto &title = item->getTitle(); |
1726 | std::unique_ptr<GooString> titleStr = HtmlFont::HtmlFilter(u: title.data(), uLen: title.size()); |
1727 | |
1728 | GooString *linkName = nullptr; |
1729 | |
1730 | const int itemPage = getOutlinePageNum(item); |
1731 | if (itemPage > 0) { |
1732 | /* complex simple |
1733 | frames file-4.html files.html#4 |
1734 | noframes file.html#4 file.html#4 |
1735 | */ |
1736 | linkName = new GooString(gbasename(filename: Docname->c_str())); |
1737 | if (noframes) { |
1738 | linkName->append(str: ".html#" ); |
1739 | linkName->append(str: std::to_string(val: itemPage)); |
1740 | } else { |
1741 | if (complexMode) { |
1742 | linkName->append(str: "-" ); |
1743 | linkName->append(str: std::to_string(val: itemPage)); |
1744 | linkName->append(str: ".html" ); |
1745 | } else { |
1746 | linkName->append(str: "s.html#" ); |
1747 | linkName->append(str: std::to_string(val: itemPage)); |
1748 | } |
1749 | } |
1750 | } |
1751 | |
1752 | fputs(s: "<li>" , stream: output); |
1753 | if (linkName) { |
1754 | fprintf(stream: output, format: "<a href=\"%s\">" , linkName->c_str()); |
1755 | } |
1756 | if (titleStr) { |
1757 | fputs(s: titleStr->c_str(), stream: output); |
1758 | } |
1759 | if (linkName) { |
1760 | fputs(s: "</a>" , stream: output); |
1761 | delete linkName; |
1762 | } |
1763 | atLeastOne = true; |
1764 | |
1765 | item->open(); |
1766 | if (item->hasKids() && item->getKids()) { |
1767 | fputs(s: "\n" , stream: output); |
1768 | newHtmlOutlineLevel(output, outlines: item->getKids(), level: level + 1); |
1769 | } |
1770 | fputs(s: "</li>\n" , stream: output); |
1771 | } |
1772 | fputs(s: "</ul>\n" , stream: output); |
1773 | |
1774 | return atLeastOne; |
1775 | } |
1776 | |
1777 | void HtmlOutputDev::newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines) |
1778 | { |
1779 | fputs(s: "<outline>\n" , stream: output); |
1780 | |
1781 | for (OutlineItem *item : *outlines) { |
1782 | const std::vector<Unicode> &title = item->getTitle(); |
1783 | auto titleStr = HtmlFont::HtmlFilter(u: title.data(), uLen: title.size()); |
1784 | const int itemPage = getOutlinePageNum(item); |
1785 | if (itemPage > 0) { |
1786 | fprintf(stream: output, format: "<item page=\"%d\">%s</item>\n" , itemPage, titleStr->c_str()); |
1787 | } else { |
1788 | fprintf(stream: output, format: "<item>%s</item>\n" , titleStr->c_str()); |
1789 | } |
1790 | |
1791 | item->open(); |
1792 | if (item->hasKids() && item->getKids()) { |
1793 | newXmlOutlineLevel(output, outlines: item->getKids()); |
1794 | } |
1795 | } |
1796 | |
1797 | fputs(s: "</outline>\n" , stream: output); |
1798 | } |
1799 | |
1800 | int HtmlOutputDev::getOutlinePageNum(OutlineItem *item) |
1801 | { |
1802 | const LinkAction *action = item->getAction(); |
1803 | const LinkGoTo *link = nullptr; |
1804 | std::unique_ptr<LinkDest> linkdest; |
1805 | int pagenum = -1; |
1806 | |
1807 | if (!action || action->getKind() != actionGoTo) { |
1808 | return pagenum; |
1809 | } |
1810 | |
1811 | link = static_cast<const LinkGoTo *>(action); |
1812 | |
1813 | if (!link || !link->isOk()) { |
1814 | return pagenum; |
1815 | } |
1816 | |
1817 | if (link->getDest()) { |
1818 | linkdest = std::make_unique<LinkDest>(args: *link->getDest()); |
1819 | } else if (link->getNamedDest()) { |
1820 | linkdest = catalog->findDest(name: link->getNamedDest()); |
1821 | } |
1822 | |
1823 | if (!linkdest) { |
1824 | return pagenum; |
1825 | } |
1826 | |
1827 | if (linkdest->isPageRef()) { |
1828 | const Ref = linkdest->getPageRef(); |
1829 | pagenum = catalog->findPage(pageRef: pageref); |
1830 | } else { |
1831 | pagenum = linkdest->getPageNum(); |
1832 | } |
1833 | |
1834 | return pagenum; |
1835 | } |
1836 | |