1 | //======================================================================== |
2 | // |
3 | // HtmlOutputDev.h |
4 | // |
5 | // Copyright 1997 Derek B. Noonburg |
6 | // |
7 | // Changed 1999 by G.Ovtcharov |
8 | //======================================================================== |
9 | |
10 | //======================================================================== |
11 | // |
12 | // Modified under the Poppler project - http://poppler.freedesktop.org |
13 | // |
14 | // All changes made under the Poppler project to this file are licensed |
15 | // under GPL version 2 or later |
16 | // |
17 | // Copyright (C) 2006, 2007, 2009, 2012, 2018-2022 Albert Astals Cid <aacid@kde.org> |
18 | // Copyright (C) 2008, 2009 Warren Toomey <wkt@tuhs.org> |
19 | // Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org> |
20 | // Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net> |
21 | // Copyright (C) 2010 Hib Eris <hib@hiberis.nl> |
22 | // Copyright (C) 2011 Joshua Richardson <jric@chegg.com> |
23 | // Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> |
24 | // Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com> |
25 | // Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it> |
26 | // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de> |
27 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
28 | // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de> |
29 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
30 | // |
31 | // To see a description of the changes please see the Changelog file that |
32 | // came with your tarball or type make ChangeLog if you are building from git |
33 | // |
34 | //======================================================================== |
35 | |
36 | #ifndef HTMLOUTPUTDEV_H |
37 | #define HTMLOUTPUTDEV_H |
38 | |
39 | #include <cstdio> |
40 | #include "goo/gbasename.h" |
41 | #include "GfxFont.h" |
42 | #include "OutputDev.h" |
43 | #include "HtmlLinks.h" |
44 | #include "HtmlFonts.h" |
45 | #include "Link.h" |
46 | #include "Catalog.h" |
47 | #include "UnicodeMap.h" |
48 | |
// Round a floating-point value to an int by adding 0.5 and truncating.
// The conversion to int truncates toward zero, so this rounds to the
// nearest integer only for non-negative inputs (e.g. -1.6 yields -1).
// The argument is parenthesized so that an expression whose operators bind
// less tightly than '+' (such as a conditional expression) is rounded as a
// whole. The argument is evaluated exactly once.
#define xoutRound(x) (static_cast<int>((x) + 0.5))

// HTML5 document type declaration string.
#define DOCTYPE "<!DOCTYPE html>"
52 | |
// Forward declarations of types that the declarations below refer to.
class GfxState;
class GooString;
class HtmlImage;
class OutlineItem;
class PDFDoc;
58 | //------------------------------------------------------------------------ |
59 | // HtmlString |
60 | //------------------------------------------------------------------------ |
61 | |
// Writing direction of a piece of text.
enum UnicodeTextDirection
{
    textDirUnknown = 0, // direction not determined
    textDirLeftRight = 1, // left to right
    textDirRightLeft = 2, // right to left
    textDirTopBottom = 3 // top to bottom
};
69 | |
70 | class HtmlString |
71 | { |
72 | public: |
73 | // Constructor. |
74 | HtmlString(GfxState *state, double fontSize, HtmlFontAccu *fonts); |
75 | |
76 | // Destructor. |
77 | ~HtmlString(); |
78 | |
79 | HtmlString(const HtmlString &) = delete; |
80 | HtmlString &operator=(const HtmlString &) = delete; |
81 | |
82 | // Add a character to the string. |
83 | void addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u); |
84 | const HtmlLink *getLink() const { return link; } |
85 | const HtmlFont &getFont() const { return *fonts->Get(i: fontpos); } |
86 | void endString(); // postprocessing |
87 | |
88 | private: |
89 | // aender die text variable |
90 | const HtmlLink *link; |
91 | double xMin, xMax; // bounding box x coordinates |
92 | double yMin, yMax; // bounding box y coordinates |
93 | int col; // starting column |
94 | Unicode *text; // the text |
95 | double *xRight; // right-hand x coord of each char |
96 | HtmlString *yxNext; // next string in y-major order |
97 | HtmlString *xyNext; // next string in x-major order |
98 | int fontpos; |
99 | std::unique_ptr<GooString> htext; |
100 | int len; // length of text and xRight |
101 | int size; // size of text and xRight arrays |
102 | UnicodeTextDirection dir; // direction (left to right/right to left) |
103 | HtmlFontAccu *fonts; |
104 | |
105 | friend class HtmlPage; |
106 | }; |
107 | |
108 | //------------------------------------------------------------------------ |
109 | // HtmlPage |
110 | //------------------------------------------------------------------------ |
111 | |
112 | class HtmlPage |
113 | { |
114 | public: |
115 | // Constructor. |
116 | explicit HtmlPage(bool rawOrder); |
117 | |
118 | // Destructor. |
119 | ~HtmlPage(); |
120 | |
121 | HtmlPage(const HtmlPage &) = delete; |
122 | HtmlPage &operator=(const HtmlPage &) = delete; |
123 | |
124 | // Begin a new string. |
125 | void beginString(GfxState *state, const GooString *s); |
126 | |
127 | // Add a character to the current string. |
128 | void addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen); // unsigned char c); |
129 | |
130 | void updateFont(GfxState *state); |
131 | |
132 | // End the current string, sorting it into the list of strings. |
133 | void endString(); |
134 | |
135 | // Coalesce strings that look like parts of the same line. |
136 | void coalesce(); |
137 | |
138 | // Find a string. If <top> is true, starts looking at top of page; |
139 | // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true, |
140 | // stops looking at bottom of page; otherwise stops looking at |
141 | // <xMax>,<yMax>. If found, sets the text bounding rectangle and |
142 | // returns true; otherwise returns false. |
143 | |
144 | // new functions |
145 | void AddLink(const HtmlLink &x) { links->AddLink(x); } |
146 | |
147 | // add an image to the current page |
148 | void addImage(std::unique_ptr<GooString> &&fname, GfxState *state); |
149 | |
150 | // number of images on the current page |
151 | int getNumImages() { return imgList.size(); } |
152 | |
153 | void dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages); |
154 | |
155 | // Clear the page. |
156 | void clear(); |
157 | |
158 | void conv(); |
159 | |
160 | private: |
161 | const HtmlFont *getFont(HtmlString *hStr) const { return fonts->Get(i: hStr->fontpos); } |
162 | |
163 | double fontSize; // current font size |
164 | bool rawOrder; // keep strings in content stream order |
165 | |
166 | HtmlString *curStr; // currently active string |
167 | |
168 | HtmlString *yxStrings; // strings in y-major order |
169 | HtmlString *xyStrings; // strings in x-major order |
170 | HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list |
171 | |
172 | void setDocName(const char *fname); |
173 | void dumpAsXML(FILE *f, int page); |
174 | void dumpComplex(FILE *f, int page, const std::vector<std::string> &backgroundImages); |
175 | int (FILE *const file, FILE *&pageFile, int page); |
176 | |
177 | // marks the position of the fonts that belong to current page (for noframes) |
178 | int fontsPageMarker; |
179 | HtmlFontAccu *fonts; |
180 | HtmlLinks *links; |
181 | std::vector<HtmlImage *> imgList; |
182 | |
183 | GooString *DocName; |
184 | int pageWidth; |
185 | int pageHeight; |
186 | int firstPage; // used to begin the numeration of pages |
187 | |
188 | friend class HtmlOutputDev; |
189 | }; |
190 | |
191 | //------------------------------------------------------------------------ |
192 | // HtmlMetaVar |
193 | //------------------------------------------------------------------------ |
194 | class HtmlMetaVar |
195 | { |
196 | public: |
197 | HtmlMetaVar(const char *_name, const char *_content); |
198 | ~HtmlMetaVar(); |
199 | |
200 | HtmlMetaVar(const HtmlMetaVar &) = delete; |
201 | HtmlMetaVar &operator=(const HtmlMetaVar &) = delete; |
202 | |
203 | GooString *toString() const; |
204 | |
205 | private: |
206 | GooString *name; |
207 | GooString *content; |
208 | }; |
209 | |
210 | //------------------------------------------------------------------------ |
211 | // HtmlOutputDev |
212 | //------------------------------------------------------------------------ |
213 | |
214 | class HtmlOutputDev : public OutputDev |
215 | { |
216 | public: |
217 | // Open a text output file. If <fileName> is nullptr, no file is written |
218 | // (this is useful, e.g., for searching text). If <useASCII7> is true, |
219 | // text is converted to 7-bit ASCII; otherwise, text is converted to |
220 | // 8-bit ISO Latin-1. <useASCII7> should also be set for Japanese |
221 | // (EUC-JP) text. If <rawOrder> is true, the text is kept in content |
222 | // stream order. |
223 | HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrder, int firstPage = 1, bool outline = false); |
224 | |
225 | // Destructor. |
226 | ~HtmlOutputDev() override; |
227 | |
228 | // Check if file was successfully created. |
229 | virtual bool isOk() { return ok; } |
230 | |
231 | //---- get info about output device |
232 | |
233 | // Does this device use upside-down coordinates? |
234 | // (Upside-down means (0,0) is the top left corner of the page.) |
235 | bool upsideDown() override { return true; } |
236 | |
237 | // Does this device use drawChar() or drawString()? |
238 | bool useDrawChar() override { return true; } |
239 | |
240 | // Does this device use beginType3Char/endType3Char? Otherwise, |
241 | // text in Type 3 fonts will be drawn with drawChar/drawString. |
242 | bool interpretType3Chars() override { return false; } |
243 | |
244 | // Does this device need non-text content? |
245 | bool needNonText() override { return true; } |
246 | |
247 | //----- initialization and control |
248 | |
249 | bool checkPageSlice(Page *p, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, int sliceX, int sliceY, int sliceW, int sliceH, bool printing, bool (*abortCheckCbk)(void *data) = nullptr, |
250 | void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr) override |
251 | { |
252 | docPage = p; |
253 | return true; |
254 | } |
255 | |
256 | // Start a page. |
257 | void startPage(int pageNum, GfxState *state, XRef *xref) override; |
258 | |
259 | // End a page. |
260 | void endPage() override; |
261 | |
262 | // add a background image to the list of background images, |
263 | // as this seems to be done outside other processing. takes ownership of img. |
264 | void addBackgroundImage(const std::string &img); |
265 | |
266 | //----- update text state |
267 | void updateFont(GfxState *state) override; |
268 | |
269 | //----- text drawing |
270 | void beginString(GfxState *state, const GooString *s) override; |
271 | void endString(GfxState *state) override; |
272 | void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, const Unicode *u, int uLen) override; |
273 | |
274 | void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg) override; |
275 | void drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg) override; |
276 | |
277 | // new feature |
278 | virtual int DevType() { return 1234; } |
279 | |
280 | int getPageWidth() { return maxPageWidth; } |
281 | int getPageHeight() { return maxPageHeight; } |
282 | |
283 | bool dumpDocOutline(PDFDoc *doc); |
284 | |
285 | private: |
286 | // convert encoding into a HTML standard, or encoding->c_str if not |
287 | // recognized. |
288 | static std::string mapEncodingToHtml(const std::string &encoding); |
289 | void doProcessLink(AnnotLink *link); |
290 | GooString *getLinkDest(AnnotLink *link); |
291 | void dumpMetaVars(FILE *); |
292 | void doFrame(int firstPage); |
293 | bool newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level = 1); |
294 | void newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines); |
295 | int getOutlinePageNum(OutlineItem *item); |
296 | void drawJpegImage(GfxState *state, Stream *str); |
297 | void drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask = false); |
298 | std::unique_ptr<GooString> createImageFileName(const char *ext); |
299 | |
300 | FILE *fContentsFrame; |
301 | FILE *page; // html file |
302 | // FILE *tin; // image log file |
303 | // bool write; |
304 | bool needClose; // need to close the file? |
305 | HtmlPage *pages; // text for the current page |
306 | bool rawOrder; // keep text in content stream order |
307 | bool doOutline; // output document outline |
308 | bool ok; // set up ok? |
309 | bool dumpJPEG; |
310 | int pageNum; |
311 | int maxPageWidth; |
312 | int maxPageHeight; |
313 | GooString *Docname; |
314 | GooString *docTitle; |
315 | std::vector<HtmlMetaVar *> glMetaVars; |
316 | Catalog *catalog; |
317 | Page *docPage; |
318 | std::vector<std::string> backgroundImages; |
319 | friend class HtmlPage; |
320 | }; |
321 | |
322 | #endif |
323 | |