1 | //======================================================================== |
2 | // |
3 | // GfxFont.h |
4 | // |
5 | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2005, 2008, 2015, 2017-2022 Albert Astals Cid <aacid@kde.org> |
17 | // Copyright (C) 2006 Takashi Iwai <tiwai@suse.de> |
18 | // Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com> |
19 | // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org> |
20 | // Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net> |
21 | // Copyright (C) 2007 Koji Otani <sho@bbr.jp> |
22 | // Copyright (C) 2011 Axel Strübing <axel.struebing@freenet.de> |
23 | // Copyright (C) 2011, 2012, 2014 Adrian Johnson <ajohnson@redneon.com> |
24 | // Copyright (C) 2015, 2018 Jason Crain <jason@aquaticape.us> |
25 | // Copyright (C) 2015 Thomas Freitag <Thomas.Freitag@alfa.de> |
26 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
27 | // Copyright (C) 2021, 2022 Oliver Sander <oliver.sander@tu-dresden.de> |
28 | // |
29 | // To see a description of the changes please see the Changelog file that |
30 | // came with your tarball or type make ChangeLog if you are building from git |
31 | // |
32 | //======================================================================== |
33 | |
34 | #ifndef GFXFONT_H |
35 | #define GFXFONT_H |
36 | |
37 | #include <memory> |
38 | #include <optional> |
39 | |
40 | #include "goo/GooString.h" |
41 | #include "Object.h" |
42 | #include "CharTypes.h" |
43 | #include "poppler_private_export.h" |
44 | |
45 | class Dict; |
46 | class CMap; |
47 | class CharCodeToUnicode; |
48 | class FoFiTrueType; |
49 | class PSOutputDev; |
50 | struct GfxFontCIDWidths; |
51 | struct Base14FontMapEntry; |
52 | class FNVHash; |
53 | |
54 | //------------------------------------------------------------------------ |
55 | // GfxFontType |
56 | //------------------------------------------------------------------------ |
57 | |
// Identifies the kind of font a GfxFont object represents.  Enumerators
// rely on their implicit values (fontUnknownType is 0, the rest follow
// consecutively), so the order below must not change.
enum GfxFontType
{
    // ----- types listed under Gfx8BitFont
    fontUnknownType,
    fontType1,
    fontType1C,
    fontType1COT,
    fontType3,
    fontTrueType,
    fontTrueTypeOT,

    // ----- types listed under GfxCIDFont
    fontCIDType0,
    fontCIDType0C,
    fontCIDType0COT,
    fontCIDType2,
    fontCIDType2OT
};
75 | |
76 | //------------------------------------------------------------------------ |
77 | // GfxFontCIDWidths |
78 | //------------------------------------------------------------------------ |
79 | |
80 | struct GfxFontCIDWidthExcep |
81 | { |
82 | CID first; // this record applies to |
83 | CID last; // CIDs <first>..<last> |
84 | double width; // char width |
85 | }; |
86 | |
87 | struct GfxFontCIDWidthExcepV |
88 | { |
89 | CID first; // this record applies to |
90 | CID last; // CIDs <first>..<last> |
91 | double height; // char height |
92 | double vx, vy; // origin position |
93 | }; |
94 | |
95 | struct GfxFontCIDWidths |
96 | { |
97 | double defWidth; // default char width |
98 | double defHeight; // default char height |
99 | double defVY; // default origin position |
100 | GfxFontCIDWidthExcep *exceps; // exceptions |
101 | int nExceps; // number of valid entries in exceps |
102 | GfxFontCIDWidthExcepV * // exceptions for vertical font |
103 | excepsV; |
104 | int nExcepsV; // number of valid entries in excepsV |
105 | }; |
106 | |
107 | //------------------------------------------------------------------------ |
108 | // GfxFontLoc |
109 | //------------------------------------------------------------------------ |
110 | |
// Says where a located font lives.  Enumerators rely on their implicit
// values (0, 1, 2), so the order below must not change.
enum GfxFontLocType
{
    gfxFontLocEmbedded, // the font is embedded in the PDF file
    gfxFontLocExternal, // the font is an external font file
    gfxFontLocResident // the font is resident in the PS printer
};
117 | |
118 | class POPPLER_PRIVATE_EXPORT GfxFontLoc |
119 | { |
120 | public: |
121 | GfxFontLoc(); |
122 | ~GfxFontLoc(); |
123 | |
124 | GfxFontLoc(const GfxFontLoc &) = delete; |
125 | GfxFontLoc(GfxFontLoc &&) noexcept; |
126 | GfxFontLoc &operator=(const GfxFontLoc &) = delete; |
127 | GfxFontLoc &operator=(GfxFontLoc &&other) noexcept; |
128 | |
129 | // Set the 'path' string from a GooString on the heap. |
130 | // Ownership of the object is taken. |
131 | void setPath(GooString *pathA); |
132 | const GooString *pathAsGooString() const; |
133 | |
134 | GfxFontLocType locType; |
135 | GfxFontType fontType; |
136 | Ref embFontID; // embedded stream obj ID |
137 | // (if locType == gfxFontLocEmbedded) |
138 | std::string path; // font file path |
139 | // (if locType == gfxFontLocExternal) |
140 | // PS font name |
141 | // (if locType == gfxFontLocResident) |
142 | int fontNum; // for TrueType collections |
143 | // (if locType == gfxFontLocExternal) |
144 | int substIdx; // substitute font index |
145 | // (if locType == gfxFontLocExternal, |
146 | // and a Base-14 substitution was made) |
147 | }; |
148 | |
149 | //------------------------------------------------------------------------ |
150 | // GfxFont |
151 | //------------------------------------------------------------------------ |
152 | |
// Bit masks tested against the font descriptor flags (see
// GfxFont::getFlags()).  The 1-based bit numbers in the comments are the
// positions used by the PDF font descriptor /Flags entry.
#define fontFixedWidth (1 << 0) // bit 1
#define fontSerif (1 << 1) // bit 2
#define fontSymbolic (1 << 2) // bit 3
#define fontItalic (1 << 6) // bit 7
#define fontBold (1 << 18) // bit 19
158 | |
159 | class POPPLER_PRIVATE_EXPORT GfxFont |
160 | { |
161 | public: |
162 | enum Stretch |
163 | { |
164 | StretchNotDefined, |
165 | UltraCondensed, |
166 | , |
167 | Condensed, |
168 | SemiCondensed, |
169 | Normal, |
170 | SemiExpanded, |
171 | Expanded, |
172 | ExtraExpanded, |
173 | UltraExpanded |
174 | }; |
175 | |
176 | enum Weight |
177 | { |
178 | WeightNotDefined, |
179 | W100, |
180 | W200, |
181 | W300, |
182 | W400, // Normal |
183 | W500, |
184 | W600, |
185 | W700, // Bold |
186 | W800, |
187 | W900 |
188 | }; |
189 | |
190 | // Build a GfxFont object. |
191 | static std::unique_ptr<GfxFont> makeFont(XRef *xref, const char *tagA, Ref idA, Dict *fontDict); |
192 | |
193 | GfxFont(const GfxFont &) = delete; |
194 | GfxFont &operator=(const GfxFont &other) = delete; |
195 | virtual ~GfxFont(); |
196 | |
197 | bool isOk() const { return ok; } |
198 | |
199 | // Get font tag. |
200 | const std::string &getTag() const { return tag; } |
201 | |
202 | // Get font dictionary ID. |
203 | const Ref *getID() const { return &id; } |
204 | |
205 | // Does this font match the tag? |
206 | bool matches(const char *tagA) const { return tag == tagA; } |
207 | |
208 | // Get font family name. |
209 | GooString *getFamily() const { return family; } |
210 | |
211 | // Get font stretch. |
212 | Stretch getStretch() const { return stretch; } |
213 | |
214 | // Get font weight. |
215 | Weight getWeight() const { return weight; } |
216 | |
217 | // Get the original font name (ignornig any munging that might have |
218 | // been done to map to a canonical Base-14 font name). |
219 | const std::optional<std::string> &getName() const { return name; } |
220 | |
221 | bool isSubset() const; |
222 | |
223 | // Returns the original font name without the subset tag (if it has one) |
224 | std::string getNameWithoutSubsetTag() const; |
225 | |
226 | // Get font type. |
227 | GfxFontType getType() const { return type; } |
228 | virtual bool isCIDFont() const { return false; } |
229 | |
230 | // Get embedded font ID, i.e., a ref for the font file stream. |
231 | // Returns false if there is no embedded font. |
232 | bool getEmbeddedFontID(Ref *embID) const |
233 | { |
234 | *embID = embFontID; |
235 | return embFontID != Ref::INVALID(); |
236 | } |
237 | |
238 | // Invalidate an embedded font |
239 | // Returns false if there is no embedded font. |
240 | bool invalidateEmbeddedFont() |
241 | { |
242 | if (embFontID != Ref::INVALID()) { |
243 | embFontID = Ref::INVALID(); |
244 | return true; |
245 | } |
246 | return false; |
247 | } |
248 | |
249 | // Get the PostScript font name for the embedded font. Returns |
250 | // NULL if there is no embedded font. |
251 | const GooString *getEmbeddedFontName() const { return embFontName; } |
252 | |
253 | // Get font descriptor flags. |
254 | int getFlags() const { return flags; } |
255 | bool isFixedWidth() const { return flags & fontFixedWidth; } |
256 | bool isSerif() const { return flags & fontSerif; } |
257 | bool isSymbolic() const { return flags & fontSymbolic; } |
258 | bool isItalic() const { return flags & fontItalic; } |
259 | bool isBold() const { return flags & fontBold; } |
260 | |
261 | // Return the Unicode map. |
262 | virtual const CharCodeToUnicode *getToUnicode() const = 0; |
263 | |
264 | // Return the font matrix. |
265 | const double *getFontMatrix() const { return fontMat; } |
266 | |
267 | // Return the font bounding box. |
268 | const double *getFontBBox() const { return fontBBox; } |
269 | |
270 | // Return the ascent and descent values. |
271 | double getAscent() const { return ascent; } |
272 | double getDescent() const { return descent; } |
273 | |
274 | // Return the writing mode (0=horizontal, 1=vertical). |
275 | virtual int getWMode() const { return 0; } |
276 | |
277 | // Locate the font file for this font. If <ps> is not null, includes PS |
278 | // printer-resident fonts. Returns std::optional without a value on failure. |
279 | // substituteFontName is passed down to the GlobalParams::findSystemFontFile/findBase14FontFile call |
280 | std::optional<GfxFontLoc> locateFont(XRef *xref, PSOutputDev *ps, GooString *substituteFontName = nullptr); |
281 | |
282 | // Read an external or embedded font file into a buffer. |
283 | std::optional<std::vector<unsigned char>> readEmbFontFile(XRef *xref); |
284 | |
285 | // Get the next char from a string <s> of <len> bytes, returning the |
286 | // char <code>, its Unicode mapping <u>, its displacement vector |
287 | // (<dx>, <dy>), and its origin offset vector (<ox>, <oy>). <uSize> |
288 | // is the number of entries available in <u>, and <uLen> is set to |
289 | // the number actually used. Returns the number of bytes used by |
290 | // the char code. |
291 | virtual int getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const = 0; |
292 | |
293 | // Does this font have a toUnicode map? |
294 | bool hasToUnicodeCMap() const { return hasToUnicode; } |
295 | |
296 | // Return the name of the encoding |
297 | const std::string &getEncodingName() const { return encodingName; } |
298 | |
299 | // Return AGLFN names of ligatures in the Standard and Expert encodings |
300 | // for use with fonts that are not compatible with the Standard 14 fonts. |
301 | // http://sourceforge.net/adobe/aglfn/wiki/AGL%20Specification/ |
302 | static const char *getAlternateName(const char *name); |
303 | |
304 | protected: |
305 | GfxFont(const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA); |
306 | |
307 | static GfxFontType getFontType(XRef *xref, Dict *fontDict, Ref *embID); |
308 | void readFontDescriptor(XRef *xref, Dict *fontDict); |
309 | CharCodeToUnicode *readToUnicodeCMap(Dict *fontDict, int nBits, CharCodeToUnicode *ctu); |
310 | static std::optional<GfxFontLoc> getExternalFont(GooString *path, bool cid); |
311 | |
312 | const std::string tag; // PDF font tag |
313 | const Ref id; // reference (used as unique ID) |
314 | std::optional<std::string> name; // font name |
315 | GooString *family; // font family |
316 | Stretch stretch; // font stretch |
317 | Weight weight; // font weight |
318 | const GfxFontType type; // type of font |
319 | int flags; // font descriptor flags |
320 | GooString *embFontName; // name of embedded font |
321 | Ref embFontID; // ref to embedded font file stream |
322 | double fontMat[6]; // font matrix (Type 3 only) |
323 | double fontBBox[4]; // font bounding box (Type 3 only) |
324 | double missingWidth; // "default" width |
325 | double ascent; // max height above baseline |
326 | double descent; // max depth below baseline |
327 | bool ok; |
328 | bool hasToUnicode; |
329 | std::string encodingName; |
330 | }; |
331 | |
332 | //------------------------------------------------------------------------ |
333 | // Gfx8BitFont |
334 | //------------------------------------------------------------------------ |
335 | |
336 | class POPPLER_PRIVATE_EXPORT Gfx8BitFont : public GfxFont |
337 | { |
338 | public: |
339 | Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict); |
340 | |
341 | int getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const override; |
342 | |
343 | // Return the encoding. |
344 | char **getEncoding() { return enc; } |
345 | |
346 | // Return the Unicode map. |
347 | const CharCodeToUnicode *getToUnicode() const override; |
348 | |
349 | // Return the character name associated with <code>. |
350 | const char *getCharName(int code) const { return enc[code]; } |
351 | |
352 | // Returns true if the PDF font specified an encoding. |
353 | bool getHasEncoding() const { return hasEncoding; } |
354 | |
355 | // Returns true if the PDF font specified MacRomanEncoding. |
356 | bool getUsesMacRomanEnc() const { return usesMacRomanEnc; } |
357 | |
358 | // Get width of a character. |
359 | double getWidth(unsigned char c) const { return widths[c]; } |
360 | |
361 | // Return a char code-to-GID mapping for the provided font file. |
362 | // (This is only useful for TrueType fonts.) |
363 | int *getCodeToGIDMap(FoFiTrueType *ff); |
364 | |
365 | // Return the Type 3 CharProc dictionary, or NULL if none. |
366 | Dict *getCharProcs(); |
367 | |
368 | // Return the Type 3 CharProc for the character associated with <code>. |
369 | Object getCharProc(int code); |
370 | Object getCharProcNF(int code); |
371 | |
372 | // Return the Type 3 Resources dictionary, or NULL if none. |
373 | Dict *getResources(); |
374 | |
375 | private: |
376 | ~Gfx8BitFont() override; |
377 | |
378 | const Base14FontMapEntry *base14; // for Base-14 fonts only; NULL otherwise |
379 | char *enc[256]; // char code --> char name |
380 | char encFree[256]; // boolean for each char name: if set, |
381 | // the string is malloc'ed |
382 | CharCodeToUnicode *ctu; // char code --> Unicode |
383 | bool hasEncoding; |
384 | bool usesMacRomanEnc; |
385 | double widths[256]; // character widths |
386 | Object charProcs; // Type 3 CharProcs dictionary |
387 | Object resources; // Type 3 Resources dictionary |
388 | |
389 | friend class GfxFont; |
390 | }; |
391 | |
392 | //------------------------------------------------------------------------ |
393 | // GfxCIDFont |
394 | //------------------------------------------------------------------------ |
395 | |
396 | class POPPLER_PRIVATE_EXPORT GfxCIDFont : public GfxFont |
397 | { |
398 | public: |
399 | GfxCIDFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict); |
400 | |
401 | bool isCIDFont() const override { return true; } |
402 | |
403 | int getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const override; |
404 | |
405 | // Return the writing mode (0=horizontal, 1=vertical). |
406 | int getWMode() const override; |
407 | |
408 | // Return the Unicode map. |
409 | const CharCodeToUnicode *getToUnicode() const override; |
410 | |
411 | // Get the collection name (<registry>-<ordering>). |
412 | const GooString *getCollection() const; |
413 | |
414 | // Return the CID-to-GID mapping table. These should only be called |
415 | // if type is fontCIDType2. |
416 | int *getCIDToGID() const { return cidToGID; } |
417 | unsigned int getCIDToGIDLen() const { return cidToGIDLen; } |
418 | |
419 | int *getCodeToGIDMap(FoFiTrueType *ff, int *codeToGIDLen); |
420 | |
421 | double getWidth(char *s, int len) const; |
422 | |
423 | private: |
424 | ~GfxCIDFont() override; |
425 | |
426 | int mapCodeToGID(FoFiTrueType *ff, int cmapi, Unicode unicode, bool wmode); |
427 | double getWidth(CID cid) const; // Get width of a character. |
428 | |
429 | GooString *collection; // collection name |
430 | std::shared_ptr<CMap> cMap; // char code --> CID |
431 | CharCodeToUnicode *ctu; // CID --> Unicode |
432 | bool ctuUsesCharCode; // true: ctu maps char code to Unicode; |
433 | // false: ctu maps CID to Unicode |
434 | GfxFontCIDWidths widths; // character widths |
435 | int *cidToGID; // CID --> GID mapping (for embedded |
436 | // TrueType fonts) |
437 | unsigned int cidToGIDLen; |
438 | }; |
439 | |
440 | //------------------------------------------------------------------------ |
441 | // GfxFontDict |
442 | //------------------------------------------------------------------------ |
443 | |
444 | class GfxFontDict |
445 | { |
446 | public: |
447 | // Build the font dictionary, given the PDF font dictionary. |
448 | GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict); |
449 | |
450 | GfxFontDict(const GfxFontDict &) = delete; |
451 | GfxFontDict &operator=(const GfxFontDict &) = delete; |
452 | |
453 | // Get the specified font. |
454 | std::shared_ptr<GfxFont> lookup(const char *tag) const; |
455 | |
456 | // Iterative access. |
457 | int getNumFonts() const { return fonts.size(); } |
458 | const std::shared_ptr<GfxFont> &getFont(int i) const { return fonts[i]; } |
459 | |
460 | private: |
461 | int hashFontObject(Object *obj); |
462 | void hashFontObject1(const Object *obj, FNVHash *h); |
463 | |
464 | std::vector<std::shared_ptr<GfxFont>> fonts; |
465 | }; |
466 | |
467 | #endif |
468 | |