1 | //======================================================================== |
2 | // |
3 | // GfxFont.cc |
4 | // |
5 | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2005, 2006, 2008-2010, 2012, 2014, 2015, 2017-2023 Albert Astals Cid <aacid@kde.org> |
17 | // Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com> |
18 | // Copyright (C) 2006 Takashi Iwai <tiwai@suse.de> |
19 | // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org> |
20 | // Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net> |
21 | // Copyright (C) 2007 Koji Otani <sho@bbr.jp> |
22 | // Copyright (C) 2007 Ed Catmur <ed@catmur.co.uk> |
23 | // Copyright (C) 2008 Jonathan Kew <jonathan_kew@sil.org> |
24 | // Copyright (C) 2008 Ed Avis <eda@waniasset.com> |
25 | // Copyright (C) 2008, 2010 Hib Eris <hib@hiberis.nl> |
26 | // Copyright (C) 2009 Peter Kerzum <kerzum@yandex-team.ru> |
27 | // Copyright (C) 2009, 2010 David Benjamin <davidben@mit.edu> |
28 | // Copyright (C) 2011 Axel Strübing <axel.struebing@freenet.de> |
29 | // Copyright (C) 2011, 2012, 2014 Adrian Johnson <ajohnson@redneon.com> |
30 | // Copyright (C) 2012 Yi Yang <ahyangyi@gmail.com> |
31 | // Copyright (C) 2012 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> |
32 | // Copyright (C) 2012, 2017 Thomas Freitag <Thomas.Freitag@alfa.de> |
33 | // Copyright (C) 2013-2016, 2018 Jason Crain <jason@aquaticape.us> |
34 | // Copyright (C) 2014 Olly Betts <olly@survex.com> |
35 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
36 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
37 | // Copyright (C) 2019 LE GARREC Vincent <legarrec.vincent@gmail.com> |
38 | // Copyright (C) 2021, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de> |
39 | // Copyright (C) 2023 Khaled Hosny <khaled@aliftype.com> |
40 | // Copyright (C) 2024 Nelson Benítez León <nbenitezl@gmail.com> |
41 | // |
42 | // To see a description of the changes please see the Changelog file that |
43 | // came with your tarball or type make ChangeLog if you are building from git |
44 | // |
45 | //======================================================================== |
46 | |
47 | #include <config.h> |
48 | |
49 | #include <cstdio> |
50 | #include <cstdlib> |
51 | #include <cstring> |
52 | #include <cctype> |
53 | #include <cmath> |
54 | #include <climits> |
55 | #include <algorithm> |
56 | #include "goo/gmem.h" |
57 | #include "Error.h" |
58 | #include "Object.h" |
59 | #include "Dict.h" |
60 | #include "GlobalParams.h" |
61 | #include "CMap.h" |
62 | #include "CharCodeToUnicode.h" |
63 | #include "FontEncodingTables.h" |
64 | #include "BuiltinFont.h" |
65 | #include "UnicodeTypeTable.h" |
66 | #include <fofi/FoFiIdentifier.h> |
67 | #include <fofi/FoFiType1.h> |
68 | #include <fofi/FoFiType1C.h> |
69 | #include <fofi/FoFiTrueType.h> |
70 | #include "GfxFont.h" |
71 | #include "PSOutputDev.h" |
72 | |
73 | //------------------------------------------------------------------------ |
74 | |
// One alias record: a font name as it may be spelled in a document
// (altName) and the Base-14 font name it stands for (base14Name).
struct Base14FontMapEntry
{
    const char *altName;
    const char *base14Name;
};

// Alias table for the Base-14 fonts.
// The entries are listed in ascending byte order of altName.
// NOTE(review): the lookup code is presumably relying on that order --
// confirm before inserting or reordering entries.
static const Base14FontMapEntry base14FontMap[] = {
    // Arial -> Helvetica
    { "Arial", "Helvetica" },
    { "Arial,Bold", "Helvetica-Bold" },
    { "Arial,BoldItalic", "Helvetica-BoldOblique" },
    { "Arial,Italic", "Helvetica-Oblique" },
    { "Arial-Bold", "Helvetica-Bold" },
    { "Arial-BoldItalic", "Helvetica-BoldOblique" },
    { "Arial-BoldItalicMT", "Helvetica-BoldOblique" },
    { "Arial-BoldMT", "Helvetica-Bold" },
    { "Arial-Italic", "Helvetica-Oblique" },
    { "Arial-ItalicMT", "Helvetica-Oblique" },
    { "ArialMT", "Helvetica" },
    // Courier / Courier New -> Courier
    { "Courier", "Courier" },
    { "Courier,Bold", "Courier-Bold" },
    { "Courier,BoldItalic", "Courier-BoldOblique" },
    { "Courier,Italic", "Courier-Oblique" },
    { "Courier-Bold", "Courier-Bold" },
    { "Courier-BoldOblique", "Courier-BoldOblique" },
    { "Courier-Oblique", "Courier-Oblique" },
    { "CourierNew", "Courier" },
    { "CourierNew,Bold", "Courier-Bold" },
    { "CourierNew,BoldItalic", "Courier-BoldOblique" },
    { "CourierNew,Italic", "Courier-Oblique" },
    { "CourierNew-Bold", "Courier-Bold" },
    { "CourierNew-BoldItalic", "Courier-BoldOblique" },
    { "CourierNew-Italic", "Courier-Oblique" },
    { "CourierNewPS-BoldItalicMT", "Courier-BoldOblique" },
    { "CourierNewPS-BoldMT", "Courier-Bold" },
    { "CourierNewPS-ItalicMT", "Courier-Oblique" },
    { "CourierNewPSMT", "Courier" },
    // Helvetica
    { "Helvetica", "Helvetica" },
    { "Helvetica,Bold", "Helvetica-Bold" },
    { "Helvetica,BoldItalic", "Helvetica-BoldOblique" },
    { "Helvetica,Italic", "Helvetica-Oblique" },
    { "Helvetica-Bold", "Helvetica-Bold" },
    { "Helvetica-BoldItalic", "Helvetica-BoldOblique" },
    { "Helvetica-BoldOblique", "Helvetica-BoldOblique" },
    { "Helvetica-Italic", "Helvetica-Oblique" },
    { "Helvetica-Oblique", "Helvetica-Oblique" },
    // Symbol (all style variants collapse to plain Symbol)
    { "Symbol", "Symbol" },
    { "Symbol,Bold", "Symbol" },
    { "Symbol,BoldItalic", "Symbol" },
    { "Symbol,Italic", "Symbol" },
    { "SymbolMT", "Symbol" },
    { "SymbolMT,Bold", "Symbol" },
    { "SymbolMT,BoldItalic", "Symbol" },
    { "SymbolMT,Italic", "Symbol" },
    // Times / Times New Roman -> Times
    { "Times-Bold", "Times-Bold" },
    { "Times-BoldItalic", "Times-BoldItalic" },
    { "Times-Italic", "Times-Italic" },
    { "Times-Roman", "Times-Roman" },
    { "TimesNewRoman", "Times-Roman" },
    { "TimesNewRoman,Bold", "Times-Bold" },
    { "TimesNewRoman,BoldItalic", "Times-BoldItalic" },
    { "TimesNewRoman,Italic", "Times-Italic" },
    { "TimesNewRoman-Bold", "Times-Bold" },
    { "TimesNewRoman-BoldItalic", "Times-BoldItalic" },
    { "TimesNewRoman-Italic", "Times-Italic" },
    { "TimesNewRomanPS", "Times-Roman" },
    { "TimesNewRomanPS-Bold", "Times-Bold" },
    { "TimesNewRomanPS-BoldItalic", "Times-BoldItalic" },
    { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
    { "TimesNewRomanPS-BoldMT", "Times-Bold" },
    { "TimesNewRomanPS-Italic", "Times-Italic" },
    { "TimesNewRomanPS-ItalicMT", "Times-Italic" },
    { "TimesNewRomanPSMT", "Times-Roman" },
    { "TimesNewRomanPSMT,Bold", "Times-Bold" },
    { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
    { "TimesNewRomanPSMT,Italic", "Times-Italic" },
    // ZapfDingbats
    { "ZapfDingbats", "ZapfDingbats" },
};
150 | |
151 | //------------------------------------------------------------------------ |
152 | |
// Substitute font names, indexed by
//   {fixed: 0, sans-serif: 4, serif: 8} + 2 * bold + italic
// NB: must be in same order as psSubstFonts in PSOutputDev.cc
static const char *base14SubstFonts[14] = {
    "Courier", // 0: fixed
    "Courier-Oblique", // 1: fixed, italic
    "Courier-Bold", // 2: fixed, bold
    "Courier-BoldOblique", // 3: fixed, bold + italic
    "Helvetica", // 4: sans-serif
    "Helvetica-Oblique", // 5: sans-serif, italic
    "Helvetica-Bold", // 6: sans-serif, bold
    "Helvetica-BoldOblique", // 7: sans-serif, bold + italic
    "Times-Roman", // 8: serif
    "Times-Italic", // 9: serif, italic
    "Times-Bold", // 10: serif, bold
    "Times-BoldItalic", // 11: serif, bold + italic
    // the last two are never used for substitution
    "Symbol", // 12
    "ZapfDingbats" // 13
};
159 | |
160 | //------------------------------------------------------------------------ |
161 | |
162 | static int parseCharName(char *charName, Unicode *uBuf, int uLen, bool names, bool ligatures, bool numeric, bool hex, bool variants); |
163 | |
164 | //------------------------------------------------------------------------ |
165 | |
166 | static int readFromStream(void *data) |
167 | { |
168 | return ((Stream *)data)->getChar(); |
169 | } |
170 | |
171 | //------------------------------------------------------------------------ |
172 | // GfxFontLoc |
173 | //------------------------------------------------------------------------ |
174 | |
175 | GfxFontLoc::GfxFontLoc() |
176 | { |
177 | fontNum = 0; |
178 | substIdx = -1; |
179 | } |
180 | |
181 | GfxFontLoc::~GfxFontLoc() = default; |
182 | |
183 | GfxFontLoc::GfxFontLoc(GfxFontLoc &&other) noexcept = default; |
184 | |
185 | GfxFontLoc &GfxFontLoc::operator=(GfxFontLoc &&other) noexcept = default; |
186 | |
187 | void GfxFontLoc::setPath(GooString *pathA) |
188 | { |
189 | path = pathA->toStr(); |
190 | delete pathA; |
191 | } |
192 | |
193 | const GooString *GfxFontLoc::pathAsGooString() const |
194 | { |
195 | return (const GooString *)(&path); |
196 | } |
197 | |
198 | //------------------------------------------------------------------------ |
199 | // GfxFont |
200 | //------------------------------------------------------------------------ |
201 | |
202 | std::unique_ptr<GfxFont> GfxFont::makeFont(XRef *xref, const char *tagA, Ref idA, Dict *fontDict) |
203 | { |
204 | std::optional<std::string> name; |
205 | Ref embFontIDA; |
206 | GfxFontType typeA; |
207 | |
208 | // get base font name |
209 | Object obj1 = fontDict->lookup(key: "BaseFont" ); |
210 | if (obj1.isName()) { |
211 | name = obj1.getName(); |
212 | } |
213 | |
214 | // There is no BaseFont in Type 3 fonts, try fontDescriptor.FontName |
215 | if (!name) { |
216 | Object fontDesc = fontDict->lookup(key: "FontDescriptor" ); |
217 | if (fontDesc.isDict()) { |
218 | Object obj2 = fontDesc.dictLookup(key: "FontName" ); |
219 | if (obj2.isName()) { |
220 | name = obj2.getName(); |
221 | } |
222 | } |
223 | } |
224 | |
225 | // As a last resort try the Name key |
226 | if (!name) { |
227 | Object obj2 = fontDict->lookup(key: "Name" ); |
228 | if (obj2.isName()) { |
229 | name = obj2.getName(); |
230 | } |
231 | } |
232 | |
233 | // get embedded font ID and font type |
234 | typeA = getFontType(xref, fontDict, embID: &embFontIDA); |
235 | |
236 | // create the font object |
237 | GfxFont *font; |
238 | if (typeA < fontCIDType0) { |
239 | font = new Gfx8BitFont(xref, tagA, idA, std::move(name), typeA, embFontIDA, fontDict); |
240 | } else { |
241 | font = new GfxCIDFont(xref, tagA, idA, std::move(name), typeA, embFontIDA, fontDict); |
242 | } |
243 | |
244 | return std::unique_ptr<GfxFont>(font); |
245 | } |
246 | |
247 | GfxFont::GfxFont(const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA) : tag(tagA), id(idA), name(std::move(nameA)), type(typeA) |
248 | { |
249 | ok = false; |
250 | embFontID = embFontIDA; |
251 | embFontName = nullptr; |
252 | family = nullptr; |
253 | stretch = StretchNotDefined; |
254 | weight = WeightNotDefined; |
255 | hasToUnicode = false; |
256 | } |
257 | |
258 | GfxFont::~GfxFont() |
259 | { |
260 | delete family; |
261 | if (embFontName) { |
262 | delete embFontName; |
263 | } |
264 | } |
265 | |
266 | bool GfxFont::isSubset() const |
267 | { |
268 | if (name) { |
269 | unsigned int i; |
270 | for (i = 0; i < name->size(); ++i) { |
271 | if ((*name)[i] < 'A' || (*name)[i] > 'Z') { |
272 | break; |
273 | } |
274 | } |
275 | return i == 6 && name->size() > 7 && (*name)[6] == '+'; |
276 | } |
277 | return false; |
278 | } |
279 | |
280 | std::string GfxFont::getNameWithoutSubsetTag() const |
281 | { |
282 | if (!name) { |
283 | return {}; |
284 | } |
285 | |
286 | if (!isSubset()) { |
287 | return *name; |
288 | } |
289 | |
290 | return name->substr(pos: 7); |
291 | } |
292 | |
293 | // This function extracts three pieces of information: |
294 | // 1. the "expected" font type, i.e., the font type implied by |
295 | // Font.Subtype, DescendantFont.Subtype, and |
296 | // FontDescriptor.FontFile3.Subtype |
297 | // 2. the embedded font object ID |
298 | // 3. the actual font type - determined by examining the embedded font |
299 | // if there is one, otherwise equal to the expected font type |
300 | // If the expected and actual font types don't match, a warning |
301 | // message is printed. The expected font type is not used for |
302 | // anything else. |
303 | GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID) |
304 | { |
305 | GfxFontType t, expectedType; |
306 | FoFiIdentifierType fft; |
307 | Dict *fontDict2; |
308 | bool isType0, err; |
309 | |
310 | t = fontUnknownType; |
311 | *embID = Ref::INVALID(); |
312 | err = false; |
313 | |
314 | Object subtype = fontDict->lookup(key: "Subtype" ); |
315 | expectedType = fontUnknownType; |
316 | isType0 = false; |
317 | if (subtype.isName(nameA: "Type1" ) || subtype.isName(nameA: "MMType1" )) { |
318 | expectedType = fontType1; |
319 | } else if (subtype.isName(nameA: "Type1C" )) { |
320 | expectedType = fontType1C; |
321 | } else if (subtype.isName(nameA: "Type3" )) { |
322 | expectedType = fontType3; |
323 | } else if (subtype.isName(nameA: "TrueType" )) { |
324 | expectedType = fontTrueType; |
325 | } else if (subtype.isName(nameA: "Type0" )) { |
326 | isType0 = true; |
327 | } else { |
328 | error(category: errSyntaxWarning, pos: -1, msg: "Unknown font type: '{0:s}'" , subtype.isName() ? subtype.getName() : "???" ); |
329 | } |
330 | |
331 | fontDict2 = fontDict; |
332 | Object obj1 = fontDict->lookup(key: "DescendantFonts" ); |
333 | Object obj2; // Do not move to inside the if |
334 | // we need it around so that fontDict2 remains valid |
335 | if (obj1.isArray()) { |
336 | if (obj1.arrayGetLength() == 0) { |
337 | error(category: errSyntaxWarning, pos: -1, msg: "Empty DescendantFonts array in font" ); |
338 | } else { |
339 | obj2 = obj1.arrayGet(i: 0); |
340 | if (obj2.isDict()) { |
341 | if (!isType0) { |
342 | error(category: errSyntaxWarning, pos: -1, msg: "Non-CID font with DescendantFonts array" ); |
343 | } |
344 | fontDict2 = obj2.getDict(); |
345 | subtype = fontDict2->lookup(key: "Subtype" ); |
346 | if (subtype.isName(nameA: "CIDFontType0" )) { |
347 | if (isType0) { |
348 | expectedType = fontCIDType0; |
349 | } |
350 | } else if (subtype.isName(nameA: "CIDFontType2" )) { |
351 | if (isType0) { |
352 | expectedType = fontCIDType2; |
353 | } |
354 | } |
355 | } |
356 | } |
357 | } |
358 | |
359 | Object fontDesc = fontDict2->lookup(key: "FontDescriptor" ); |
360 | if (fontDesc.isDict()) { |
361 | Object obj3 = fontDesc.dictLookupNF(key: "FontFile" ).copy(); |
362 | if (obj3.isRef()) { |
363 | *embID = obj3.getRef(); |
364 | if (expectedType != fontType1) { |
365 | err = true; |
366 | } |
367 | } |
368 | if (*embID == Ref::INVALID() && (obj3 = fontDesc.dictLookupNF(key: "FontFile2" ).copy(), obj3.isRef())) { |
369 | *embID = obj3.getRef(); |
370 | if (isType0) { |
371 | expectedType = fontCIDType2; |
372 | } else if (expectedType != fontTrueType) { |
373 | err = true; |
374 | } |
375 | } |
376 | if (*embID == Ref::INVALID() && (obj3 = fontDesc.dictLookupNF(key: "FontFile3" ).copy(), obj3.isRef())) { |
377 | *embID = obj3.getRef(); |
378 | Object obj4 = obj3.fetch(xref); |
379 | if (obj4.isStream()) { |
380 | subtype = obj4.streamGetDict()->lookup(key: "Subtype" ); |
381 | if (subtype.isName(nameA: "Type1" )) { |
382 | if (expectedType != fontType1) { |
383 | err = true; |
384 | expectedType = isType0 ? fontCIDType0 : fontType1; |
385 | } |
386 | } else if (subtype.isName(nameA: "Type1C" )) { |
387 | if (expectedType == fontType1) { |
388 | expectedType = fontType1C; |
389 | } else if (expectedType != fontType1C) { |
390 | err = true; |
391 | expectedType = isType0 ? fontCIDType0C : fontType1C; |
392 | } |
393 | } else if (subtype.isName(nameA: "TrueType" )) { |
394 | if (expectedType != fontTrueType) { |
395 | err = true; |
396 | expectedType = isType0 ? fontCIDType2 : fontTrueType; |
397 | } |
398 | } else if (subtype.isName(nameA: "CIDFontType0C" )) { |
399 | if (expectedType == fontCIDType0) { |
400 | expectedType = fontCIDType0C; |
401 | } else { |
402 | err = true; |
403 | expectedType = isType0 ? fontCIDType0C : fontType1C; |
404 | } |
405 | } else if (subtype.isName(nameA: "OpenType" )) { |
406 | if (expectedType == fontTrueType) { |
407 | expectedType = fontTrueTypeOT; |
408 | } else if (expectedType == fontType1) { |
409 | expectedType = fontType1COT; |
410 | } else if (expectedType == fontCIDType0) { |
411 | expectedType = fontCIDType0COT; |
412 | } else if (expectedType == fontCIDType2) { |
413 | expectedType = fontCIDType2OT; |
414 | } else { |
415 | err = true; |
416 | } |
417 | } else { |
418 | error(category: errSyntaxError, pos: -1, msg: "Unknown font type '{0:s}'" , subtype.isName() ? subtype.getName() : "???" ); |
419 | } |
420 | } |
421 | } |
422 | } |
423 | |
424 | t = fontUnknownType; |
425 | if (*embID != Ref::INVALID()) { |
426 | Object obj3(*embID); |
427 | Object obj4 = obj3.fetch(xref); |
428 | if (obj4.isStream()) { |
429 | obj4.streamReset(); |
430 | fft = FoFiIdentifier::identifyStream(getChar: &readFromStream, data: obj4.getStream()); |
431 | obj4.streamClose(); |
432 | switch (fft) { |
433 | case fofiIdType1PFA: |
434 | case fofiIdType1PFB: |
435 | t = fontType1; |
436 | break; |
437 | case fofiIdCFF8Bit: |
438 | t = isType0 ? fontCIDType0C : fontType1C; |
439 | break; |
440 | case fofiIdCFFCID: |
441 | t = fontCIDType0C; |
442 | break; |
443 | case fofiIdTrueType: |
444 | case fofiIdTrueTypeCollection: |
445 | t = isType0 ? fontCIDType2 : fontTrueType; |
446 | break; |
447 | case fofiIdOpenTypeCFF8Bit: |
448 | t = isType0 ? fontCIDType0COT : fontType1COT; |
449 | break; |
450 | case fofiIdOpenTypeCFFCID: |
451 | t = fontCIDType0COT; |
452 | break; |
453 | default: |
454 | error(category: errSyntaxError, pos: -1, msg: "Embedded font file may be invalid" ); |
455 | break; |
456 | } |
457 | } |
458 | } |
459 | |
460 | if (t == fontUnknownType) { |
461 | t = expectedType; |
462 | } |
463 | |
464 | if (t != expectedType) { |
465 | err = true; |
466 | } |
467 | |
468 | if (err) { |
469 | error(category: errSyntaxWarning, pos: -1, msg: "Mismatch between font type and embedded font file" ); |
470 | } |
471 | |
472 | return t; |
473 | } |
474 | |
475 | void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) |
476 | { |
477 | double t; |
478 | |
479 | // assume Times-Roman by default (for substitution purposes) |
480 | flags = fontSerif; |
481 | |
482 | missingWidth = 0; |
483 | |
484 | Object obj1 = fontDict->lookup(key: "FontDescriptor" ); |
485 | if (obj1.isDict()) { |
486 | |
487 | // get flags |
488 | Object obj2 = obj1.dictLookup(key: "Flags" ); |
489 | if (obj2.isInt()) { |
490 | flags = obj2.getInt(); |
491 | } |
492 | |
493 | // get name |
494 | obj2 = obj1.dictLookup(key: "FontName" ); |
495 | if (obj2.isName()) { |
496 | embFontName = new GooString(obj2.getName()); |
497 | } |
498 | if (embFontName == nullptr) { |
499 | // get name with typo |
500 | obj2 = obj1.dictLookup(key: "Fontname" ); |
501 | if (obj2.isName()) { |
502 | embFontName = new GooString(obj2.getName()); |
503 | error(category: errSyntaxWarning, pos: -1, msg: "The file uses Fontname instead of FontName please notify the creator that the file is broken" ); |
504 | } |
505 | } |
506 | |
507 | // get family |
508 | obj2 = obj1.dictLookup(key: "FontFamily" ); |
509 | if (obj2.isString()) { |
510 | family = new GooString(obj2.getString()); |
511 | } |
512 | |
513 | // get stretch |
514 | obj2 = obj1.dictLookup(key: "FontStretch" ); |
515 | if (obj2.isName()) { |
516 | if (strcmp(s1: obj2.getName(), s2: "UltraCondensed" ) == 0) { |
517 | stretch = UltraCondensed; |
518 | } else if (strcmp(s1: obj2.getName(), s2: "ExtraCondensed" ) == 0) { |
519 | stretch = ExtraCondensed; |
520 | } else if (strcmp(s1: obj2.getName(), s2: "Condensed" ) == 0) { |
521 | stretch = Condensed; |
522 | } else if (strcmp(s1: obj2.getName(), s2: "SemiCondensed" ) == 0) { |
523 | stretch = SemiCondensed; |
524 | } else if (strcmp(s1: obj2.getName(), s2: "Normal" ) == 0) { |
525 | stretch = Normal; |
526 | } else if (strcmp(s1: obj2.getName(), s2: "SemiExpanded" ) == 0) { |
527 | stretch = SemiExpanded; |
528 | } else if (strcmp(s1: obj2.getName(), s2: "Expanded" ) == 0) { |
529 | stretch = Expanded; |
530 | } else if (strcmp(s1: obj2.getName(), s2: "ExtraExpanded" ) == 0) { |
531 | stretch = ExtraExpanded; |
532 | } else if (strcmp(s1: obj2.getName(), s2: "UltraExpanded" ) == 0) { |
533 | stretch = UltraExpanded; |
534 | } else { |
535 | error(category: errSyntaxWarning, pos: -1, msg: "Invalid Font Stretch" ); |
536 | } |
537 | } |
538 | |
539 | // get weight |
540 | obj2 = obj1.dictLookup(key: "FontWeight" ); |
541 | if (obj2.isNum()) { |
542 | if (obj2.getNum() == 100) { |
543 | weight = W100; |
544 | } else if (obj2.getNum() == 200) { |
545 | weight = W200; |
546 | } else if (obj2.getNum() == 300) { |
547 | weight = W300; |
548 | } else if (obj2.getNum() == 400) { |
549 | weight = W400; |
550 | } else if (obj2.getNum() == 500) { |
551 | weight = W500; |
552 | } else if (obj2.getNum() == 600) { |
553 | weight = W600; |
554 | } else if (obj2.getNum() == 700) { |
555 | weight = W700; |
556 | } else if (obj2.getNum() == 800) { |
557 | weight = W800; |
558 | } else if (obj2.getNum() == 900) { |
559 | weight = W900; |
560 | } else { |
561 | error(category: errSyntaxWarning, pos: -1, msg: "Invalid Font Weight" ); |
562 | } |
563 | } |
564 | |
565 | // look for MissingWidth |
566 | obj2 = obj1.dictLookup(key: "MissingWidth" ); |
567 | if (obj2.isNum()) { |
568 | missingWidth = obj2.getNum(); |
569 | } |
570 | |
571 | // get Ascent and Descent |
572 | obj2 = obj1.dictLookup(key: "Ascent" ); |
573 | if (obj2.isNum()) { |
574 | t = 0.001 * obj2.getNum(); |
575 | // some broken font descriptors specify a negative ascent |
576 | if (t < 0) { |
577 | t = -t; |
578 | } |
579 | // some broken font descriptors set ascent and descent to 0; |
580 | // others set it to ridiculous values (e.g., 32768) |
581 | if (t != 0 && t < 3) { |
582 | ascent = t; |
583 | } |
584 | } |
585 | obj2 = obj1.dictLookup(key: "Descent" ); |
586 | if (obj2.isNum()) { |
587 | t = 0.001 * obj2.getNum(); |
588 | // some broken font descriptors specify a positive descent |
589 | if (t > 0) { |
590 | t = -t; |
591 | } |
592 | // some broken font descriptors set ascent and descent to 0 |
593 | if (t != 0 && t > -3) { |
594 | descent = t; |
595 | } |
596 | } |
597 | |
598 | // font FontBBox |
599 | obj2 = obj1.dictLookup(key: "FontBBox" ); |
600 | if (obj2.isArray()) { |
601 | for (int i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) { |
602 | Object obj3 = obj2.arrayGet(i); |
603 | if (obj3.isNum()) { |
604 | fontBBox[i] = 0.001 * obj3.getNum(); |
605 | } |
606 | } |
607 | } |
608 | } |
609 | } |
610 | |
611 | CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits, CharCodeToUnicode *ctu) |
612 | { |
613 | GooString *buf; |
614 | |
615 | Object obj1 = fontDict->lookup(key: "ToUnicode" ); |
616 | if (!obj1.isStream()) { |
617 | return nullptr; |
618 | } |
619 | buf = new GooString(); |
620 | obj1.getStream()->fillGooString(s: buf); |
621 | obj1.streamClose(); |
622 | if (ctu) { |
623 | ctu->mergeCMap(buf, nBits); |
624 | } else { |
625 | ctu = CharCodeToUnicode::parseCMap(buf, nBits); |
626 | } |
627 | hasToUnicode = true; |
628 | delete buf; |
629 | return ctu; |
630 | } |
631 | |
632 | std::optional<GfxFontLoc> GfxFont::locateFont(XRef *xref, PSOutputDev *ps, GooString *substituteFontName) |
633 | { |
634 | SysFontType sysFontType; |
635 | GooString *path, *base14Name; |
636 | int substIdx, fontNum; |
637 | bool embed; |
638 | |
639 | if (type == fontType3) { |
640 | return std::nullopt; |
641 | } |
642 | |
643 | //----- embedded font |
644 | if (embFontID != Ref::INVALID()) { |
645 | embed = true; |
646 | Object refObj(embFontID); |
647 | Object embFontObj = refObj.fetch(xref); |
648 | if (!embFontObj.isStream()) { |
649 | error(category: errSyntaxError, pos: -1, msg: "Embedded font object is wrong type" ); |
650 | embed = false; |
651 | } |
652 | if (embed) { |
653 | if (ps) { |
654 | switch (type) { |
655 | case fontType1: |
656 | case fontType1C: |
657 | case fontType1COT: |
658 | embed = ps->getEmbedType1(); |
659 | break; |
660 | case fontTrueType: |
661 | case fontTrueTypeOT: |
662 | embed = ps->getEmbedTrueType(); |
663 | break; |
664 | case fontCIDType0C: |
665 | case fontCIDType0COT: |
666 | embed = ps->getEmbedCIDPostScript(); |
667 | break; |
668 | case fontCIDType2: |
669 | case fontCIDType2OT: |
670 | embed = ps->getEmbedCIDTrueType(); |
671 | break; |
672 | default: |
673 | break; |
674 | } |
675 | } |
676 | if (embed) { |
677 | GfxFontLoc fontLoc; |
678 | fontLoc.locType = gfxFontLocEmbedded; |
679 | fontLoc.fontType = type; |
680 | fontLoc.embFontID = embFontID; |
681 | return fontLoc; |
682 | } |
683 | } |
684 | } |
685 | |
686 | //----- PS passthrough |
687 | if (ps && !isCIDFont() && ps->getFontPassthrough()) { |
688 | GfxFontLoc fontLoc; |
689 | fontLoc.locType = gfxFontLocResident; |
690 | fontLoc.fontType = fontType1; |
691 | fontLoc.path = *name; |
692 | return fontLoc; |
693 | } |
694 | |
695 | //----- PS resident Base-14 font |
696 | if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) { |
697 | GfxFontLoc fontLoc; |
698 | fontLoc.locType = gfxFontLocResident; |
699 | fontLoc.fontType = fontType1; |
700 | fontLoc.path = ((Gfx8BitFont *)this)->base14->base14Name; |
701 | return fontLoc; |
702 | } |
703 | |
704 | //----- external font file (fontFile, fontDir) |
705 | if (name && (path = globalParams->findFontFile(fontName: *name))) { |
706 | if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: isCIDFont())) { |
707 | return fontLoc; |
708 | } |
709 | } |
710 | |
711 | //----- external font file for Base-14 font |
712 | if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) { |
713 | base14Name = new GooString(((Gfx8BitFont *)this)->base14->base14Name); |
714 | if ((path = globalParams->findBase14FontFile(base14Name, font: this, substituteFontName))) { |
715 | if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: false)) { |
716 | delete base14Name; |
717 | return fontLoc; |
718 | } |
719 | } |
720 | delete base14Name; |
721 | } |
722 | |
723 | //----- system font |
724 | if ((path = globalParams->findSystemFontFile(font: this, type: &sysFontType, fontNum: &fontNum, substituteFontName))) { |
725 | if (isCIDFont()) { |
726 | if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) { |
727 | GfxFontLoc fontLoc; |
728 | fontLoc.locType = gfxFontLocExternal; |
729 | fontLoc.fontType = fontCIDType2; |
730 | fontLoc.setPath(path); |
731 | fontLoc.fontNum = fontNum; |
732 | return fontLoc; |
733 | } |
734 | } else { |
735 | GfxFontLoc fontLoc; |
736 | fontLoc.setPath(path); |
737 | fontLoc.locType = gfxFontLocExternal; |
738 | if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) { |
739 | fontLoc.fontType = fontTrueType; |
740 | } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) { |
741 | fontLoc.fontType = fontType1; |
742 | fontLoc.fontNum = fontNum; |
743 | } |
744 | return fontLoc; |
745 | } |
746 | delete path; |
747 | } |
748 | |
749 | if (!isCIDFont()) { |
750 | |
751 | //----- 8-bit font substitution |
752 | if (flags & fontFixedWidth) { |
753 | substIdx = 0; |
754 | } else if (flags & fontSerif) { |
755 | substIdx = 8; |
756 | } else { |
757 | substIdx = 4; |
758 | } |
759 | if (isBold()) { |
760 | substIdx += 2; |
761 | } |
762 | if (isItalic()) { |
763 | substIdx += 1; |
764 | } |
765 | const std::string substName = base14SubstFonts[substIdx]; |
766 | if (ps) { |
767 | error(category: errSyntaxWarning, pos: -1, msg: "Substituting font '{0:s}' for '{1:s}'" , base14SubstFonts[substIdx], name ? name->c_str() : "null" ); |
768 | GfxFontLoc fontLoc; |
769 | fontLoc.locType = gfxFontLocResident; |
770 | fontLoc.fontType = fontType1; |
771 | fontLoc.path = substName; |
772 | fontLoc.substIdx = substIdx; |
773 | return fontLoc; |
774 | } else { |
775 | path = globalParams->findFontFile(fontName: substName); |
776 | if (path) { |
777 | if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: false)) { |
778 | error(category: errSyntaxWarning, pos: -1, msg: "Substituting font '{0:s}' for '{1:s}'" , base14SubstFonts[substIdx], name ? name->c_str() : "" ); |
779 | name = base14SubstFonts[substIdx]; |
780 | fontLoc->substIdx = substIdx; |
781 | return fontLoc; |
782 | } |
783 | } |
784 | } |
785 | |
786 | // failed to find a substitute font |
787 | return std::nullopt; |
788 | } |
789 | |
790 | // failed to find a substitute font |
791 | return std::nullopt; |
792 | } |
793 | |
794 | std::optional<GfxFontLoc> GfxFont::getExternalFont(GooString *path, bool cid) |
795 | { |
796 | FoFiIdentifierType fft; |
797 | GfxFontType fontType; |
798 | |
799 | fft = FoFiIdentifier::identifyFile(fileName: path->c_str()); |
800 | switch (fft) { |
801 | case fofiIdType1PFA: |
802 | case fofiIdType1PFB: |
803 | fontType = fontType1; |
804 | break; |
805 | case fofiIdCFF8Bit: |
806 | fontType = fontType1C; |
807 | break; |
808 | case fofiIdCFFCID: |
809 | fontType = fontCIDType0C; |
810 | break; |
811 | case fofiIdTrueType: |
812 | case fofiIdTrueTypeCollection: |
813 | fontType = cid ? fontCIDType2 : fontTrueType; |
814 | break; |
815 | case fofiIdOpenTypeCFF8Bit: |
816 | fontType = fontType1COT; |
817 | break; |
818 | case fofiIdOpenTypeCFFCID: |
819 | fontType = fontCIDType0COT; |
820 | break; |
821 | case fofiIdUnknown: |
822 | case fofiIdError: |
823 | default: |
824 | fontType = fontUnknownType; |
825 | break; |
826 | } |
827 | if (fontType == fontUnknownType || (cid ? (fontType < fontCIDType0) : (fontType >= fontCIDType0))) { |
828 | delete path; |
829 | return std::nullopt; |
830 | } |
831 | GfxFontLoc fontLoc; |
832 | fontLoc.locType = gfxFontLocExternal; |
833 | fontLoc.fontType = fontType; |
834 | fontLoc.setPath(path); |
835 | return fontLoc; |
836 | } |
837 | |
838 | std::optional<std::vector<unsigned char>> GfxFont::readEmbFontFile(XRef *xref) |
839 | { |
840 | Stream *str; |
841 | |
842 | Object obj1(embFontID); |
843 | Object obj2 = obj1.fetch(xref); |
844 | if (!obj2.isStream()) { |
845 | error(category: errSyntaxError, pos: -1, msg: "Embedded font file is not a stream" ); |
846 | embFontID = Ref::INVALID(); |
847 | return {}; |
848 | } |
849 | str = obj2.getStream(); |
850 | |
851 | std::vector<unsigned char> buf = str->toUnsignedChars(); |
852 | str->close(); |
853 | |
854 | return buf; |
855 | } |
856 | |
// One entry of the alternate glyph-name table: `alt` is the alternate
// spelling returned for the glyph called `name`.
struct AlternateNameMap
{
    const char *name;
    const char *alt;
};

// Ligature glyph names and their underscore-separated alternates. The table
// is terminated by a { nullptr, nullptr } sentinel entry.
static const AlternateNameMap alternateNameMap[] = { { "fi", "f_i" }, { "fl", "f_l" }, { "ff", "f_f" }, { "ffi", "f_f_i" }, { "ffl", "f_f_l" }, { nullptr, nullptr } };
864 | |
865 | const char *GfxFont::getAlternateName(const char *name) |
866 | { |
867 | const AlternateNameMap *map = alternateNameMap; |
868 | while (map->name) { |
869 | if (strcmp(s1: name, s2: map->name) == 0) { |
870 | return map->alt; |
871 | } |
872 | map++; |
873 | } |
874 | return nullptr; |
875 | } |
876 | |
877 | //------------------------------------------------------------------------ |
878 | // Gfx8BitFont |
879 | //------------------------------------------------------------------------ |
880 | |
// Parse character names of the form 'Axx', 'xx', 'Ann', 'ABnn', or
// 'nn', where 'A' and 'B' are any letters, 'xx' is two hex digits,
// and 'nn' is decimal digits.
//
// s   - NUL-terminated character name
// hex - true to parse the 'Axx' / 'xx' hexadecimal forms, false to parse
//       the decimal forms
// u   - if non-null, receives the parsed value on success; left untouched
//       on failure
//
// Trailing non-alphanumeric characters after the number are ignored.
// Returns true if the whole name matched one of the forms above and the
// value is non-negative and fits in an unsigned int.
static bool parseNumericName(const char *s, bool hex, unsigned int *u)
{
    char *endptr;

    if (hex) {
        // Length of the leading alphanumeric run; anything after it is junk.
        int n = 0;
        while (isalnum((unsigned char)s[n])) {
            ++n;
        }

        // Only 2 hex characters with an optional leading letter are allowed.
        if (n == 3 && isalpha((unsigned char)*s)) {
            ++s;
        } else if (n != 2) {
            return false;
        }
    } else {
        // Strip up to two leading letters.
        for (int i = 0; i < 2 && isalpha((unsigned char)*s); ++i) {
            ++s;
        }
    }

    const long v = strtol(s, &endptr, hex ? 16 : 10);
    if (endptr == s) {
        // no digits were consumed
        return false;
    }

    // strtol accepts a sign and clamps on overflow; neither result is a
    // usable character value, so reject instead of wrapping into *u.
    if (v < 0 || (unsigned long)v > (unsigned int)-1) {
        return false;
    }

    // Skip trailing junk characters.
    while (*endptr != '\0' && !isalnum((unsigned char)*endptr)) {
        ++endptr;
    }
    if (*endptr != '\0') {
        // letters or digits follow the number: not a numeric name
        return false;
    }

    if (u) {
        *u = (unsigned int)v;
    }
    return true;
}
929 | |
930 | // Returns true if the font has character names like xx or Axx which |
931 | // should be parsed for hex or decimal values. |
932 | static bool testForNumericNames(Dict *fontDict, bool hex) |
933 | { |
934 | bool numeric = true; |
935 | |
936 | Object enc = fontDict->lookup(key: "Encoding" ); |
937 | if (!enc.isDict()) { |
938 | return false; |
939 | } |
940 | |
941 | Object diff = enc.dictLookup(key: "Differences" ); |
942 | if (!diff.isArray()) { |
943 | return false; |
944 | } |
945 | |
946 | for (int i = 0; i < diff.arrayGetLength() && numeric; ++i) { |
947 | Object obj = diff.arrayGet(i); |
948 | if (obj.isInt()) { |
949 | // All sequences must start between character codes 0 and 5. |
950 | if (obj.getInt() > 5) { |
951 | numeric = false; |
952 | } |
953 | } else if (obj.isName()) { |
954 | // All character names must successfully parse. |
955 | if (!parseNumericName(s: obj.getName(), hex, u: nullptr)) { |
956 | numeric = false; |
957 | } |
958 | } else { |
959 | numeric = false; |
960 | } |
961 | } |
962 | |
963 | return numeric; |
964 | } |
965 | |
966 | Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict) : GfxFont(tagA, idA, std::move(nameA), typeA, embFontIDA) |
967 | { |
968 | const BuiltinFont *builtinFont; |
969 | const char **baseEnc; |
970 | bool baseEncFromFontFile; |
971 | int len; |
972 | FoFiType1 *ffT1; |
973 | FoFiType1C *ffT1C; |
974 | char *charName; |
975 | bool missing, hex; |
976 | bool numeric; |
977 | Unicode toUnicode[256]; |
978 | Unicode uBuf[8]; |
979 | double mul; |
980 | int firstChar, lastChar; |
981 | unsigned short w; |
982 | Object obj1; |
983 | int n, a, b, m; |
984 | |
985 | ctu = nullptr; |
986 | |
987 | // do font name substitution for various aliases of the Base 14 font |
988 | // names |
989 | base14 = nullptr; |
990 | if (name) { |
991 | std::string name2 = *name; |
992 | size_t i = 0; |
993 | while (i < name2.size()) { |
994 | if (name2[i] == ' ') { |
995 | name2.erase(pos: i, n: 1); |
996 | } else { |
997 | ++i; |
998 | } |
999 | } |
1000 | a = 0; |
1001 | b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry); |
1002 | // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName |
1003 | while (b - a > 1) { |
1004 | m = (a + b) / 2; |
1005 | if (name2.compare(s: base14FontMap[m].altName) >= 0) { |
1006 | a = m; |
1007 | } else { |
1008 | b = m; |
1009 | } |
1010 | } |
1011 | if (name2 == base14FontMap[a].altName) { |
1012 | base14 = &base14FontMap[a]; |
1013 | } |
1014 | } |
1015 | |
1016 | // is it a built-in font? |
1017 | builtinFont = nullptr; |
1018 | if (base14) { |
1019 | for (const BuiltinFont &bf : builtinFonts) { |
1020 | if (!strcmp(s1: base14->base14Name, s2: bf.name)) { |
1021 | builtinFont = &bf; |
1022 | break; |
1023 | } |
1024 | } |
1025 | } |
1026 | |
1027 | // default ascent/descent values |
1028 | if (builtinFont) { |
1029 | ascent = 0.001 * builtinFont->ascent; |
1030 | descent = 0.001 * builtinFont->descent; |
1031 | fontBBox[0] = 0.001 * builtinFont->bbox[0]; |
1032 | fontBBox[1] = 0.001 * builtinFont->bbox[1]; |
1033 | fontBBox[2] = 0.001 * builtinFont->bbox[2]; |
1034 | fontBBox[3] = 0.001 * builtinFont->bbox[3]; |
1035 | } else { |
1036 | ascent = 0.95; |
1037 | descent = -0.35; |
1038 | fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0; |
1039 | } |
1040 | |
1041 | // get info from font descriptor |
1042 | readFontDescriptor(xref, fontDict); |
1043 | |
1044 | // for non-embedded fonts, don't trust the ascent/descent/bbox |
1045 | // values from the font descriptor |
1046 | if (builtinFont && embFontID == Ref::INVALID()) { |
1047 | ascent = 0.001 * builtinFont->ascent; |
1048 | descent = 0.001 * builtinFont->descent; |
1049 | fontBBox[0] = 0.001 * builtinFont->bbox[0]; |
1050 | fontBBox[1] = 0.001 * builtinFont->bbox[1]; |
1051 | fontBBox[2] = 0.001 * builtinFont->bbox[2]; |
1052 | fontBBox[3] = 0.001 * builtinFont->bbox[3]; |
1053 | } |
1054 | |
1055 | // get font matrix |
1056 | fontMat[0] = fontMat[3] = 1; |
1057 | fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0; |
1058 | obj1 = fontDict->lookup(key: "FontMatrix" ); |
1059 | if (obj1.isArray()) { |
1060 | for (int i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) { |
1061 | Object obj2 = obj1.arrayGet(i); |
1062 | if (obj2.isNum()) { |
1063 | fontMat[i] = obj2.getNum(); |
1064 | } |
1065 | } |
1066 | } |
1067 | |
1068 | // get Type 3 bounding box, font definition, and resources |
1069 | if (type == fontType3) { |
1070 | obj1 = fontDict->lookup(key: "FontBBox" ); |
1071 | if (obj1.isArray()) { |
1072 | for (int i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) { |
1073 | Object obj2 = obj1.arrayGet(i); |
1074 | if (obj2.isNum()) { |
1075 | fontBBox[i] = obj2.getNum(); |
1076 | } |
1077 | } |
1078 | } |
1079 | charProcs = fontDict->lookup(key: "CharProcs" ); |
1080 | if (!charProcs.isDict()) { |
1081 | error(category: errSyntaxError, pos: -1, msg: "Missing or invalid CharProcs dictionary in Type 3 font" ); |
1082 | charProcs.setToNull(); |
1083 | } |
1084 | resources = fontDict->lookup(key: "Resources" ); |
1085 | if (!resources.isDict()) { |
1086 | resources.setToNull(); |
1087 | } |
1088 | } |
1089 | |
1090 | //----- build the font encoding ----- |
1091 | |
1092 | // Encodings start with a base encoding, which can come from |
1093 | // (in order of priority): |
1094 | // 1. FontDict.Encoding or FontDict.Encoding.BaseEncoding |
1095 | // - MacRoman / MacExpert / WinAnsi / Standard |
1096 | // 2. embedded or external font file |
1097 | // 3. default: |
1098 | // - builtin --> builtin encoding |
1099 | // - TrueType --> WinAnsiEncoding |
1100 | // - others --> StandardEncoding |
1101 | // and then add a list of differences (if any) from |
1102 | // FontDict.Encoding.Differences. |
1103 | |
1104 | // check FontDict for base encoding |
1105 | hasEncoding = false; |
1106 | usesMacRomanEnc = false; |
1107 | baseEnc = nullptr; |
1108 | baseEncFromFontFile = false; |
1109 | obj1 = fontDict->lookup(key: "Encoding" ); |
1110 | if (obj1.isDict()) { |
1111 | Object obj2 = obj1.dictLookup(key: "BaseEncoding" ); |
1112 | if (obj2.isName(nameA: "MacRomanEncoding" )) { |
1113 | hasEncoding = true; |
1114 | usesMacRomanEnc = true; |
1115 | baseEnc = macRomanEncoding; |
1116 | } else if (obj2.isName(nameA: "MacExpertEncoding" )) { |
1117 | hasEncoding = true; |
1118 | baseEnc = macExpertEncoding; |
1119 | } else if (obj2.isName(nameA: "WinAnsiEncoding" )) { |
1120 | hasEncoding = true; |
1121 | baseEnc = winAnsiEncoding; |
1122 | } |
1123 | } else if (obj1.isName(nameA: "MacRomanEncoding" )) { |
1124 | hasEncoding = true; |
1125 | usesMacRomanEnc = true; |
1126 | baseEnc = macRomanEncoding; |
1127 | } else if (obj1.isName(nameA: "MacExpertEncoding" )) { |
1128 | hasEncoding = true; |
1129 | baseEnc = macExpertEncoding; |
1130 | } else if (obj1.isName(nameA: "WinAnsiEncoding" )) { |
1131 | hasEncoding = true; |
1132 | baseEnc = winAnsiEncoding; |
1133 | } |
1134 | |
1135 | // check embedded font file for base encoding |
1136 | // (only for Type 1 fonts - trying to get an encoding out of a |
1137 | // TrueType font is a losing proposition) |
1138 | ffT1 = nullptr; |
1139 | ffT1C = nullptr; |
1140 | if (type == fontType1 && embFontID != Ref::INVALID()) { |
1141 | const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref); |
1142 | if (buf) { |
1143 | if ((ffT1 = FoFiType1::make(fileA: buf->data(), lenA: buf->size()))) { |
1144 | const std::string fontName = ffT1->getName(); |
1145 | if (!fontName.empty()) { |
1146 | delete embFontName; |
1147 | embFontName = new GooString(fontName); |
1148 | } |
1149 | if (!baseEnc) { |
1150 | baseEnc = (const char **)ffT1->getEncoding(); |
1151 | baseEncFromFontFile = true; |
1152 | } |
1153 | } |
1154 | } |
1155 | } else if (type == fontType1C && embFontID != Ref::INVALID()) { |
1156 | const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref); |
1157 | if (buf) { |
1158 | if ((ffT1C = FoFiType1C::make(fileA: buf->data(), lenA: buf->size()))) { |
1159 | if (ffT1C->getName()) { |
1160 | if (embFontName) { |
1161 | delete embFontName; |
1162 | } |
1163 | embFontName = new GooString(ffT1C->getName()); |
1164 | } |
1165 | if (!baseEnc) { |
1166 | baseEnc = (const char **)ffT1C->getEncoding(); |
1167 | baseEncFromFontFile = true; |
1168 | } |
1169 | } |
1170 | } |
1171 | } |
1172 | |
1173 | // get default base encoding |
1174 | if (!baseEnc) { |
1175 | if (builtinFont && embFontID == Ref::INVALID()) { |
1176 | baseEnc = builtinFont->defaultBaseEnc; |
1177 | hasEncoding = true; |
1178 | } else if (type == fontTrueType) { |
1179 | baseEnc = winAnsiEncoding; |
1180 | } else { |
1181 | baseEnc = standardEncoding; |
1182 | } |
1183 | } |
1184 | |
1185 | if (baseEncFromFontFile) { |
1186 | encodingName = "Builtin" ; |
1187 | } else if (baseEnc == winAnsiEncoding) { |
1188 | encodingName = "WinAnsi" ; |
1189 | } else if (baseEnc == macRomanEncoding) { |
1190 | encodingName = "MacRoman" ; |
1191 | } else if (baseEnc == macExpertEncoding) { |
1192 | encodingName = "MacExpert" ; |
1193 | } else if (baseEnc == symbolEncoding) { |
1194 | encodingName = "Symbol" ; |
1195 | } else if (baseEnc == zapfDingbatsEncoding) { |
1196 | encodingName = "ZapfDingbats" ; |
1197 | } else { |
1198 | encodingName = "Standard" ; |
1199 | } |
1200 | |
1201 | // copy the base encoding |
1202 | for (int i = 0; i < 256; ++i) { |
1203 | enc[i] = (char *)baseEnc[i]; |
1204 | if ((encFree[i] = baseEncFromFontFile) && enc[i]) { |
1205 | enc[i] = copyString(s: baseEnc[i]); |
1206 | } |
1207 | } |
1208 | |
1209 | // some Type 1C font files have empty encodings, which can break the |
1210 | // T1C->T1 conversion (since the 'seac' operator depends on having |
1211 | // the accents in the encoding), so we fill in any gaps from |
1212 | // StandardEncoding |
1213 | if (type == fontType1C && embFontID != Ref::INVALID() && baseEncFromFontFile) { |
1214 | for (int i = 0; i < 256; ++i) { |
1215 | if (!enc[i] && standardEncoding[i]) { |
1216 | enc[i] = (char *)standardEncoding[i]; |
1217 | encFree[i] = false; |
1218 | } |
1219 | } |
1220 | } |
1221 | |
1222 | // merge differences into encoding |
1223 | if (obj1.isDict()) { |
1224 | Object obj2 = obj1.dictLookup(key: "Differences" ); |
1225 | if (obj2.isArray()) { |
1226 | encodingName = "Custom" ; |
1227 | hasEncoding = true; |
1228 | int code = 0; |
1229 | for (int i = 0; i < obj2.arrayGetLength(); ++i) { |
1230 | Object obj3 = obj2.arrayGet(i); |
1231 | if (obj3.isInt()) { |
1232 | code = obj3.getInt(); |
1233 | } else if (obj3.isName()) { |
1234 | if (code >= 0 && code < 256) { |
1235 | if (encFree[code]) { |
1236 | gfree(p: enc[code]); |
1237 | } |
1238 | enc[code] = copyString(s: obj3.getName()); |
1239 | encFree[code] = true; |
1240 | ++code; |
1241 | } |
1242 | } else { |
1243 | error(category: errSyntaxError, pos: -1, msg: "Wrong type in font encoding resource differences ({0:s})" , obj3.getTypeName()); |
1244 | } |
1245 | } |
1246 | } |
1247 | } |
1248 | delete ffT1; |
1249 | delete ffT1C; |
1250 | |
1251 | //----- build the mapping to Unicode ----- |
1252 | |
1253 | // pass 1: use the name-to-Unicode mapping table |
1254 | missing = hex = false; |
1255 | bool isZapfDingbats = name && name->ends_with(x: "ZapfDingbats" ); |
1256 | for (int code = 0; code < 256; ++code) { |
1257 | if ((charName = enc[code])) { |
1258 | if (isZapfDingbats) { |
1259 | // include ZapfDingbats names |
1260 | toUnicode[code] = globalParams->mapNameToUnicodeAll(charName); |
1261 | } else { |
1262 | toUnicode[code] = globalParams->mapNameToUnicodeText(charName); |
1263 | } |
1264 | if (!toUnicode[code] && strcmp(s1: charName, s2: ".notdef" )) { |
1265 | // if it wasn't in the name-to-Unicode table, check for a |
1266 | // name that looks like 'Axx' or 'xx', where 'A' is any letter |
1267 | // and 'xx' is two hex digits |
1268 | if ((strlen(s: charName) == 3 && isalpha(charName[0]) && isxdigit(charName[1]) && isxdigit(charName[2]) |
1269 | && ((charName[1] >= 'a' && charName[1] <= 'f') || (charName[1] >= 'A' && charName[1] <= 'F') || (charName[2] >= 'a' && charName[2] <= 'f') || (charName[2] >= 'A' && charName[2] <= 'F'))) |
1270 | || (strlen(s: charName) == 2 && isxdigit(charName[0]) && isxdigit(charName[1]) && |
1271 | // Only check idx 1 to avoid misidentifying a decimal |
1272 | // number like a0 |
1273 | ((charName[1] >= 'a' && charName[1] <= 'f') || (charName[1] >= 'A' && charName[1] <= 'F')))) { |
1274 | hex = true; |
1275 | } |
1276 | missing = true; |
1277 | } |
1278 | } else { |
1279 | toUnicode[code] = 0; |
1280 | } |
1281 | } |
1282 | |
1283 | numeric = testForNumericNames(fontDict, hex); |
1284 | |
1285 | // construct the char code -> Unicode mapping object |
1286 | ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode); |
1287 | |
1288 | // pass 1a: Expand ligatures in the Alphabetic Presentation Form |
1289 | // block (eg "fi", "ffi") to normal form |
1290 | for (int code = 0; code < 256; ++code) { |
1291 | if (unicodeIsAlphabeticPresentationForm(c: toUnicode[code])) { |
1292 | Unicode *normalized = unicodeNormalizeNFKC(in: &toUnicode[code], len: 1, out_len: &len, indices: nullptr); |
1293 | if (len > 1) { |
1294 | ctu->setMapping(c: (CharCode)code, u: normalized, len); |
1295 | } |
1296 | gfree(p: normalized); |
1297 | } |
1298 | } |
1299 | |
1300 | // pass 2: try to fill in the missing chars, looking for ligatures, numeric |
1301 | // references and variants |
1302 | if (missing) { |
1303 | for (int code = 0; code < 256; ++code) { |
1304 | if (!toUnicode[code]) { |
1305 | if ((charName = enc[code]) && strcmp(s1: charName, s2: ".notdef" ) |
1306 | && (n = parseCharName(charName, uBuf, uLen: sizeof(uBuf) / sizeof(*uBuf), |
1307 | names: false, // don't check simple names (pass 1) |
1308 | ligatures: true, // do check ligatures |
1309 | numeric, hex, |
1310 | variants: true))) { // do check variants |
1311 | ctu->setMapping(c: (CharCode)code, u: uBuf, len: n); |
1312 | continue; |
1313 | } |
1314 | |
1315 | // do a simple pass-through |
1316 | // mapping for unknown character names |
1317 | uBuf[0] = code; |
1318 | ctu->setMapping(c: (CharCode)code, u: uBuf, len: 1); |
1319 | } |
1320 | } |
1321 | } |
1322 | |
1323 | // merge in a ToUnicode CMap, if there is one -- this overwrites |
1324 | // existing entries in ctu, i.e., the ToUnicode CMap takes |
1325 | // precedence, but the other encoding info is allowed to fill in any |
1326 | // holes |
1327 | readToUnicodeCMap(fontDict, nBits: 16, ctu); |
1328 | |
1329 | //----- get the character widths ----- |
1330 | |
1331 | // initialize all widths |
1332 | for (double &width : widths) { |
1333 | width = missingWidth * 0.001; |
1334 | } |
1335 | |
1336 | // use widths from font dict, if present |
1337 | obj1 = fontDict->lookup(key: "FirstChar" ); |
1338 | firstChar = obj1.isInt() ? obj1.getInt() : 0; |
1339 | if (firstChar < 0 || firstChar > 255) { |
1340 | firstChar = 0; |
1341 | } |
1342 | obj1 = fontDict->lookup(key: "LastChar" ); |
1343 | lastChar = obj1.isInt() ? obj1.getInt() : 255; |
1344 | if (lastChar < 0 || lastChar > 255) { |
1345 | lastChar = 255; |
1346 | } |
1347 | mul = (type == fontType3) ? fontMat[0] : 0.001; |
1348 | obj1 = fontDict->lookup(key: "Widths" ); |
1349 | if (obj1.isArray()) { |
1350 | flags |= fontFixedWidth; |
1351 | if (obj1.arrayGetLength() < lastChar - firstChar + 1) { |
1352 | lastChar = firstChar + obj1.arrayGetLength() - 1; |
1353 | } |
1354 | double firstNonZeroWidth = 0; |
1355 | for (int code = firstChar; code <= lastChar; ++code) { |
1356 | Object obj2 = obj1.arrayGet(i: code - firstChar); |
1357 | if (obj2.isNum()) { |
1358 | widths[code] = obj2.getNum() * mul; |
1359 | |
1360 | // Check if the font is fixed width |
1361 | if (firstNonZeroWidth == 0) { |
1362 | firstNonZeroWidth = widths[code]; |
1363 | } |
1364 | if (firstNonZeroWidth != 0 && widths[code] != 0 && fabs(x: widths[code] - firstNonZeroWidth) > 0.00001) { |
1365 | flags &= ~fontFixedWidth; |
1366 | } |
1367 | } |
1368 | } |
1369 | |
1370 | // use widths from built-in font |
1371 | } else if (builtinFont) { |
1372 | // this is a kludge for broken PDF files that encode char 32 |
1373 | // as .notdef |
1374 | if (builtinFont->getWidth(n: "space" , w: &w)) { |
1375 | widths[32] = 0.001 * w; |
1376 | } |
1377 | for (int code = 0; code < 256; ++code) { |
1378 | if (enc[code] && builtinFont->getWidth(n: enc[code], w: &w)) { |
1379 | widths[code] = 0.001 * w; |
1380 | } |
1381 | } |
1382 | |
1383 | // couldn't find widths -- use defaults |
1384 | } else { |
1385 | // this is technically an error -- the Widths entry is required |
1386 | // for all but the Base-14 fonts -- but certain PDF generators |
1387 | // apparently don't include widths for Arial and TimesNewRoman |
1388 | int i; |
1389 | if (isFixedWidth()) { |
1390 | i = 0; |
1391 | } else if (isSerif()) { |
1392 | i = 8; |
1393 | } else { |
1394 | i = 4; |
1395 | } |
1396 | if (isBold()) { |
1397 | i += 2; |
1398 | } |
1399 | if (isItalic()) { |
1400 | i += 1; |
1401 | } |
1402 | builtinFont = builtinFontSubst[i]; |
1403 | // this is a kludge for broken PDF files that encode char 32 |
1404 | // as .notdef |
1405 | if (builtinFont->getWidth(n: "space" , w: &w)) { |
1406 | widths[32] = 0.001 * w; |
1407 | } |
1408 | for (int code = 0; code < 256; ++code) { |
1409 | if (enc[code] && builtinFont->getWidth(n: enc[code], w: &w)) { |
1410 | widths[code] = 0.001 * w; |
1411 | } |
1412 | } |
1413 | } |
1414 | |
1415 | ok = true; |
1416 | } |
1417 | |
1418 | Gfx8BitFont::~Gfx8BitFont() |
1419 | { |
1420 | int i; |
1421 | |
1422 | for (i = 0; i < 256; ++i) { |
1423 | if (encFree[i] && enc[i]) { |
1424 | gfree(p: enc[i]); |
1425 | } |
1426 | } |
1427 | ctu->decRefCnt(); |
1428 | } |
1429 | |
1430 | // This function is in part a derived work of the Adobe Glyph Mapping |
1431 | // Convention: http://www.adobe.com/devnet/opentype/archives/glyph.html |
1432 | // Algorithmic comments are excerpted from that document to aid |
1433 | // maintainability. |
1434 | static int parseCharName(char *charName, Unicode *uBuf, int uLen, bool names, bool ligatures, bool numeric, bool hex, bool variants) |
1435 | { |
1436 | if (uLen <= 0) { |
1437 | error(category: errInternal, pos: -1, |
1438 | msg: "Zero-length output buffer (recursion overflow?) in " |
1439 | "parseCharName, component \"{0:s}\"" , |
1440 | charName); |
1441 | return 0; |
1442 | } |
1443 | // Step 1: drop all the characters from the glyph name starting with the |
1444 | // first occurrence of a period (U+002E FULL STOP), if any. |
1445 | if (variants) { |
1446 | char *var_part = strchr(s: charName, c: '.'); |
1447 | if (var_part == charName) { |
1448 | return 0; // .notdef or similar |
1449 | } else if (var_part != nullptr) { |
1450 | // parse names of the form 7.oldstyle, P.swash, s.sc, etc. |
1451 | char *main_part = copyString(s: charName, n: var_part - charName); |
1452 | bool namesRecurse = true, variantsRecurse = false; |
1453 | int n = parseCharName(charName: main_part, uBuf, uLen, names: namesRecurse, ligatures, numeric, hex, variants: variantsRecurse); |
1454 | gfree(p: main_part); |
1455 | return n; |
1456 | } |
1457 | } |
1458 | // Step 2: split the remaining string into a sequence of components, using |
1459 | // underscore (U+005F LOW LINE) as the delimiter. |
1460 | if (ligatures && strchr(s: charName, c: '_')) { |
1461 | // parse names of the form A_a (e.g. f_i, T_h, l_quotesingle) |
1462 | char *lig_part, *lig_end, *lig_copy; |
1463 | int n = 0, m; |
1464 | lig_part = lig_copy = copyString(s: charName); |
1465 | do { |
1466 | if ((lig_end = strchr(s: lig_part, c: '_'))) { |
1467 | *lig_end = '\0'; |
1468 | } |
1469 | if (lig_part[0] != '\0') { |
1470 | bool namesRecurse = true, ligaturesRecurse = false; |
1471 | if ((m = parseCharName(charName: lig_part, uBuf: uBuf + n, uLen: uLen - n, names: namesRecurse, ligatures: ligaturesRecurse, numeric, hex, variants))) { |
1472 | n += m; |
1473 | } else { |
1474 | error(category: errSyntaxWarning, pos: -1, |
1475 | msg: "Could not parse ligature component \"{0:s}\" of \"{1:s}\" in " |
1476 | "parseCharName" , |
1477 | lig_part, charName); |
1478 | } |
1479 | } |
1480 | if (lig_end) { |
1481 | lig_part = lig_end + 1; |
1482 | } |
1483 | } while (lig_end && n < uLen); |
1484 | gfree(p: lig_copy); |
1485 | return n; |
1486 | } |
1487 | // Step 3: map each component to a character string according to the |
1488 | // procedure below, and concatenate those strings; the result is the |
1489 | // character string to which the glyph name is mapped. |
1490 | // 3.1. if the font is Zapf Dingbats (PostScript FontName ZapfDingbats), and |
1491 | // the component is in the ZapfDingbats list, then map it to the |
1492 | // corresponding character in that list. |
1493 | // 3.2. otherwise, if the component is in the Adobe Glyph List, then map it |
1494 | // to the corresponding character in that list. |
1495 | if (names && (uBuf[0] = globalParams->mapNameToUnicodeText(charName))) { |
1496 | return 1; |
1497 | } |
1498 | unsigned int n = strlen(s: charName); |
1499 | // 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E |
1500 | // U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9, |
1501 | // A .. F, i.e. U+0030 .. U+0039, U+0041 .. U+0046), the length of that |
1502 | // sequence is a multiple of four, and each group of four digits represents |
1503 | // a number in the set {0x0000 .. 0xD7FF, 0xE000 .. 0xFFFF}, then interpret |
1504 | // each such number as a Unicode scalar value and map the component to the |
1505 | // string made of those scalar values. Note that the range and digit length |
1506 | // restrictions mean that the "uni" prefix can be used only with Unicode |
1507 | // values from the Basic Multilingual Plane (BMP). |
1508 | if (n >= 7 && (n % 4) == 3 && !strncmp(s1: charName, s2: "uni" , n: 3)) { |
1509 | int i; |
1510 | unsigned int m; |
1511 | for (i = 0, m = 3; i < uLen && m < n; m += 4) { |
1512 | if (isxdigit(charName[m]) && isxdigit(charName[m + 1]) && isxdigit(charName[m + 2]) && isxdigit(charName[m + 3])) { |
1513 | unsigned int u; |
1514 | sscanf(s: charName + m, format: "%4x" , &u); |
1515 | if (u <= 0xD7FF || (0xE000 <= u && u <= 0xFFFF)) { |
1516 | uBuf[i++] = u; |
1517 | } |
1518 | } |
1519 | } |
1520 | return i; |
1521 | } |
1522 | // 3.4. otherwise, if the component is of the form "u" (U+0075) followed by |
1523 | // a sequence of four to six uppercase hexadecimal digits {0 .. 9, A .. F} |
1524 | // (U+0030 .. U+0039, U+0041 .. U+0046), and those digits represent a |
1525 | // number in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then interpret this |
1526 | // number as a Unicode scalar value and map the component to the string |
1527 | // made of this scalar value. |
1528 | if (n >= 5 && n <= 7 && charName[0] == 'u' && isxdigit(charName[1]) && isxdigit(charName[2]) && isxdigit(charName[3]) && isxdigit(charName[4]) && (n <= 5 || isxdigit(charName[5])) && (n <= 6 || isxdigit(charName[6]))) { |
1529 | unsigned int u; |
1530 | sscanf(s: charName + 1, format: "%x" , &u); |
1531 | if (u <= 0xD7FF || (0xE000 <= u && u <= 0x10FFFF)) { |
1532 | uBuf[0] = u; |
1533 | return 1; |
1534 | } |
1535 | } |
1536 | // Not in Adobe Glyph Mapping convention: look for names like xx |
1537 | // or Axx and parse for hex or decimal values. |
1538 | if (numeric && parseNumericName(s: charName, hex, u: uBuf)) { |
1539 | return 1; |
1540 | } |
1541 | // 3.5. otherwise, map the component to the empty string |
1542 | return 0; |
1543 | } |
1544 | |
1545 | int Gfx8BitFont::getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const |
1546 | { |
1547 | CharCode c; |
1548 | |
1549 | *code = c = (CharCode)(*s & 0xff); |
1550 | *uLen = ctu->mapToUnicode(c, u); |
1551 | *dx = widths[c]; |
1552 | *dy = *ox = *oy = 0; |
1553 | return 1; |
1554 | } |
1555 | |
1556 | const CharCodeToUnicode *Gfx8BitFont::getToUnicode() const |
1557 | { |
1558 | return ctu; |
1559 | } |
1560 | |
1561 | int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) |
1562 | { |
1563 | int *map; |
1564 | int cmapPlatform, cmapEncoding; |
1565 | int unicodeCmap, macRomanCmap, msSymbolCmap, cmap; |
1566 | bool useMacRoman, useUnicode; |
1567 | char *charName; |
1568 | Unicode u; |
1569 | int code, i, n; |
1570 | |
1571 | map = (int *)gmallocn(count: 256, size: sizeof(int)); |
1572 | for (i = 0; i < 256; ++i) { |
1573 | map[i] = 0; |
1574 | } |
1575 | |
1576 | // To match up with the Adobe-defined behaviour, we choose a cmap |
1577 | // like this: |
1578 | // 1. If the PDF font has an encoding: |
1579 | // 1a. If the TrueType font has a Microsoft Unicode |
1580 | // cmap or a non-Microsoft Unicode cmap, use it, and use the |
1581 | // Unicode indexes, not the char codes. |
1582 | // 1b. If the PDF font specified MacRomanEncoding and the |
1583 | // TrueType font has a Macintosh Roman cmap, use it, and |
1584 | // reverse map the char names through MacRomanEncoding to |
1585 | // get char codes. |
1586 | // 1c. If the PDF font is symbolic and the TrueType font has a |
1587 | // Microsoft Symbol cmap, use it, and use char codes |
1588 | // directly (possibly with an offset of 0xf000). |
1589 | // 1d. If the TrueType font has a Macintosh Roman cmap, use it, |
1590 | // as in case 1a. |
1591 | // 2. If the PDF font does not have an encoding or the PDF font is |
1592 | // symbolic: |
1593 | // 2a. If the TrueType font has a Macintosh Roman cmap, use it, |
1594 | // and use char codes directly (possibly with an offset of |
1595 | // 0xf000). |
1596 | // 2b. If the TrueType font has a Microsoft Symbol cmap, use it, |
1597 | // and use char codes directly (possible with an offset of |
1598 | // 0xf000). |
1599 | // 3. If none of these rules apply, use the first cmap and hope for |
1600 | // the best (this shouldn't happen). |
1601 | unicodeCmap = macRomanCmap = msSymbolCmap = -1; |
1602 | for (i = 0; i < ff->getNumCmaps(); ++i) { |
1603 | cmapPlatform = ff->getCmapPlatform(i); |
1604 | cmapEncoding = ff->getCmapEncoding(i); |
1605 | if ((cmapPlatform == 3 && cmapEncoding == 1) || cmapPlatform == 0) { |
1606 | unicodeCmap = i; |
1607 | } else if (cmapPlatform == 1 && cmapEncoding == 0) { |
1608 | macRomanCmap = i; |
1609 | } else if (cmapPlatform == 3 && cmapEncoding == 0) { |
1610 | msSymbolCmap = i; |
1611 | } |
1612 | } |
1613 | cmap = 0; |
1614 | useMacRoman = false; |
1615 | useUnicode = false; |
1616 | if (hasEncoding || type == fontType1) { |
1617 | if (unicodeCmap >= 0) { |
1618 | cmap = unicodeCmap; |
1619 | useUnicode = true; |
1620 | } else if (usesMacRomanEnc && macRomanCmap >= 0) { |
1621 | cmap = macRomanCmap; |
1622 | useMacRoman = true; |
1623 | } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) { |
1624 | cmap = msSymbolCmap; |
1625 | } else if ((flags & fontSymbolic) && macRomanCmap >= 0) { |
1626 | cmap = macRomanCmap; |
1627 | } else if (macRomanCmap >= 0) { |
1628 | cmap = macRomanCmap; |
1629 | useMacRoman = true; |
1630 | } |
1631 | } else { |
1632 | if (msSymbolCmap >= 0) { |
1633 | cmap = msSymbolCmap; |
1634 | } else if (macRomanCmap >= 0) { |
1635 | cmap = macRomanCmap; |
1636 | } |
1637 | } |
1638 | |
1639 | // reverse map the char names through MacRomanEncoding, then map the |
1640 | // char codes through the cmap |
1641 | if (useMacRoman) { |
1642 | for (i = 0; i < 256; ++i) { |
1643 | if ((charName = enc[i])) { |
1644 | if ((code = globalParams->getMacRomanCharCode(charName))) { |
1645 | map[i] = ff->mapCodeToGID(i: cmap, c: code); |
1646 | } |
1647 | } else { |
1648 | map[i] = -1; |
1649 | } |
1650 | } |
1651 | |
1652 | // map Unicode through the cmap |
1653 | } else if (useUnicode) { |
1654 | const Unicode *uAux; |
1655 | for (i = 0; i < 256; ++i) { |
1656 | if (((charName = enc[i]) && (u = globalParams->mapNameToUnicodeAll(charName)))) { |
1657 | map[i] = ff->mapCodeToGID(i: cmap, c: u); |
1658 | } else { |
1659 | n = ctu->mapToUnicode(c: (CharCode)i, u: &uAux); |
1660 | if (n > 0) { |
1661 | map[i] = ff->mapCodeToGID(i: cmap, c: uAux[0]); |
1662 | } else { |
1663 | map[i] = -1; |
1664 | } |
1665 | } |
1666 | } |
1667 | |
1668 | // map the char codes through the cmap, possibly with an offset of |
1669 | // 0xf000 |
1670 | } else { |
1671 | for (i = 0; i < 256; ++i) { |
1672 | if (!(map[i] = ff->mapCodeToGID(i: cmap, c: i))) { |
1673 | map[i] = ff->mapCodeToGID(i: cmap, c: 0xf000 + i); |
1674 | } |
1675 | } |
1676 | } |
1677 | |
1678 | // try the TrueType 'post' table to handle any unmapped characters |
1679 | for (i = 0; i < 256; ++i) { |
1680 | if (map[i] <= 0 && (charName = enc[i])) { |
1681 | map[i] = ff->mapNameToGID(name: charName); |
1682 | } |
1683 | } |
1684 | |
1685 | return map; |
1686 | } |
1687 | |
1688 | Dict *Gfx8BitFont::getCharProcs() |
1689 | { |
1690 | return charProcs.isDict() ? charProcs.getDict() : nullptr; |
1691 | } |
1692 | |
1693 | Object Gfx8BitFont::getCharProc(int code) |
1694 | { |
1695 | if (enc[code] && charProcs.isDict()) { |
1696 | return charProcs.dictLookup(key: enc[code]); |
1697 | } else { |
1698 | return Object(objNull); |
1699 | } |
1700 | } |
1701 | |
1702 | Object Gfx8BitFont::getCharProcNF(int code) |
1703 | { |
1704 | if (enc[code] && charProcs.isDict()) { |
1705 | return charProcs.dictLookupNF(key: enc[code]).copy(); |
1706 | } else { |
1707 | return Object(objNull); |
1708 | } |
1709 | } |
1710 | |
1711 | Dict *Gfx8BitFont::getResources() |
1712 | { |
1713 | return resources.isDict() ? resources.getDict() : nullptr; |
1714 | } |
1715 | |
1716 | //------------------------------------------------------------------------ |
1717 | // GfxCIDFont |
1718 | //------------------------------------------------------------------------ |
1719 | |
1720 | struct cmpWidthExcepFunctor |
1721 | { |
1722 | bool operator()(const GfxFontCIDWidthExcep w1, const GfxFontCIDWidthExcep w2) { return w1.first < w2.first; } |
1723 | }; |
1724 | |
1725 | struct cmpWidthExcepVFunctor |
1726 | { |
1727 | bool operator()(const GfxFontCIDWidthExcepV &w1, const GfxFontCIDWidthExcepV &w2) { return w1.first < w2.first; } |
1728 | }; |
1729 | |
1730 | GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict) : GfxFont(tagA, idA, std::move(nameA), typeA, embFontIDA) |
1731 | { |
1732 | Dict *desFontDict; |
1733 | Object desFontDictObj; |
1734 | Object obj1, obj2, obj3, obj4, obj5, obj6; |
1735 | int c1, c2; |
1736 | int excepsSize; |
1737 | |
1738 | ascent = 0.95; |
1739 | descent = -0.35; |
1740 | fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0; |
1741 | collection = nullptr; |
1742 | ctu = nullptr; |
1743 | ctuUsesCharCode = true; |
1744 | widths.defWidth = 1.0; |
1745 | widths.defHeight = -1.0; |
1746 | widths.defVY = 0.880; |
1747 | widths.exceps = nullptr; |
1748 | widths.nExceps = 0; |
1749 | widths.excepsV = nullptr; |
1750 | widths.nExcepsV = 0; |
1751 | cidToGID = nullptr; |
1752 | cidToGIDLen = 0; |
1753 | |
1754 | // get the descendant font |
1755 | obj1 = fontDict->lookup(key: "DescendantFonts" ); |
1756 | if (!obj1.isArray() || obj1.arrayGetLength() == 0) { |
1757 | error(category: errSyntaxError, pos: -1, msg: "Missing or empty DescendantFonts entry in Type 0 font" ); |
1758 | return; |
1759 | } |
1760 | desFontDictObj = obj1.arrayGet(i: 0); |
1761 | if (!desFontDictObj.isDict()) { |
1762 | error(category: errSyntaxError, pos: -1, msg: "Bad descendant font in Type 0 font" ); |
1763 | return; |
1764 | } |
1765 | desFontDict = desFontDictObj.getDict(); |
1766 | |
1767 | // get info from font descriptor |
1768 | readFontDescriptor(xref, fontDict: desFontDict); |
1769 | |
1770 | //----- encoding info ----- |
1771 | |
1772 | // char collection |
1773 | obj1 = desFontDict->lookup(key: "CIDSystemInfo" ); |
1774 | if (obj1.isDict()) { |
1775 | obj2 = obj1.dictLookup(key: "Registry" ); |
1776 | obj3 = obj1.dictLookup(key: "Ordering" ); |
1777 | if (!obj2.isString() || !obj3.isString()) { |
1778 | error(category: errSyntaxError, pos: -1, msg: "Invalid CIDSystemInfo dictionary in Type 0 descendant font" ); |
1779 | error(category: errSyntaxError, pos: -1, msg: "Assuming Adobe-Identity for character collection" ); |
1780 | obj2 = Object(new GooString("Adobe" )); |
1781 | obj3 = Object(new GooString("Identity" )); |
1782 | } |
1783 | collection = obj2.getString()->copy()->append(c: '-')->append(str: obj3.getString()); |
1784 | } else { |
1785 | error(category: errSyntaxError, pos: -1, msg: "Missing CIDSystemInfo dictionary in Type 0 descendant font" ); |
1786 | error(category: errSyntaxError, pos: -1, msg: "Assuming Adobe-Identity for character collection" ); |
1787 | collection = new GooString("Adobe-Identity" ); |
1788 | } |
1789 | |
1790 | // look for a ToUnicode CMap |
1791 | if (!(ctu = readToUnicodeCMap(fontDict, nBits: 16, ctu: nullptr))) { |
1792 | ctuUsesCharCode = false; |
1793 | |
1794 | // use an identity mapping for the "Adobe-Identity" and |
1795 | // "Adobe-UCS" collections |
1796 | if (!collection->cmp(sA: "Adobe-Identity" ) || !collection->cmp(sA: "Adobe-UCS" )) { |
1797 | ctu = CharCodeToUnicode::makeIdentityMapping(); |
1798 | } else { |
1799 | // look for a user-supplied .cidToUnicode file |
1800 | if (!(ctu = globalParams->getCIDToUnicode(collection))) { |
1801 | // I'm not completely sure that this is the best thing to do |
1802 | // but it seems to produce better results when the .cidToUnicode |
1803 | // files from the poppler-data package are missing. At least |
1804 | // we know that assuming the Identity mapping is definitely wrong. |
1805 | // -- jrmuizel |
1806 | static const char *knownCollections[] = { |
1807 | "Adobe-CNS1" , "Adobe-GB1" , "Adobe-Japan1" , "Adobe-Japan2" , "Adobe-Korea1" , |
1808 | }; |
1809 | for (const char *knownCollection : knownCollections) { |
1810 | if (collection->cmp(sA: knownCollection) == 0) { |
1811 | error(category: errSyntaxError, pos: -1, msg: "Missing language pack for '{0:t}' mapping" , collection); |
1812 | return; |
1813 | } |
1814 | } |
1815 | error(category: errSyntaxError, pos: -1, msg: "Unknown character collection '{0:t}'" , collection); |
1816 | // fall-through, assuming the Identity mapping -- this appears |
1817 | // to match Adobe's behavior |
1818 | } |
1819 | } |
1820 | } |
1821 | |
1822 | // encoding (i.e., CMap) |
1823 | obj1 = fontDict->lookup(key: "Encoding" ); |
1824 | if (obj1.isNull()) { |
1825 | error(category: errSyntaxError, pos: -1, msg: "Missing Encoding entry in Type 0 font" ); |
1826 | return; |
1827 | } |
1828 | if (!(cMap = CMap::parse(cache: nullptr, collectionA: collection, obj: &obj1))) { |
1829 | return; |
1830 | } |
1831 | if (cMap->getCMapName()) { |
1832 | encodingName = cMap->getCMapName()->toStr(); |
1833 | } else { |
1834 | encodingName = "Custom" ; |
1835 | } |
1836 | |
1837 | // CIDToGIDMap (for embedded TrueType fonts) |
1838 | obj1 = desFontDict->lookup(key: "CIDToGIDMap" ); |
1839 | if (obj1.isStream()) { |
1840 | cidToGIDLen = 0; |
1841 | unsigned int i = 64; |
1842 | cidToGID = (int *)gmallocn(count: i, size: sizeof(int)); |
1843 | obj1.streamReset(); |
1844 | while ((c1 = obj1.streamGetChar()) != EOF && (c2 = obj1.streamGetChar()) != EOF) { |
1845 | if (cidToGIDLen == i) { |
1846 | i *= 2; |
1847 | cidToGID = (int *)greallocn(p: cidToGID, count: i, size: sizeof(int)); |
1848 | } |
1849 | cidToGID[cidToGIDLen++] = (c1 << 8) + c2; |
1850 | } |
1851 | } else if (!obj1.isName(nameA: "Identity" ) && !obj1.isNull()) { |
1852 | error(category: errSyntaxError, pos: -1, msg: "Invalid CIDToGIDMap entry in CID font" ); |
1853 | } |
1854 | |
1855 | //----- character metrics ----- |
1856 | |
1857 | // default char width |
1858 | obj1 = desFontDict->lookup(key: "DW" ); |
1859 | if (obj1.isInt()) { |
1860 | widths.defWidth = obj1.getInt() * 0.001; |
1861 | } |
1862 | |
1863 | // char width exceptions |
1864 | obj1 = desFontDict->lookup(key: "W" ); |
1865 | if (obj1.isArray()) { |
1866 | excepsSize = 0; |
1867 | int i = 0; |
1868 | while (i + 1 < obj1.arrayGetLength()) { |
1869 | obj2 = obj1.arrayGet(i); |
1870 | obj3 = obj1.arrayGet(i: i + 1); |
1871 | if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) { |
1872 | obj4 = obj1.arrayGet(i: i + 2); |
1873 | if (obj4.isNum()) { |
1874 | if (widths.nExceps == excepsSize) { |
1875 | excepsSize += 16; |
1876 | widths.exceps = (GfxFontCIDWidthExcep *)greallocn(p: widths.exceps, count: excepsSize, size: sizeof(GfxFontCIDWidthExcep)); |
1877 | } |
1878 | widths.exceps[widths.nExceps].first = obj2.getInt(); |
1879 | widths.exceps[widths.nExceps].last = obj3.getInt(); |
1880 | widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001; |
1881 | ++widths.nExceps; |
1882 | } else { |
1883 | error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font" ); |
1884 | } |
1885 | i += 3; |
1886 | } else if (obj2.isInt() && obj3.isArray()) { |
1887 | if (widths.nExceps + obj3.arrayGetLength() > excepsSize) { |
1888 | excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15; |
1889 | widths.exceps = (GfxFontCIDWidthExcep *)greallocn(p: widths.exceps, count: excepsSize, size: sizeof(GfxFontCIDWidthExcep)); |
1890 | } |
1891 | int j = obj2.getInt(); |
1892 | if (likely(j < INT_MAX - obj3.arrayGetLength())) { |
1893 | for (int k = 0; k < obj3.arrayGetLength(); ++k) { |
1894 | obj4 = obj3.arrayGet(i: k); |
1895 | if (obj4.isNum()) { |
1896 | widths.exceps[widths.nExceps].first = j; |
1897 | widths.exceps[widths.nExceps].last = j; |
1898 | widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001; |
1899 | ++j; |
1900 | ++widths.nExceps; |
1901 | } else { |
1902 | error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font" ); |
1903 | } |
1904 | } |
1905 | } |
1906 | i += 2; |
1907 | } else { |
1908 | error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font" ); |
1909 | ++i; |
1910 | } |
1911 | } |
1912 | std::sort(first: widths.exceps, last: widths.exceps + widths.nExceps, comp: cmpWidthExcepFunctor()); |
1913 | } |
1914 | |
1915 | // default metrics for vertical font |
1916 | obj1 = desFontDict->lookup(key: "DW2" ); |
1917 | if (obj1.isArray() && obj1.arrayGetLength() == 2) { |
1918 | obj2 = obj1.arrayGet(i: 0); |
1919 | if (obj2.isNum()) { |
1920 | widths.defVY = obj2.getNum() * 0.001; |
1921 | } |
1922 | obj2 = obj1.arrayGet(i: 1); |
1923 | if (obj2.isNum()) { |
1924 | widths.defHeight = obj2.getNum() * 0.001; |
1925 | } |
1926 | } |
1927 | |
1928 | // char metric exceptions for vertical font |
1929 | obj1 = desFontDict->lookup(key: "W2" ); |
1930 | if (obj1.isArray()) { |
1931 | excepsSize = 0; |
1932 | int i = 0; |
1933 | while (i + 1 < obj1.arrayGetLength()) { |
1934 | obj2 = obj1.arrayGet(i); |
1935 | obj3 = obj1.arrayGet(i: i + 1); |
1936 | if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) { |
1937 | if ((obj4 = obj1.arrayGet(i: i + 2), obj4.isNum()) && (obj5 = obj1.arrayGet(i: i + 3), obj5.isNum()) && (obj6 = obj1.arrayGet(i: i + 4), obj6.isNum())) { |
1938 | if (widths.nExcepsV == excepsSize) { |
1939 | excepsSize += 16; |
1940 | widths.excepsV = (GfxFontCIDWidthExcepV *)greallocn(p: widths.excepsV, count: excepsSize, size: sizeof(GfxFontCIDWidthExcepV)); |
1941 | } |
1942 | widths.excepsV[widths.nExcepsV].first = obj2.getInt(); |
1943 | widths.excepsV[widths.nExcepsV].last = obj3.getInt(); |
1944 | widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001; |
1945 | widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001; |
1946 | widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001; |
1947 | ++widths.nExcepsV; |
1948 | } else { |
1949 | error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font" ); |
1950 | } |
1951 | i += 5; |
1952 | } else if (obj2.isInt() && obj3.isArray()) { |
1953 | if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) { |
1954 | excepsSize = (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15; |
1955 | widths.excepsV = (GfxFontCIDWidthExcepV *)greallocn(p: widths.excepsV, count: excepsSize, size: sizeof(GfxFontCIDWidthExcepV)); |
1956 | } |
1957 | int j = obj2.getInt(); |
1958 | for (int k = 0; k < obj3.arrayGetLength(); k += 3) { |
1959 | if ((obj4 = obj3.arrayGet(i: k), obj4.isNum()) && (obj5 = obj3.arrayGet(i: k + 1), obj5.isNum()) && (obj6 = obj3.arrayGet(i: k + 2), obj6.isNum())) { |
1960 | widths.excepsV[widths.nExcepsV].first = j; |
1961 | widths.excepsV[widths.nExcepsV].last = j; |
1962 | widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001; |
1963 | widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001; |
1964 | widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001; |
1965 | ++j; |
1966 | ++widths.nExcepsV; |
1967 | } else { |
1968 | error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font" ); |
1969 | } |
1970 | } |
1971 | i += 2; |
1972 | } else { |
1973 | error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font" ); |
1974 | ++i; |
1975 | } |
1976 | } |
1977 | std::sort(first: widths.excepsV, last: widths.excepsV + widths.nExcepsV, comp: cmpWidthExcepVFunctor()); |
1978 | } |
1979 | |
1980 | ok = true; |
1981 | } |
1982 | |
1983 | GfxCIDFont::~GfxCIDFont() |
1984 | { |
1985 | if (collection) { |
1986 | delete collection; |
1987 | } |
1988 | if (ctu) { |
1989 | ctu->decRefCnt(); |
1990 | } |
1991 | gfree(p: widths.exceps); |
1992 | gfree(p: widths.excepsV); |
1993 | if (cidToGID) { |
1994 | gfree(p: cidToGID); |
1995 | } |
1996 | } |
1997 | |
1998 | int GfxCIDFont::getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const |
1999 | { |
2000 | CID cid; |
2001 | CharCode dummy; |
2002 | double w, h, vx, vy; |
2003 | int n, a, b, m; |
2004 | |
2005 | if (!cMap) { |
2006 | *code = 0; |
2007 | *uLen = 0; |
2008 | *dx = *dy = *ox = *oy = 0; |
2009 | return 1; |
2010 | } |
2011 | |
2012 | *code = (CharCode)(cid = cMap->getCID(s, len, c: &dummy, nUsed: &n)); |
2013 | if (ctu) { |
2014 | if (hasToUnicode) { |
2015 | int i = 0, c = 0; |
2016 | while (i < n) { |
2017 | c = (c << 8) + (s[i] & 0xff); |
2018 | ++i; |
2019 | } |
2020 | *uLen = ctu->mapToUnicode(c, u); |
2021 | } else { |
2022 | *uLen = ctu->mapToUnicode(c: cid, u); |
2023 | } |
2024 | } else { |
2025 | *uLen = 0; |
2026 | } |
2027 | |
2028 | // horizontal |
2029 | if (cMap->getWMode() == 0) { |
2030 | w = getWidth(cid); |
2031 | h = vx = vy = 0; |
2032 | |
2033 | // vertical |
2034 | } else { |
2035 | w = 0; |
2036 | h = widths.defHeight; |
2037 | vx = getWidth(cid) / 2; |
2038 | vy = widths.defVY; |
2039 | if (widths.nExcepsV > 0 && cid >= widths.excepsV[0].first) { |
2040 | a = 0; |
2041 | b = widths.nExcepsV; |
2042 | // invariant: widths.excepsV[a].first <= cid < widths.excepsV[b].first |
2043 | while (b - a > 1) { |
2044 | m = (a + b) / 2; |
2045 | if (widths.excepsV[m].last <= cid) { |
2046 | a = m; |
2047 | } else { |
2048 | b = m; |
2049 | } |
2050 | } |
2051 | if (cid <= widths.excepsV[a].last) { |
2052 | h = widths.excepsV[a].height; |
2053 | vx = widths.excepsV[a].vx; |
2054 | vy = widths.excepsV[a].vy; |
2055 | } |
2056 | } |
2057 | } |
2058 | |
2059 | *dx = w; |
2060 | *dy = h; |
2061 | *ox = vx; |
2062 | *oy = vy; |
2063 | |
2064 | return n; |
2065 | } |
2066 | |
2067 | int GfxCIDFont::getWMode() const |
2068 | { |
2069 | return cMap ? cMap->getWMode() : 0; |
2070 | } |
2071 | |
2072 | const CharCodeToUnicode *GfxCIDFont::getToUnicode() const |
2073 | { |
2074 | return ctu; |
2075 | } |
2076 | |
2077 | const GooString *GfxCIDFont::getCollection() const |
2078 | { |
2079 | return cMap ? cMap->getCollection() : nullptr; |
2080 | } |
2081 | |
2082 | int GfxCIDFont::mapCodeToGID(FoFiTrueType *ff, int cmapi, Unicode unicode, bool wmode) |
2083 | { |
2084 | unsigned short gid = ff->mapCodeToGID(i: cmapi, c: unicode); |
2085 | if (wmode) { |
2086 | unsigned short vgid = ff->mapToVertGID(orgGID: gid); |
2087 | if (vgid != 0) { |
2088 | gid = vgid; |
2089 | } |
2090 | } |
2091 | return gid; |
2092 | } |
2093 | |
2094 | int *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *codeToGIDLen) |
2095 | { |
2096 | #define N_UCS_CANDIDATES 2 |
2097 | /* space characters */ |
2098 | static const unsigned long spaces[] = { 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x00A0, 0x200B, 0x2060, 0x3000, 0xFEFF, 0 }; |
2099 | static const char *adobe_cns1_cmaps[] = { "UniCNS-UTF32-V" , "UniCNS-UCS2-V" , "UniCNS-UTF32-H" , "UniCNS-UCS2-H" , nullptr }; |
2100 | static const char *adobe_gb1_cmaps[] = { "UniGB-UTF32-V" , "UniGB-UCS2-V" , "UniGB-UTF32-H" , "UniGB-UCS2-H" , nullptr }; |
2101 | static const char *adobe_japan1_cmaps[] = { "UniJIS-UTF32-V" , "UniJIS-UCS2-V" , "UniJIS-UTF32-H" , "UniJIS-UCS2-H" , nullptr }; |
2102 | static const char *adobe_japan2_cmaps[] = { "UniHojo-UTF32-V" , "UniHojo-UCS2-V" , "UniHojo-UTF32-H" , "UniHojo-UCS2-H" , nullptr }; |
2103 | static const char *adobe_korea1_cmaps[] = { "UniKS-UTF32-V" , "UniKS-UCS2-V" , "UniKS-UTF32-H" , "UniKS-UCS2-H" , nullptr }; |
2104 | static struct CMapListEntry |
2105 | { |
2106 | const char *collection; |
2107 | const char *scriptTag; |
2108 | const char *languageTag; |
2109 | const char *toUnicodeMap; |
2110 | const char **CMaps; |
2111 | } CMapList[] = { { |
2112 | .collection: "Adobe-CNS1" , |
2113 | .scriptTag: "hani" , |
2114 | .languageTag: "CHN " , |
2115 | .toUnicodeMap: "Adobe-CNS1-UCS2" , |
2116 | .CMaps: adobe_cns1_cmaps, |
2117 | }, |
2118 | { |
2119 | .collection: "Adobe-GB1" , |
2120 | .scriptTag: "hani" , |
2121 | .languageTag: "CHN " , |
2122 | .toUnicodeMap: "Adobe-GB1-UCS2" , |
2123 | .CMaps: adobe_gb1_cmaps, |
2124 | }, |
2125 | { |
2126 | .collection: "Adobe-Japan1" , |
2127 | .scriptTag: "kana" , |
2128 | .languageTag: "JAN " , |
2129 | .toUnicodeMap: "Adobe-Japan1-UCS2" , |
2130 | .CMaps: adobe_japan1_cmaps, |
2131 | }, |
2132 | { |
2133 | .collection: "Adobe-Japan2" , |
2134 | .scriptTag: "kana" , |
2135 | .languageTag: "JAN " , |
2136 | .toUnicodeMap: "Adobe-Japan2-UCS2" , |
2137 | .CMaps: adobe_japan2_cmaps, |
2138 | }, |
2139 | { |
2140 | .collection: "Adobe-Korea1" , |
2141 | .scriptTag: "hang" , |
2142 | .languageTag: "KOR " , |
2143 | .toUnicodeMap: "Adobe-Korea1-UCS2" , |
2144 | .CMaps: adobe_korea1_cmaps, |
2145 | }, |
2146 | { .collection: nullptr, .scriptTag: nullptr, .languageTag: nullptr, .toUnicodeMap: nullptr, .CMaps: nullptr } }; |
2147 | Unicode *humap = nullptr; |
2148 | Unicode *vumap = nullptr; |
2149 | Unicode *tumap = nullptr; |
2150 | int *codeToGID = nullptr; |
2151 | int i; |
2152 | unsigned long code; |
2153 | int wmode; |
2154 | const char **cmapName; |
2155 | CMapListEntry *lp; |
2156 | int cmap; |
2157 | int cmapPlatform, cmapEncoding; |
2158 | Ref embID; |
2159 | |
2160 | *codeToGIDLen = 0; |
2161 | if (!ctu || !getCollection()) { |
2162 | return nullptr; |
2163 | } |
2164 | |
2165 | if (getEmbeddedFontID(embID: &embID)) { |
2166 | if (getCollection()->cmp(sA: "Adobe-Identity" ) == 0) { |
2167 | return nullptr; |
2168 | } |
2169 | |
2170 | /* if this font is embedded font, |
2171 | * CIDToGIDMap should be embedded in PDF file |
2172 | * and already set. So return it. |
2173 | */ |
2174 | *codeToGIDLen = getCIDToGIDLen(); |
2175 | return getCIDToGID(); |
2176 | } |
2177 | |
2178 | /* we use only unicode cmap */ |
2179 | cmap = -1; |
2180 | for (i = 0; i < ff->getNumCmaps(); ++i) { |
2181 | cmapPlatform = ff->getCmapPlatform(i); |
2182 | cmapEncoding = ff->getCmapEncoding(i); |
2183 | if (cmapPlatform == 3 && cmapEncoding == 10) { |
2184 | /* UCS-4 */ |
2185 | cmap = i; |
2186 | /* use UCS-4 cmap */ |
2187 | break; |
2188 | } else if (cmapPlatform == 3 && cmapEncoding == 1) { |
2189 | /* Unicode */ |
2190 | cmap = i; |
2191 | } else if (cmapPlatform == 0 && cmap < 0) { |
2192 | cmap = i; |
2193 | } |
2194 | } |
2195 | if (cmap < 0) { |
2196 | return nullptr; |
2197 | } |
2198 | |
2199 | wmode = getWMode(); |
2200 | for (lp = CMapList; lp->collection != nullptr; lp++) { |
2201 | if (strcmp(s1: lp->collection, s2: getCollection()->c_str()) == 0) { |
2202 | break; |
2203 | } |
2204 | } |
2205 | const unsigned int n = 65536; |
2206 | humap = new Unicode[n * N_UCS_CANDIDATES]; |
2207 | memset(s: humap, c: 0, n: sizeof(Unicode) * n * N_UCS_CANDIDATES); |
2208 | if (lp->collection != nullptr) { |
2209 | CharCodeToUnicode *tctu; |
2210 | GooString tname(lp->toUnicodeMap); |
2211 | |
2212 | if ((tctu = CharCodeToUnicode::parseCMapFromFile(fileName: &tname, nBits: 16)) != nullptr) { |
2213 | tumap = new Unicode[n]; |
2214 | CharCode cid; |
2215 | for (cid = 0; cid < n; cid++) { |
2216 | int len; |
2217 | const Unicode *ucodes; |
2218 | |
2219 | len = tctu->mapToUnicode(c: cid, u: &ucodes); |
2220 | if (len == 1) { |
2221 | tumap[cid] = ucodes[0]; |
2222 | } else { |
2223 | /* if not single character, ignore it */ |
2224 | tumap[cid] = 0; |
2225 | } |
2226 | } |
2227 | delete tctu; |
2228 | } |
2229 | vumap = new Unicode[n]; |
2230 | memset(s: vumap, c: 0, n: sizeof(Unicode) * n); |
2231 | for (cmapName = lp->CMaps; *cmapName != nullptr; cmapName++) { |
2232 | GooString cname(*cmapName); |
2233 | |
2234 | std::shared_ptr<CMap> cnameCMap; |
2235 | if ((cnameCMap = globalParams->getCMap(collection: getCollection(), cMapName: &cname)) != nullptr) { |
2236 | if (cnameCMap->getWMode()) { |
2237 | cnameCMap->setReverseMap(rmap: vumap, rmapSize: n, ncand: 1); |
2238 | } else { |
2239 | cnameCMap->setReverseMap(rmap: humap, rmapSize: n, N_UCS_CANDIDATES); |
2240 | } |
2241 | } |
2242 | } |
2243 | ff->setupGSUB(scriptName: lp->scriptTag, languageName: lp->languageTag); |
2244 | } else { |
2245 | if (getCollection()->cmp(sA: "Adobe-Identity" ) == 0) { |
2246 | error(category: errSyntaxError, pos: -1, msg: "non-embedded font using identity encoding: {0:s}" , name ? name->c_str() : "(null)" ); |
2247 | } else { |
2248 | error(category: errSyntaxError, pos: -1, msg: "Unknown character collection {0:t}\n" , getCollection()); |
2249 | } |
2250 | if (ctu) { |
2251 | CharCode cid; |
2252 | for (cid = 0; cid < n; cid++) { |
2253 | const Unicode *ucode; |
2254 | |
2255 | if (ctu->mapToUnicode(c: cid, u: &ucode)) { |
2256 | humap[cid * N_UCS_CANDIDATES] = ucode[0]; |
2257 | } else { |
2258 | humap[cid * N_UCS_CANDIDATES] = 0; |
2259 | } |
2260 | for (i = 1; i < N_UCS_CANDIDATES; i++) { |
2261 | humap[cid * N_UCS_CANDIDATES + i] = 0; |
2262 | } |
2263 | } |
2264 | } |
2265 | } |
2266 | // map CID -> Unicode -> GID |
2267 | codeToGID = (int *)gmallocn(count: n, size: sizeof(int)); |
2268 | for (code = 0; code < n; ++code) { |
2269 | Unicode unicode; |
2270 | unsigned long gid; |
2271 | |
2272 | unicode = 0; |
2273 | gid = 0; |
2274 | if (humap != nullptr) { |
2275 | for (i = 0; i < N_UCS_CANDIDATES && gid == 0 && (unicode = humap[code * N_UCS_CANDIDATES + i]) != 0; i++) { |
2276 | gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: false); |
2277 | } |
2278 | } |
2279 | if (gid == 0 && vumap != nullptr) { |
2280 | unicode = vumap[code]; |
2281 | if (unicode != 0) { |
2282 | gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: true); |
2283 | if (gid == 0 && tumap != nullptr) { |
2284 | if ((unicode = tumap[code]) != 0) { |
2285 | gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: true); |
2286 | } |
2287 | } |
2288 | } |
2289 | } |
2290 | if (gid == 0 && tumap != nullptr) { |
2291 | if ((unicode = tumap[code]) != 0) { |
2292 | gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: false); |
2293 | } |
2294 | } |
2295 | if (gid == 0) { |
2296 | /* special handling space characters */ |
2297 | const unsigned long *p; |
2298 | |
2299 | if (humap != nullptr) { |
2300 | unicode = humap[code]; |
2301 | } |
2302 | if (unicode != 0) { |
2303 | /* check if code is space character , so map code to 0x0020 */ |
2304 | for (p = spaces; *p != 0; p++) { |
2305 | if (*p == unicode) { |
2306 | unicode = 0x20; |
2307 | gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode); |
2308 | break; |
2309 | } |
2310 | } |
2311 | } |
2312 | } |
2313 | codeToGID[code] = gid; |
2314 | } |
2315 | *codeToGIDLen = n; |
2316 | if (humap != nullptr) { |
2317 | delete[] humap; |
2318 | } |
2319 | if (tumap != nullptr) { |
2320 | delete[] tumap; |
2321 | } |
2322 | if (vumap != nullptr) { |
2323 | delete[] vumap; |
2324 | } |
2325 | return codeToGID; |
2326 | } |
2327 | |
2328 | double GfxCIDFont::getWidth(CID cid) const |
2329 | { |
2330 | double w; |
2331 | int a, b, m; |
2332 | |
2333 | w = widths.defWidth; |
2334 | if (widths.nExceps > 0 && cid >= widths.exceps[0].first) { |
2335 | a = 0; |
2336 | b = widths.nExceps; |
2337 | // invariant: widths.exceps[a].first <= cid < widths.exceps[b].first |
2338 | while (b - a > 1) { |
2339 | m = (a + b) / 2; |
2340 | if (widths.exceps[m].first <= cid) { |
2341 | a = m; |
2342 | } else { |
2343 | b = m; |
2344 | } |
2345 | } |
2346 | if (cid <= widths.exceps[a].last) { |
2347 | w = widths.exceps[a].width; |
2348 | } |
2349 | } |
2350 | return w; |
2351 | } |
2352 | |
2353 | double GfxCIDFont::getWidth(char *s, int len) const |
2354 | { |
2355 | int nUsed; |
2356 | CharCode c; |
2357 | |
2358 | CID cid = cMap->getCID(s, len, c: &c, nUsed: &nUsed); |
2359 | return getWidth(cid); |
2360 | } |
2361 | |
2362 | //------------------------------------------------------------------------ |
2363 | // GfxFontDict |
2364 | //------------------------------------------------------------------------ |
2365 | |
2366 | GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) |
2367 | { |
2368 | Ref r; |
2369 | |
2370 | fonts.resize(new_size: fontDict->getLength()); |
2371 | for (std::size_t i = 0; i < fonts.size(); ++i) { |
2372 | const Object &obj1 = fontDict->getValNF(i); |
2373 | Object obj2 = obj1.fetch(xref); |
2374 | if (obj2.isDict()) { |
2375 | if (obj1.isRef()) { |
2376 | r = obj1.getRef(); |
2377 | } else if (fontDictRef) { |
2378 | // legal generation numbers are five digits, so we use a |
2379 | // 6-digit number here |
2380 | r.gen = 100000 + fontDictRef->num; |
2381 | r.num = i; |
2382 | } else { |
2383 | // no indirect reference for this font, or for the containing |
2384 | // font dict, so hash the font and use that |
2385 | r.gen = 100000; |
2386 | r.num = hashFontObject(obj: &obj2); |
2387 | } |
2388 | fonts[i] = GfxFont::makeFont(xref, tagA: fontDict->getKey(i), idA: r, fontDict: obj2.getDict()); |
2389 | if (fonts[i] && !fonts[i]->isOk()) { |
2390 | // XXX: it may be meaningful to distinguish between |
2391 | // NULL and !isOk() so that when we do lookups |
2392 | // we can tell the difference between a missing font |
2393 | // and a font that is just !isOk() |
2394 | fonts[i].reset(); |
2395 | } |
2396 | } else { |
2397 | error(category: errSyntaxError, pos: -1, msg: "font resource is not a dictionary" ); |
2398 | fonts[i] = nullptr; |
2399 | } |
2400 | } |
2401 | } |
2402 | |
2403 | std::shared_ptr<GfxFont> GfxFontDict::lookup(const char *tag) const |
2404 | { |
2405 | for (const auto &font : fonts) { |
2406 | if (font && font->matches(tagA: tag)) { |
2407 | return font; |
2408 | } |
2409 | } |
2410 | return nullptr; |
2411 | } |
2412 | |
2413 | // FNV-1a hash |
// Incremental 32-bit hash: each byte is XORed into the state, which is
// then multiplied by 16777619.
class FNVHash
{
public:
    // start from the offset basis 2166136261
    FNVHash() : h(2166136261U) { }

    // mix a single byte into the hash
    void hash(char c) { h = (h ^ (c & 0xff)) * 16777619; }

    // mix the first n bytes of p into the hash
    void hash(const char *p, int n)
    {
        for (const char *cur = p; n > 0; --n, ++cur) {
            hash(*cur);
        }
    }

    // fold the top bit into the low bits and return a non-negative
    // 31-bit value
    int get31()
    {
        const unsigned int folded = h ^ (h >> 31);
        return folded & 0x7fffffff;
    }

private:
    unsigned int h;
};
2438 | |
2439 | int GfxFontDict::hashFontObject(Object *obj) |
2440 | { |
2441 | FNVHash h; |
2442 | |
2443 | hashFontObject1(obj, h: &h); |
2444 | return h.get31(); |
2445 | } |
2446 | |
2447 | void GfxFontDict::hashFontObject1(const Object *obj, FNVHash *h) |
2448 | { |
2449 | const GooString *s; |
2450 | const char *p; |
2451 | double r; |
2452 | int n, i; |
2453 | |
2454 | switch (obj->getType()) { |
2455 | case objBool: |
2456 | h->hash(c: 'b'); |
2457 | h->hash(c: obj->getBool() ? 1 : 0); |
2458 | break; |
2459 | case objInt: |
2460 | h->hash(c: 'i'); |
2461 | n = obj->getInt(); |
2462 | h->hash(p: (char *)&n, n: sizeof(int)); |
2463 | break; |
2464 | case objReal: |
2465 | h->hash(c: 'r'); |
2466 | r = obj->getReal(); |
2467 | h->hash(p: (char *)&r, n: sizeof(double)); |
2468 | break; |
2469 | case objString: |
2470 | h->hash(c: 's'); |
2471 | s = obj->getString(); |
2472 | h->hash(p: s->c_str(), n: s->getLength()); |
2473 | break; |
2474 | case objName: |
2475 | h->hash(c: 'n'); |
2476 | p = obj->getName(); |
2477 | h->hash(p, n: (int)strlen(s: p)); |
2478 | break; |
2479 | case objNull: |
2480 | h->hash(c: 'z'); |
2481 | break; |
2482 | case objArray: |
2483 | h->hash(c: 'a'); |
2484 | n = obj->arrayGetLength(); |
2485 | h->hash(p: (char *)&n, n: sizeof(int)); |
2486 | for (i = 0; i < n; ++i) { |
2487 | const Object &obj2 = obj->arrayGetNF(i); |
2488 | hashFontObject1(obj: &obj2, h); |
2489 | } |
2490 | break; |
2491 | case objDict: |
2492 | h->hash(c: 'd'); |
2493 | n = obj->dictGetLength(); |
2494 | h->hash(p: (char *)&n, n: sizeof(int)); |
2495 | for (i = 0; i < n; ++i) { |
2496 | p = obj->dictGetKey(i); |
2497 | h->hash(p, n: (int)strlen(s: p)); |
2498 | const Object &obj2 = obj->dictGetValNF(i); |
2499 | hashFontObject1(obj: &obj2, h); |
2500 | } |
2501 | break; |
2502 | case objStream: |
2503 | // this should never happen - streams must be indirect refs |
2504 | break; |
2505 | case objRef: |
2506 | h->hash(c: 'f'); |
2507 | n = obj->getRefNum(); |
2508 | h->hash(p: (char *)&n, n: sizeof(int)); |
2509 | n = obj->getRefGen(); |
2510 | h->hash(p: (char *)&n, n: sizeof(int)); |
2511 | break; |
2512 | default: |
2513 | h->hash(c: 'u'); |
2514 | break; |
2515 | } |
2516 | } |
2517 | |