1//========================================================================
2//
3// GfxFont.cc
4//
5// Copyright 1996-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2005, 2006, 2008-2010, 2012, 2014, 2015, 2017-2023 Albert Astals Cid <aacid@kde.org>
17// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
18// Copyright (C) 2006 Takashi Iwai <tiwai@suse.de>
19// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
20// Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
21// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
22// Copyright (C) 2007 Ed Catmur <ed@catmur.co.uk>
23// Copyright (C) 2008 Jonathan Kew <jonathan_kew@sil.org>
24// Copyright (C) 2008 Ed Avis <eda@waniasset.com>
25// Copyright (C) 2008, 2010 Hib Eris <hib@hiberis.nl>
26// Copyright (C) 2009 Peter Kerzum <kerzum@yandex-team.ru>
27// Copyright (C) 2009, 2010 David Benjamin <davidben@mit.edu>
28// Copyright (C) 2011 Axel Strübing <axel.struebing@freenet.de>
29// Copyright (C) 2011, 2012, 2014 Adrian Johnson <ajohnson@redneon.com>
30// Copyright (C) 2012 Yi Yang <ahyangyi@gmail.com>
31// Copyright (C) 2012 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
32// Copyright (C) 2012, 2017 Thomas Freitag <Thomas.Freitag@alfa.de>
33// Copyright (C) 2013-2016, 2018 Jason Crain <jason@aquaticape.us>
34// Copyright (C) 2014 Olly Betts <olly@survex.com>
35// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
36// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
37// Copyright (C) 2019 LE GARREC Vincent <legarrec.vincent@gmail.com>
38// Copyright (C) 2021, 2022, 2024 Oliver Sander <oliver.sander@tu-dresden.de>
39// Copyright (C) 2023 Khaled Hosny <khaled@aliftype.com>
40// Copyright (C) 2024 Nelson Benítez León <nbenitezl@gmail.com>
41//
42// To see a description of the changes please see the Changelog file that
43// came with your tarball or type make ChangeLog if you are building from git
44//
45//========================================================================
46
47#include <config.h>
48
49#include <cstdio>
50#include <cstdlib>
51#include <cstring>
52#include <cctype>
53#include <cmath>
54#include <climits>
55#include <algorithm>
56#include "goo/gmem.h"
57#include "Error.h"
58#include "Object.h"
59#include "Dict.h"
60#include "GlobalParams.h"
61#include "CMap.h"
62#include "CharCodeToUnicode.h"
63#include "FontEncodingTables.h"
64#include "BuiltinFont.h"
65#include "UnicodeTypeTable.h"
66#include <fofi/FoFiIdentifier.h>
67#include <fofi/FoFiType1.h>
68#include <fofi/FoFiType1C.h>
69#include <fofi/FoFiTrueType.h>
70#include "GfxFont.h"
71#include "PSOutputDev.h"
72
73//------------------------------------------------------------------------
74
// One alias -> Base-14 mapping.
struct Base14FontMapEntry
{
    const char *altName; // font name as it may appear in a document
    const char *base14Name; // Base-14 font name it stands for
};

// Alias table for the Base-14 fonts.
//
// NB: the Gfx8BitFont constructor looks names up with a binary search
// (after removing spaces from the font name), so the entries MUST stay
// sorted by altName in plain byte order (',' < '-' < letters).
static const Base14FontMapEntry base14FontMap[] = {
    // Arial -> Helvetica
    { "Arial", "Helvetica" },
    { "Arial,Bold", "Helvetica-Bold" },
    { "Arial,BoldItalic", "Helvetica-BoldOblique" },
    { "Arial,Italic", "Helvetica-Oblique" },
    { "Arial-Bold", "Helvetica-Bold" },
    { "Arial-BoldItalic", "Helvetica-BoldOblique" },
    { "Arial-BoldItalicMT", "Helvetica-BoldOblique" },
    { "Arial-BoldMT", "Helvetica-Bold" },
    { "Arial-Italic", "Helvetica-Oblique" },
    { "Arial-ItalicMT", "Helvetica-Oblique" },
    { "ArialMT", "Helvetica" },
    // Courier
    { "Courier", "Courier" },
    { "Courier,Bold", "Courier-Bold" },
    { "Courier,BoldItalic", "Courier-BoldOblique" },
    { "Courier,Italic", "Courier-Oblique" },
    { "Courier-Bold", "Courier-Bold" },
    { "Courier-BoldOblique", "Courier-BoldOblique" },
    { "Courier-Oblique", "Courier-Oblique" },
    { "CourierNew", "Courier" },
    { "CourierNew,Bold", "Courier-Bold" },
    { "CourierNew,BoldItalic", "Courier-BoldOblique" },
    { "CourierNew,Italic", "Courier-Oblique" },
    { "CourierNew-Bold", "Courier-Bold" },
    { "CourierNew-BoldItalic", "Courier-BoldOblique" },
    { "CourierNew-Italic", "Courier-Oblique" },
    { "CourierNewPS-BoldItalicMT", "Courier-BoldOblique" },
    { "CourierNewPS-BoldMT", "Courier-Bold" },
    { "CourierNewPS-ItalicMT", "Courier-Oblique" },
    { "CourierNewPSMT", "Courier" },
    // Helvetica
    { "Helvetica", "Helvetica" },
    { "Helvetica,Bold", "Helvetica-Bold" },
    { "Helvetica,BoldItalic", "Helvetica-BoldOblique" },
    { "Helvetica,Italic", "Helvetica-Oblique" },
    { "Helvetica-Bold", "Helvetica-Bold" },
    { "Helvetica-BoldItalic", "Helvetica-BoldOblique" },
    { "Helvetica-BoldOblique", "Helvetica-BoldOblique" },
    { "Helvetica-Italic", "Helvetica-Oblique" },
    { "Helvetica-Oblique", "Helvetica-Oblique" },
    // Symbol
    { "Symbol", "Symbol" },
    { "Symbol,Bold", "Symbol" },
    { "Symbol,BoldItalic", "Symbol" },
    { "Symbol,Italic", "Symbol" },
    { "SymbolMT", "Symbol" },
    { "SymbolMT,Bold", "Symbol" },
    { "SymbolMT,BoldItalic", "Symbol" },
    { "SymbolMT,Italic", "Symbol" },
    // Times
    { "Times-Bold", "Times-Bold" },
    { "Times-BoldItalic", "Times-BoldItalic" },
    { "Times-Italic", "Times-Italic" },
    { "Times-Roman", "Times-Roman" },
    { "TimesNewRoman", "Times-Roman" },
    { "TimesNewRoman,Bold", "Times-Bold" },
    { "TimesNewRoman,BoldItalic", "Times-BoldItalic" },
    { "TimesNewRoman,Italic", "Times-Italic" },
    { "TimesNewRoman-Bold", "Times-Bold" },
    { "TimesNewRoman-BoldItalic", "Times-BoldItalic" },
    { "TimesNewRoman-Italic", "Times-Italic" },
    { "TimesNewRomanPS", "Times-Roman" },
    { "TimesNewRomanPS-Bold", "Times-Bold" },
    { "TimesNewRomanPS-BoldItalic", "Times-BoldItalic" },
    { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
    { "TimesNewRomanPS-BoldMT", "Times-Bold" },
    { "TimesNewRomanPS-Italic", "Times-Italic" },
    { "TimesNewRomanPS-ItalicMT", "Times-Italic" },
    { "TimesNewRomanPSMT", "Times-Roman" },
    { "TimesNewRomanPSMT,Bold", "Times-Bold" },
    { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
    { "TimesNewRomanPSMT,Italic", "Times-Italic" },
    // ZapfDingbats
    { "ZapfDingbats", "ZapfDingbats" },
};
150
151//------------------------------------------------------------------------
152
// Substitute font names, indexed by
//   {fixed: 0, sans-serif: 4, serif: 8} + 2 * bold + italic
// NB: must be in same order as psSubstFonts in PSOutputDev.cc
static const char *base14SubstFonts[14] = {
    // fixed width
    "Courier",
    "Courier-Oblique",
    "Courier-Bold",
    "Courier-BoldOblique",
    // sans-serif
    "Helvetica",
    "Helvetica-Oblique",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    // serif
    "Times-Roman",
    "Times-Italic",
    "Times-Bold",
    "Times-BoldItalic",
    // the last two are never used for substitution
    "Symbol",
    "ZapfDingbats",
};
159
160//------------------------------------------------------------------------
161
// Forward declaration; the definition is not part of this section of the
// file. Takes a character name, an output buffer uBuf of capacity uLen,
// and flags selecting which name forms to try.
// NOTE(review): presumably returns the number of Unicode values written
// to uBuf -- confirm against the definition.
static int parseCharName(char *charName, Unicode *uBuf, int uLen, bool names, bool ligatures, bool numeric, bool hex, bool variants);
163
164//------------------------------------------------------------------------
165
166static int readFromStream(void *data)
167{
168 return ((Stream *)data)->getChar();
169}
170
171//------------------------------------------------------------------------
172// GfxFontLoc
173//------------------------------------------------------------------------
174
175GfxFontLoc::GfxFontLoc()
176{
177 fontNum = 0;
178 substIdx = -1;
179}
180
// Destructor: compiler-generated.
GfxFontLoc::~GfxFontLoc() = default;

// Move constructor: compiler-generated member-wise move, declared noexcept.
GfxFontLoc::GfxFontLoc(GfxFontLoc &&other) noexcept = default;

// Move assignment: compiler-generated member-wise move, declared noexcept.
GfxFontLoc &GfxFontLoc::operator=(GfxFontLoc &&other) noexcept = default;
186
187void GfxFontLoc::setPath(GooString *pathA)
188{
189 path = pathA->toStr();
190 delete pathA;
191}
192
193const GooString *GfxFontLoc::pathAsGooString() const
194{
195 return (const GooString *)(&path);
196}
197
198//------------------------------------------------------------------------
199// GfxFont
200//------------------------------------------------------------------------
201
202std::unique_ptr<GfxFont> GfxFont::makeFont(XRef *xref, const char *tagA, Ref idA, Dict *fontDict)
203{
204 std::optional<std::string> name;
205 Ref embFontIDA;
206 GfxFontType typeA;
207
208 // get base font name
209 Object obj1 = fontDict->lookup(key: "BaseFont");
210 if (obj1.isName()) {
211 name = obj1.getName();
212 }
213
214 // There is no BaseFont in Type 3 fonts, try fontDescriptor.FontName
215 if (!name) {
216 Object fontDesc = fontDict->lookup(key: "FontDescriptor");
217 if (fontDesc.isDict()) {
218 Object obj2 = fontDesc.dictLookup(key: "FontName");
219 if (obj2.isName()) {
220 name = obj2.getName();
221 }
222 }
223 }
224
225 // As a last resort try the Name key
226 if (!name) {
227 Object obj2 = fontDict->lookup(key: "Name");
228 if (obj2.isName()) {
229 name = obj2.getName();
230 }
231 }
232
233 // get embedded font ID and font type
234 typeA = getFontType(xref, fontDict, embID: &embFontIDA);
235
236 // create the font object
237 GfxFont *font;
238 if (typeA < fontCIDType0) {
239 font = new Gfx8BitFont(xref, tagA, idA, std::move(name), typeA, embFontIDA, fontDict);
240 } else {
241 font = new GfxCIDFont(xref, tagA, idA, std::move(name), typeA, embFontIDA, fontDict);
242 }
243
244 return std::unique_ptr<GfxFont>(font);
245}
246
247GfxFont::GfxFont(const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA) : tag(tagA), id(idA), name(std::move(nameA)), type(typeA)
248{
249 ok = false;
250 embFontID = embFontIDA;
251 embFontName = nullptr;
252 family = nullptr;
253 stretch = StretchNotDefined;
254 weight = WeightNotDefined;
255 hasToUnicode = false;
256}
257
258GfxFont::~GfxFont()
259{
260 delete family;
261 if (embFontName) {
262 delete embFontName;
263 }
264}
265
266bool GfxFont::isSubset() const
267{
268 if (name) {
269 unsigned int i;
270 for (i = 0; i < name->size(); ++i) {
271 if ((*name)[i] < 'A' || (*name)[i] > 'Z') {
272 break;
273 }
274 }
275 return i == 6 && name->size() > 7 && (*name)[6] == '+';
276 }
277 return false;
278}
279
280std::string GfxFont::getNameWithoutSubsetTag() const
281{
282 if (!name) {
283 return {};
284 }
285
286 if (!isSubset()) {
287 return *name;
288 }
289
290 return name->substr(pos: 7);
291}
292
293// This function extracts three pieces of information:
294// 1. the "expected" font type, i.e., the font type implied by
295// Font.Subtype, DescendantFont.Subtype, and
296// FontDescriptor.FontFile3.Subtype
297// 2. the embedded font object ID
298// 3. the actual font type - determined by examining the embedded font
299// if there is one, otherwise equal to the expected font type
300// If the expected and actual font types don't match, a warning
301// message is printed. The expected font type is not used for
302// anything else.
303GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID)
304{
305 GfxFontType t, expectedType;
306 FoFiIdentifierType fft;
307 Dict *fontDict2;
308 bool isType0, err;
309
310 t = fontUnknownType;
311 *embID = Ref::INVALID();
312 err = false;
313
314 Object subtype = fontDict->lookup(key: "Subtype");
315 expectedType = fontUnknownType;
316 isType0 = false;
317 if (subtype.isName(nameA: "Type1") || subtype.isName(nameA: "MMType1")) {
318 expectedType = fontType1;
319 } else if (subtype.isName(nameA: "Type1C")) {
320 expectedType = fontType1C;
321 } else if (subtype.isName(nameA: "Type3")) {
322 expectedType = fontType3;
323 } else if (subtype.isName(nameA: "TrueType")) {
324 expectedType = fontTrueType;
325 } else if (subtype.isName(nameA: "Type0")) {
326 isType0 = true;
327 } else {
328 error(category: errSyntaxWarning, pos: -1, msg: "Unknown font type: '{0:s}'", subtype.isName() ? subtype.getName() : "???");
329 }
330
331 fontDict2 = fontDict;
332 Object obj1 = fontDict->lookup(key: "DescendantFonts");
333 Object obj2; // Do not move to inside the if
334 // we need it around so that fontDict2 remains valid
335 if (obj1.isArray()) {
336 if (obj1.arrayGetLength() == 0) {
337 error(category: errSyntaxWarning, pos: -1, msg: "Empty DescendantFonts array in font");
338 } else {
339 obj2 = obj1.arrayGet(i: 0);
340 if (obj2.isDict()) {
341 if (!isType0) {
342 error(category: errSyntaxWarning, pos: -1, msg: "Non-CID font with DescendantFonts array");
343 }
344 fontDict2 = obj2.getDict();
345 subtype = fontDict2->lookup(key: "Subtype");
346 if (subtype.isName(nameA: "CIDFontType0")) {
347 if (isType0) {
348 expectedType = fontCIDType0;
349 }
350 } else if (subtype.isName(nameA: "CIDFontType2")) {
351 if (isType0) {
352 expectedType = fontCIDType2;
353 }
354 }
355 }
356 }
357 }
358
359 Object fontDesc = fontDict2->lookup(key: "FontDescriptor");
360 if (fontDesc.isDict()) {
361 Object obj3 = fontDesc.dictLookupNF(key: "FontFile").copy();
362 if (obj3.isRef()) {
363 *embID = obj3.getRef();
364 if (expectedType != fontType1) {
365 err = true;
366 }
367 }
368 if (*embID == Ref::INVALID() && (obj3 = fontDesc.dictLookupNF(key: "FontFile2").copy(), obj3.isRef())) {
369 *embID = obj3.getRef();
370 if (isType0) {
371 expectedType = fontCIDType2;
372 } else if (expectedType != fontTrueType) {
373 err = true;
374 }
375 }
376 if (*embID == Ref::INVALID() && (obj3 = fontDesc.dictLookupNF(key: "FontFile3").copy(), obj3.isRef())) {
377 *embID = obj3.getRef();
378 Object obj4 = obj3.fetch(xref);
379 if (obj4.isStream()) {
380 subtype = obj4.streamGetDict()->lookup(key: "Subtype");
381 if (subtype.isName(nameA: "Type1")) {
382 if (expectedType != fontType1) {
383 err = true;
384 expectedType = isType0 ? fontCIDType0 : fontType1;
385 }
386 } else if (subtype.isName(nameA: "Type1C")) {
387 if (expectedType == fontType1) {
388 expectedType = fontType1C;
389 } else if (expectedType != fontType1C) {
390 err = true;
391 expectedType = isType0 ? fontCIDType0C : fontType1C;
392 }
393 } else if (subtype.isName(nameA: "TrueType")) {
394 if (expectedType != fontTrueType) {
395 err = true;
396 expectedType = isType0 ? fontCIDType2 : fontTrueType;
397 }
398 } else if (subtype.isName(nameA: "CIDFontType0C")) {
399 if (expectedType == fontCIDType0) {
400 expectedType = fontCIDType0C;
401 } else {
402 err = true;
403 expectedType = isType0 ? fontCIDType0C : fontType1C;
404 }
405 } else if (subtype.isName(nameA: "OpenType")) {
406 if (expectedType == fontTrueType) {
407 expectedType = fontTrueTypeOT;
408 } else if (expectedType == fontType1) {
409 expectedType = fontType1COT;
410 } else if (expectedType == fontCIDType0) {
411 expectedType = fontCIDType0COT;
412 } else if (expectedType == fontCIDType2) {
413 expectedType = fontCIDType2OT;
414 } else {
415 err = true;
416 }
417 } else {
418 error(category: errSyntaxError, pos: -1, msg: "Unknown font type '{0:s}'", subtype.isName() ? subtype.getName() : "???");
419 }
420 }
421 }
422 }
423
424 t = fontUnknownType;
425 if (*embID != Ref::INVALID()) {
426 Object obj3(*embID);
427 Object obj4 = obj3.fetch(xref);
428 if (obj4.isStream()) {
429 obj4.streamReset();
430 fft = FoFiIdentifier::identifyStream(getChar: &readFromStream, data: obj4.getStream());
431 obj4.streamClose();
432 switch (fft) {
433 case fofiIdType1PFA:
434 case fofiIdType1PFB:
435 t = fontType1;
436 break;
437 case fofiIdCFF8Bit:
438 t = isType0 ? fontCIDType0C : fontType1C;
439 break;
440 case fofiIdCFFCID:
441 t = fontCIDType0C;
442 break;
443 case fofiIdTrueType:
444 case fofiIdTrueTypeCollection:
445 t = isType0 ? fontCIDType2 : fontTrueType;
446 break;
447 case fofiIdOpenTypeCFF8Bit:
448 t = isType0 ? fontCIDType0COT : fontType1COT;
449 break;
450 case fofiIdOpenTypeCFFCID:
451 t = fontCIDType0COT;
452 break;
453 default:
454 error(category: errSyntaxError, pos: -1, msg: "Embedded font file may be invalid");
455 break;
456 }
457 }
458 }
459
460 if (t == fontUnknownType) {
461 t = expectedType;
462 }
463
464 if (t != expectedType) {
465 err = true;
466 }
467
468 if (err) {
469 error(category: errSyntaxWarning, pos: -1, msg: "Mismatch between font type and embedded font file");
470 }
471
472 return t;
473}
474
475void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict)
476{
477 double t;
478
479 // assume Times-Roman by default (for substitution purposes)
480 flags = fontSerif;
481
482 missingWidth = 0;
483
484 Object obj1 = fontDict->lookup(key: "FontDescriptor");
485 if (obj1.isDict()) {
486
487 // get flags
488 Object obj2 = obj1.dictLookup(key: "Flags");
489 if (obj2.isInt()) {
490 flags = obj2.getInt();
491 }
492
493 // get name
494 obj2 = obj1.dictLookup(key: "FontName");
495 if (obj2.isName()) {
496 embFontName = new GooString(obj2.getName());
497 }
498 if (embFontName == nullptr) {
499 // get name with typo
500 obj2 = obj1.dictLookup(key: "Fontname");
501 if (obj2.isName()) {
502 embFontName = new GooString(obj2.getName());
503 error(category: errSyntaxWarning, pos: -1, msg: "The file uses Fontname instead of FontName please notify the creator that the file is broken");
504 }
505 }
506
507 // get family
508 obj2 = obj1.dictLookup(key: "FontFamily");
509 if (obj2.isString()) {
510 family = new GooString(obj2.getString());
511 }
512
513 // get stretch
514 obj2 = obj1.dictLookup(key: "FontStretch");
515 if (obj2.isName()) {
516 if (strcmp(s1: obj2.getName(), s2: "UltraCondensed") == 0) {
517 stretch = UltraCondensed;
518 } else if (strcmp(s1: obj2.getName(), s2: "ExtraCondensed") == 0) {
519 stretch = ExtraCondensed;
520 } else if (strcmp(s1: obj2.getName(), s2: "Condensed") == 0) {
521 stretch = Condensed;
522 } else if (strcmp(s1: obj2.getName(), s2: "SemiCondensed") == 0) {
523 stretch = SemiCondensed;
524 } else if (strcmp(s1: obj2.getName(), s2: "Normal") == 0) {
525 stretch = Normal;
526 } else if (strcmp(s1: obj2.getName(), s2: "SemiExpanded") == 0) {
527 stretch = SemiExpanded;
528 } else if (strcmp(s1: obj2.getName(), s2: "Expanded") == 0) {
529 stretch = Expanded;
530 } else if (strcmp(s1: obj2.getName(), s2: "ExtraExpanded") == 0) {
531 stretch = ExtraExpanded;
532 } else if (strcmp(s1: obj2.getName(), s2: "UltraExpanded") == 0) {
533 stretch = UltraExpanded;
534 } else {
535 error(category: errSyntaxWarning, pos: -1, msg: "Invalid Font Stretch");
536 }
537 }
538
539 // get weight
540 obj2 = obj1.dictLookup(key: "FontWeight");
541 if (obj2.isNum()) {
542 if (obj2.getNum() == 100) {
543 weight = W100;
544 } else if (obj2.getNum() == 200) {
545 weight = W200;
546 } else if (obj2.getNum() == 300) {
547 weight = W300;
548 } else if (obj2.getNum() == 400) {
549 weight = W400;
550 } else if (obj2.getNum() == 500) {
551 weight = W500;
552 } else if (obj2.getNum() == 600) {
553 weight = W600;
554 } else if (obj2.getNum() == 700) {
555 weight = W700;
556 } else if (obj2.getNum() == 800) {
557 weight = W800;
558 } else if (obj2.getNum() == 900) {
559 weight = W900;
560 } else {
561 error(category: errSyntaxWarning, pos: -1, msg: "Invalid Font Weight");
562 }
563 }
564
565 // look for MissingWidth
566 obj2 = obj1.dictLookup(key: "MissingWidth");
567 if (obj2.isNum()) {
568 missingWidth = obj2.getNum();
569 }
570
571 // get Ascent and Descent
572 obj2 = obj1.dictLookup(key: "Ascent");
573 if (obj2.isNum()) {
574 t = 0.001 * obj2.getNum();
575 // some broken font descriptors specify a negative ascent
576 if (t < 0) {
577 t = -t;
578 }
579 // some broken font descriptors set ascent and descent to 0;
580 // others set it to ridiculous values (e.g., 32768)
581 if (t != 0 && t < 3) {
582 ascent = t;
583 }
584 }
585 obj2 = obj1.dictLookup(key: "Descent");
586 if (obj2.isNum()) {
587 t = 0.001 * obj2.getNum();
588 // some broken font descriptors specify a positive descent
589 if (t > 0) {
590 t = -t;
591 }
592 // some broken font descriptors set ascent and descent to 0
593 if (t != 0 && t > -3) {
594 descent = t;
595 }
596 }
597
598 // font FontBBox
599 obj2 = obj1.dictLookup(key: "FontBBox");
600 if (obj2.isArray()) {
601 for (int i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
602 Object obj3 = obj2.arrayGet(i);
603 if (obj3.isNum()) {
604 fontBBox[i] = 0.001 * obj3.getNum();
605 }
606 }
607 }
608 }
609}
610
611CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits, CharCodeToUnicode *ctu)
612{
613 GooString *buf;
614
615 Object obj1 = fontDict->lookup(key: "ToUnicode");
616 if (!obj1.isStream()) {
617 return nullptr;
618 }
619 buf = new GooString();
620 obj1.getStream()->fillGooString(s: buf);
621 obj1.streamClose();
622 if (ctu) {
623 ctu->mergeCMap(buf, nBits);
624 } else {
625 ctu = CharCodeToUnicode::parseCMap(buf, nBits);
626 }
627 hasToUnicode = true;
628 delete buf;
629 return ctu;
630}
631
632std::optional<GfxFontLoc> GfxFont::locateFont(XRef *xref, PSOutputDev *ps, GooString *substituteFontName)
633{
634 SysFontType sysFontType;
635 GooString *path, *base14Name;
636 int substIdx, fontNum;
637 bool embed;
638
639 if (type == fontType3) {
640 return std::nullopt;
641 }
642
643 //----- embedded font
644 if (embFontID != Ref::INVALID()) {
645 embed = true;
646 Object refObj(embFontID);
647 Object embFontObj = refObj.fetch(xref);
648 if (!embFontObj.isStream()) {
649 error(category: errSyntaxError, pos: -1, msg: "Embedded font object is wrong type");
650 embed = false;
651 }
652 if (embed) {
653 if (ps) {
654 switch (type) {
655 case fontType1:
656 case fontType1C:
657 case fontType1COT:
658 embed = ps->getEmbedType1();
659 break;
660 case fontTrueType:
661 case fontTrueTypeOT:
662 embed = ps->getEmbedTrueType();
663 break;
664 case fontCIDType0C:
665 case fontCIDType0COT:
666 embed = ps->getEmbedCIDPostScript();
667 break;
668 case fontCIDType2:
669 case fontCIDType2OT:
670 embed = ps->getEmbedCIDTrueType();
671 break;
672 default:
673 break;
674 }
675 }
676 if (embed) {
677 GfxFontLoc fontLoc;
678 fontLoc.locType = gfxFontLocEmbedded;
679 fontLoc.fontType = type;
680 fontLoc.embFontID = embFontID;
681 return fontLoc;
682 }
683 }
684 }
685
686 //----- PS passthrough
687 if (ps && !isCIDFont() && ps->getFontPassthrough()) {
688 GfxFontLoc fontLoc;
689 fontLoc.locType = gfxFontLocResident;
690 fontLoc.fontType = fontType1;
691 fontLoc.path = *name;
692 return fontLoc;
693 }
694
695 //----- PS resident Base-14 font
696 if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
697 GfxFontLoc fontLoc;
698 fontLoc.locType = gfxFontLocResident;
699 fontLoc.fontType = fontType1;
700 fontLoc.path = ((Gfx8BitFont *)this)->base14->base14Name;
701 return fontLoc;
702 }
703
704 //----- external font file (fontFile, fontDir)
705 if (name && (path = globalParams->findFontFile(fontName: *name))) {
706 if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: isCIDFont())) {
707 return fontLoc;
708 }
709 }
710
711 //----- external font file for Base-14 font
712 if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
713 base14Name = new GooString(((Gfx8BitFont *)this)->base14->base14Name);
714 if ((path = globalParams->findBase14FontFile(base14Name, font: this, substituteFontName))) {
715 if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: false)) {
716 delete base14Name;
717 return fontLoc;
718 }
719 }
720 delete base14Name;
721 }
722
723 //----- system font
724 if ((path = globalParams->findSystemFontFile(font: this, type: &sysFontType, fontNum: &fontNum, substituteFontName))) {
725 if (isCIDFont()) {
726 if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
727 GfxFontLoc fontLoc;
728 fontLoc.locType = gfxFontLocExternal;
729 fontLoc.fontType = fontCIDType2;
730 fontLoc.setPath(path);
731 fontLoc.fontNum = fontNum;
732 return fontLoc;
733 }
734 } else {
735 GfxFontLoc fontLoc;
736 fontLoc.setPath(path);
737 fontLoc.locType = gfxFontLocExternal;
738 if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
739 fontLoc.fontType = fontTrueType;
740 } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) {
741 fontLoc.fontType = fontType1;
742 fontLoc.fontNum = fontNum;
743 }
744 return fontLoc;
745 }
746 delete path;
747 }
748
749 if (!isCIDFont()) {
750
751 //----- 8-bit font substitution
752 if (flags & fontFixedWidth) {
753 substIdx = 0;
754 } else if (flags & fontSerif) {
755 substIdx = 8;
756 } else {
757 substIdx = 4;
758 }
759 if (isBold()) {
760 substIdx += 2;
761 }
762 if (isItalic()) {
763 substIdx += 1;
764 }
765 const std::string substName = base14SubstFonts[substIdx];
766 if (ps) {
767 error(category: errSyntaxWarning, pos: -1, msg: "Substituting font '{0:s}' for '{1:s}'", base14SubstFonts[substIdx], name ? name->c_str() : "null");
768 GfxFontLoc fontLoc;
769 fontLoc.locType = gfxFontLocResident;
770 fontLoc.fontType = fontType1;
771 fontLoc.path = substName;
772 fontLoc.substIdx = substIdx;
773 return fontLoc;
774 } else {
775 path = globalParams->findFontFile(fontName: substName);
776 if (path) {
777 if (std::optional<GfxFontLoc> fontLoc = getExternalFont(path, cid: false)) {
778 error(category: errSyntaxWarning, pos: -1, msg: "Substituting font '{0:s}' for '{1:s}'", base14SubstFonts[substIdx], name ? name->c_str() : "");
779 name = base14SubstFonts[substIdx];
780 fontLoc->substIdx = substIdx;
781 return fontLoc;
782 }
783 }
784 }
785
786 // failed to find a substitute font
787 return std::nullopt;
788 }
789
790 // failed to find a substitute font
791 return std::nullopt;
792}
793
794std::optional<GfxFontLoc> GfxFont::getExternalFont(GooString *path, bool cid)
795{
796 FoFiIdentifierType fft;
797 GfxFontType fontType;
798
799 fft = FoFiIdentifier::identifyFile(fileName: path->c_str());
800 switch (fft) {
801 case fofiIdType1PFA:
802 case fofiIdType1PFB:
803 fontType = fontType1;
804 break;
805 case fofiIdCFF8Bit:
806 fontType = fontType1C;
807 break;
808 case fofiIdCFFCID:
809 fontType = fontCIDType0C;
810 break;
811 case fofiIdTrueType:
812 case fofiIdTrueTypeCollection:
813 fontType = cid ? fontCIDType2 : fontTrueType;
814 break;
815 case fofiIdOpenTypeCFF8Bit:
816 fontType = fontType1COT;
817 break;
818 case fofiIdOpenTypeCFFCID:
819 fontType = fontCIDType0COT;
820 break;
821 case fofiIdUnknown:
822 case fofiIdError:
823 default:
824 fontType = fontUnknownType;
825 break;
826 }
827 if (fontType == fontUnknownType || (cid ? (fontType < fontCIDType0) : (fontType >= fontCIDType0))) {
828 delete path;
829 return std::nullopt;
830 }
831 GfxFontLoc fontLoc;
832 fontLoc.locType = gfxFontLocExternal;
833 fontLoc.fontType = fontType;
834 fontLoc.setPath(path);
835 return fontLoc;
836}
837
838std::optional<std::vector<unsigned char>> GfxFont::readEmbFontFile(XRef *xref)
839{
840 Stream *str;
841
842 Object obj1(embFontID);
843 Object obj2 = obj1.fetch(xref);
844 if (!obj2.isStream()) {
845 error(category: errSyntaxError, pos: -1, msg: "Embedded font file is not a stream");
846 embFontID = Ref::INVALID();
847 return {};
848 }
849 str = obj2.getStream();
850
851 std::vector<unsigned char> buf = str->toUnsignedChars();
852 str->close();
853
854 return buf;
855}
856
// One glyph name together with an alternate spelling for it.
struct AlternateNameMap
{
    const char *name; // glyph name
    const char *alt; // alternate name for the same glyph
};

// Ligature names and their underscore-separated alternates. The list is
// terminated by an entry whose members are both nullptr.
static const AlternateNameMap alternateNameMap[] = {
    { "fi", "f_i" },
    { "fl", "f_l" },
    { "ff", "f_f" },
    { "ffi", "f_f_i" },
    { "ffl", "f_f_l" },
    { nullptr, nullptr }, // end marker
};
864
865const char *GfxFont::getAlternateName(const char *name)
866{
867 const AlternateNameMap *map = alternateNameMap;
868 while (map->name) {
869 if (strcmp(s1: name, s2: map->name) == 0) {
870 return map->alt;
871 }
872 map++;
873 }
874 return nullptr;
875}
876
877//------------------------------------------------------------------------
878// Gfx8BitFont
879//------------------------------------------------------------------------
880
// Parse character names of the form 'Axx', 'xx', 'Ann', 'ABnn', or
// 'nn', where 'A' and 'B' are any letters, 'xx' is two hex digits,
// and 'nn' is decimal digits.
//
// s   - NUL-terminated character name
// hex - true: accept exactly two hex digits, optionally preceded by one
//       letter; false: accept decimal digits preceded by up to two letters
// u   - if non-null, receives the parsed value on success
//
// Trailing non-alphanumeric characters after the number are ignored.
// Returns true if the name was parsed successfully.
static bool parseNumericName(const char *s, bool hex, unsigned int *u)
{
    // The <cctype> classification functions have undefined behaviour for
    // negative arguments other than EOF, and plain char may be signed, so
    // every character goes through unsigned char first.
    const auto isAlnum = [](char c) { return isalnum(static_cast<unsigned char>(c)) != 0; };
    const auto isAlpha = [](char c) { return isalpha(static_cast<unsigned char>(c)) != 0; };

    // Strip leading alpha characters.
    if (hex) {
        int n = 0;

        // Get string length while ignoring junk at end.
        while (isAlnum(s[n])) {
            ++n;
        }

        // Only 2 hex characters with optional leading alpha is allowed.
        if (n == 3 && isAlpha(*s)) {
            ++s;
        } else if (n != 2) {
            return false;
        }
    } else {
        // Strip up to two alpha characters.
        for (int i = 0; i < 2 && isAlpha(*s); ++i) {
            ++s;
        }
    }

    char *endptr;
    const long v = strtol(s, &endptr, hex ? 16 : 10);

    // No digits were consumed.
    if (endptr == s) {
        return false;
    }

    // Skip trailing junk characters.
    while (*endptr != '\0' && !isAlnum(*endptr)) {
        ++endptr;
    }

    // Anything alphanumeric left over means the name is not purely numeric.
    if (*endptr != '\0') {
        return false;
    }

    if (u) {
        *u = static_cast<unsigned int>(v);
    }
    return true;
}
929
930// Returns true if the font has character names like xx or Axx which
931// should be parsed for hex or decimal values.
932static bool testForNumericNames(Dict *fontDict, bool hex)
933{
934 bool numeric = true;
935
936 Object enc = fontDict->lookup(key: "Encoding");
937 if (!enc.isDict()) {
938 return false;
939 }
940
941 Object diff = enc.dictLookup(key: "Differences");
942 if (!diff.isArray()) {
943 return false;
944 }
945
946 for (int i = 0; i < diff.arrayGetLength() && numeric; ++i) {
947 Object obj = diff.arrayGet(i);
948 if (obj.isInt()) {
949 // All sequences must start between character codes 0 and 5.
950 if (obj.getInt() > 5) {
951 numeric = false;
952 }
953 } else if (obj.isName()) {
954 // All character names must successfully parse.
955 if (!parseNumericName(s: obj.getName(), hex, u: nullptr)) {
956 numeric = false;
957 }
958 } else {
959 numeric = false;
960 }
961 }
962
963 return numeric;
964}
965
966Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict) : GfxFont(tagA, idA, std::move(nameA), typeA, embFontIDA)
967{
968 const BuiltinFont *builtinFont;
969 const char **baseEnc;
970 bool baseEncFromFontFile;
971 int len;
972 FoFiType1 *ffT1;
973 FoFiType1C *ffT1C;
974 char *charName;
975 bool missing, hex;
976 bool numeric;
977 Unicode toUnicode[256];
978 Unicode uBuf[8];
979 double mul;
980 int firstChar, lastChar;
981 unsigned short w;
982 Object obj1;
983 int n, a, b, m;
984
985 ctu = nullptr;
986
987 // do font name substitution for various aliases of the Base 14 font
988 // names
989 base14 = nullptr;
990 if (name) {
991 std::string name2 = *name;
992 size_t i = 0;
993 while (i < name2.size()) {
994 if (name2[i] == ' ') {
995 name2.erase(pos: i, n: 1);
996 } else {
997 ++i;
998 }
999 }
1000 a = 0;
1001 b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry);
1002 // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName
1003 while (b - a > 1) {
1004 m = (a + b) / 2;
1005 if (name2.compare(s: base14FontMap[m].altName) >= 0) {
1006 a = m;
1007 } else {
1008 b = m;
1009 }
1010 }
1011 if (name2 == base14FontMap[a].altName) {
1012 base14 = &base14FontMap[a];
1013 }
1014 }
1015
1016 // is it a built-in font?
1017 builtinFont = nullptr;
1018 if (base14) {
1019 for (const BuiltinFont &bf : builtinFonts) {
1020 if (!strcmp(s1: base14->base14Name, s2: bf.name)) {
1021 builtinFont = &bf;
1022 break;
1023 }
1024 }
1025 }
1026
1027 // default ascent/descent values
1028 if (builtinFont) {
1029 ascent = 0.001 * builtinFont->ascent;
1030 descent = 0.001 * builtinFont->descent;
1031 fontBBox[0] = 0.001 * builtinFont->bbox[0];
1032 fontBBox[1] = 0.001 * builtinFont->bbox[1];
1033 fontBBox[2] = 0.001 * builtinFont->bbox[2];
1034 fontBBox[3] = 0.001 * builtinFont->bbox[3];
1035 } else {
1036 ascent = 0.95;
1037 descent = -0.35;
1038 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
1039 }
1040
1041 // get info from font descriptor
1042 readFontDescriptor(xref, fontDict);
1043
1044 // for non-embedded fonts, don't trust the ascent/descent/bbox
1045 // values from the font descriptor
1046 if (builtinFont && embFontID == Ref::INVALID()) {
1047 ascent = 0.001 * builtinFont->ascent;
1048 descent = 0.001 * builtinFont->descent;
1049 fontBBox[0] = 0.001 * builtinFont->bbox[0];
1050 fontBBox[1] = 0.001 * builtinFont->bbox[1];
1051 fontBBox[2] = 0.001 * builtinFont->bbox[2];
1052 fontBBox[3] = 0.001 * builtinFont->bbox[3];
1053 }
1054
1055 // get font matrix
1056 fontMat[0] = fontMat[3] = 1;
1057 fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
1058 obj1 = fontDict->lookup(key: "FontMatrix");
1059 if (obj1.isArray()) {
1060 for (int i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
1061 Object obj2 = obj1.arrayGet(i);
1062 if (obj2.isNum()) {
1063 fontMat[i] = obj2.getNum();
1064 }
1065 }
1066 }
1067
1068 // get Type 3 bounding box, font definition, and resources
1069 if (type == fontType3) {
1070 obj1 = fontDict->lookup(key: "FontBBox");
1071 if (obj1.isArray()) {
1072 for (int i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
1073 Object obj2 = obj1.arrayGet(i);
1074 if (obj2.isNum()) {
1075 fontBBox[i] = obj2.getNum();
1076 }
1077 }
1078 }
1079 charProcs = fontDict->lookup(key: "CharProcs");
1080 if (!charProcs.isDict()) {
1081 error(category: errSyntaxError, pos: -1, msg: "Missing or invalid CharProcs dictionary in Type 3 font");
1082 charProcs.setToNull();
1083 }
1084 resources = fontDict->lookup(key: "Resources");
1085 if (!resources.isDict()) {
1086 resources.setToNull();
1087 }
1088 }
1089
1090 //----- build the font encoding -----
1091
1092 // Encodings start with a base encoding, which can come from
1093 // (in order of priority):
1094 // 1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
1095 // - MacRoman / MacExpert / WinAnsi / Standard
1096 // 2. embedded or external font file
1097 // 3. default:
1098 // - builtin --> builtin encoding
1099 // - TrueType --> WinAnsiEncoding
1100 // - others --> StandardEncoding
1101 // and then add a list of differences (if any) from
1102 // FontDict.Encoding.Differences.
1103
1104 // check FontDict for base encoding
1105 hasEncoding = false;
1106 usesMacRomanEnc = false;
1107 baseEnc = nullptr;
1108 baseEncFromFontFile = false;
1109 obj1 = fontDict->lookup(key: "Encoding");
1110 if (obj1.isDict()) {
1111 Object obj2 = obj1.dictLookup(key: "BaseEncoding");
1112 if (obj2.isName(nameA: "MacRomanEncoding")) {
1113 hasEncoding = true;
1114 usesMacRomanEnc = true;
1115 baseEnc = macRomanEncoding;
1116 } else if (obj2.isName(nameA: "MacExpertEncoding")) {
1117 hasEncoding = true;
1118 baseEnc = macExpertEncoding;
1119 } else if (obj2.isName(nameA: "WinAnsiEncoding")) {
1120 hasEncoding = true;
1121 baseEnc = winAnsiEncoding;
1122 }
1123 } else if (obj1.isName(nameA: "MacRomanEncoding")) {
1124 hasEncoding = true;
1125 usesMacRomanEnc = true;
1126 baseEnc = macRomanEncoding;
1127 } else if (obj1.isName(nameA: "MacExpertEncoding")) {
1128 hasEncoding = true;
1129 baseEnc = macExpertEncoding;
1130 } else if (obj1.isName(nameA: "WinAnsiEncoding")) {
1131 hasEncoding = true;
1132 baseEnc = winAnsiEncoding;
1133 }
1134
1135 // check embedded font file for base encoding
1136 // (only for Type 1 fonts - trying to get an encoding out of a
1137 // TrueType font is a losing proposition)
1138 ffT1 = nullptr;
1139 ffT1C = nullptr;
1140 if (type == fontType1 && embFontID != Ref::INVALID()) {
1141 const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref);
1142 if (buf) {
1143 if ((ffT1 = FoFiType1::make(fileA: buf->data(), lenA: buf->size()))) {
1144 const std::string fontName = ffT1->getName();
1145 if (!fontName.empty()) {
1146 delete embFontName;
1147 embFontName = new GooString(fontName);
1148 }
1149 if (!baseEnc) {
1150 baseEnc = (const char **)ffT1->getEncoding();
1151 baseEncFromFontFile = true;
1152 }
1153 }
1154 }
1155 } else if (type == fontType1C && embFontID != Ref::INVALID()) {
1156 const std::optional<std::vector<unsigned char>> buf = readEmbFontFile(xref);
1157 if (buf) {
1158 if ((ffT1C = FoFiType1C::make(fileA: buf->data(), lenA: buf->size()))) {
1159 if (ffT1C->getName()) {
1160 if (embFontName) {
1161 delete embFontName;
1162 }
1163 embFontName = new GooString(ffT1C->getName());
1164 }
1165 if (!baseEnc) {
1166 baseEnc = (const char **)ffT1C->getEncoding();
1167 baseEncFromFontFile = true;
1168 }
1169 }
1170 }
1171 }
1172
1173 // get default base encoding
1174 if (!baseEnc) {
1175 if (builtinFont && embFontID == Ref::INVALID()) {
1176 baseEnc = builtinFont->defaultBaseEnc;
1177 hasEncoding = true;
1178 } else if (type == fontTrueType) {
1179 baseEnc = winAnsiEncoding;
1180 } else {
1181 baseEnc = standardEncoding;
1182 }
1183 }
1184
1185 if (baseEncFromFontFile) {
1186 encodingName = "Builtin";
1187 } else if (baseEnc == winAnsiEncoding) {
1188 encodingName = "WinAnsi";
1189 } else if (baseEnc == macRomanEncoding) {
1190 encodingName = "MacRoman";
1191 } else if (baseEnc == macExpertEncoding) {
1192 encodingName = "MacExpert";
1193 } else if (baseEnc == symbolEncoding) {
1194 encodingName = "Symbol";
1195 } else if (baseEnc == zapfDingbatsEncoding) {
1196 encodingName = "ZapfDingbats";
1197 } else {
1198 encodingName = "Standard";
1199 }
1200
1201 // copy the base encoding
1202 for (int i = 0; i < 256; ++i) {
1203 enc[i] = (char *)baseEnc[i];
1204 if ((encFree[i] = baseEncFromFontFile) && enc[i]) {
1205 enc[i] = copyString(s: baseEnc[i]);
1206 }
1207 }
1208
1209 // some Type 1C font files have empty encodings, which can break the
1210 // T1C->T1 conversion (since the 'seac' operator depends on having
1211 // the accents in the encoding), so we fill in any gaps from
1212 // StandardEncoding
1213 if (type == fontType1C && embFontID != Ref::INVALID() && baseEncFromFontFile) {
1214 for (int i = 0; i < 256; ++i) {
1215 if (!enc[i] && standardEncoding[i]) {
1216 enc[i] = (char *)standardEncoding[i];
1217 encFree[i] = false;
1218 }
1219 }
1220 }
1221
1222 // merge differences into encoding
1223 if (obj1.isDict()) {
1224 Object obj2 = obj1.dictLookup(key: "Differences");
1225 if (obj2.isArray()) {
1226 encodingName = "Custom";
1227 hasEncoding = true;
1228 int code = 0;
1229 for (int i = 0; i < obj2.arrayGetLength(); ++i) {
1230 Object obj3 = obj2.arrayGet(i);
1231 if (obj3.isInt()) {
1232 code = obj3.getInt();
1233 } else if (obj3.isName()) {
1234 if (code >= 0 && code < 256) {
1235 if (encFree[code]) {
1236 gfree(p: enc[code]);
1237 }
1238 enc[code] = copyString(s: obj3.getName());
1239 encFree[code] = true;
1240 ++code;
1241 }
1242 } else {
1243 error(category: errSyntaxError, pos: -1, msg: "Wrong type in font encoding resource differences ({0:s})", obj3.getTypeName());
1244 }
1245 }
1246 }
1247 }
1248 delete ffT1;
1249 delete ffT1C;
1250
1251 //----- build the mapping to Unicode -----
1252
1253 // pass 1: use the name-to-Unicode mapping table
1254 missing = hex = false;
1255 bool isZapfDingbats = name && name->ends_with(x: "ZapfDingbats");
1256 for (int code = 0; code < 256; ++code) {
1257 if ((charName = enc[code])) {
1258 if (isZapfDingbats) {
1259 // include ZapfDingbats names
1260 toUnicode[code] = globalParams->mapNameToUnicodeAll(charName);
1261 } else {
1262 toUnicode[code] = globalParams->mapNameToUnicodeText(charName);
1263 }
1264 if (!toUnicode[code] && strcmp(s1: charName, s2: ".notdef")) {
1265 // if it wasn't in the name-to-Unicode table, check for a
1266 // name that looks like 'Axx' or 'xx', where 'A' is any letter
1267 // and 'xx' is two hex digits
1268 if ((strlen(s: charName) == 3 && isalpha(charName[0]) && isxdigit(charName[1]) && isxdigit(charName[2])
1269 && ((charName[1] >= 'a' && charName[1] <= 'f') || (charName[1] >= 'A' && charName[1] <= 'F') || (charName[2] >= 'a' && charName[2] <= 'f') || (charName[2] >= 'A' && charName[2] <= 'F')))
1270 || (strlen(s: charName) == 2 && isxdigit(charName[0]) && isxdigit(charName[1]) &&
1271 // Only check idx 1 to avoid misidentifying a decimal
1272 // number like a0
1273 ((charName[1] >= 'a' && charName[1] <= 'f') || (charName[1] >= 'A' && charName[1] <= 'F')))) {
1274 hex = true;
1275 }
1276 missing = true;
1277 }
1278 } else {
1279 toUnicode[code] = 0;
1280 }
1281 }
1282
1283 numeric = testForNumericNames(fontDict, hex);
1284
1285 // construct the char code -> Unicode mapping object
1286 ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
1287
1288 // pass 1a: Expand ligatures in the Alphabetic Presentation Form
1289 // block (eg "fi", "ffi") to normal form
1290 for (int code = 0; code < 256; ++code) {
1291 if (unicodeIsAlphabeticPresentationForm(c: toUnicode[code])) {
1292 Unicode *normalized = unicodeNormalizeNFKC(in: &toUnicode[code], len: 1, out_len: &len, indices: nullptr);
1293 if (len > 1) {
1294 ctu->setMapping(c: (CharCode)code, u: normalized, len);
1295 }
1296 gfree(p: normalized);
1297 }
1298 }
1299
1300 // pass 2: try to fill in the missing chars, looking for ligatures, numeric
1301 // references and variants
1302 if (missing) {
1303 for (int code = 0; code < 256; ++code) {
1304 if (!toUnicode[code]) {
1305 if ((charName = enc[code]) && strcmp(s1: charName, s2: ".notdef")
1306 && (n = parseCharName(charName, uBuf, uLen: sizeof(uBuf) / sizeof(*uBuf),
1307 names: false, // don't check simple names (pass 1)
1308 ligatures: true, // do check ligatures
1309 numeric, hex,
1310 variants: true))) { // do check variants
1311 ctu->setMapping(c: (CharCode)code, u: uBuf, len: n);
1312 continue;
1313 }
1314
1315 // do a simple pass-through
1316 // mapping for unknown character names
1317 uBuf[0] = code;
1318 ctu->setMapping(c: (CharCode)code, u: uBuf, len: 1);
1319 }
1320 }
1321 }
1322
1323 // merge in a ToUnicode CMap, if there is one -- this overwrites
1324 // existing entries in ctu, i.e., the ToUnicode CMap takes
1325 // precedence, but the other encoding info is allowed to fill in any
1326 // holes
1327 readToUnicodeCMap(fontDict, nBits: 16, ctu);
1328
1329 //----- get the character widths -----
1330
1331 // initialize all widths
1332 for (double &width : widths) {
1333 width = missingWidth * 0.001;
1334 }
1335
1336 // use widths from font dict, if present
1337 obj1 = fontDict->lookup(key: "FirstChar");
1338 firstChar = obj1.isInt() ? obj1.getInt() : 0;
1339 if (firstChar < 0 || firstChar > 255) {
1340 firstChar = 0;
1341 }
1342 obj1 = fontDict->lookup(key: "LastChar");
1343 lastChar = obj1.isInt() ? obj1.getInt() : 255;
1344 if (lastChar < 0 || lastChar > 255) {
1345 lastChar = 255;
1346 }
1347 mul = (type == fontType3) ? fontMat[0] : 0.001;
1348 obj1 = fontDict->lookup(key: "Widths");
1349 if (obj1.isArray()) {
1350 flags |= fontFixedWidth;
1351 if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
1352 lastChar = firstChar + obj1.arrayGetLength() - 1;
1353 }
1354 double firstNonZeroWidth = 0;
1355 for (int code = firstChar; code <= lastChar; ++code) {
1356 Object obj2 = obj1.arrayGet(i: code - firstChar);
1357 if (obj2.isNum()) {
1358 widths[code] = obj2.getNum() * mul;
1359
1360 // Check if the font is fixed width
1361 if (firstNonZeroWidth == 0) {
1362 firstNonZeroWidth = widths[code];
1363 }
1364 if (firstNonZeroWidth != 0 && widths[code] != 0 && fabs(x: widths[code] - firstNonZeroWidth) > 0.00001) {
1365 flags &= ~fontFixedWidth;
1366 }
1367 }
1368 }
1369
1370 // use widths from built-in font
1371 } else if (builtinFont) {
1372 // this is a kludge for broken PDF files that encode char 32
1373 // as .notdef
1374 if (builtinFont->getWidth(n: "space", w: &w)) {
1375 widths[32] = 0.001 * w;
1376 }
1377 for (int code = 0; code < 256; ++code) {
1378 if (enc[code] && builtinFont->getWidth(n: enc[code], w: &w)) {
1379 widths[code] = 0.001 * w;
1380 }
1381 }
1382
1383 // couldn't find widths -- use defaults
1384 } else {
1385 // this is technically an error -- the Widths entry is required
1386 // for all but the Base-14 fonts -- but certain PDF generators
1387 // apparently don't include widths for Arial and TimesNewRoman
1388 int i;
1389 if (isFixedWidth()) {
1390 i = 0;
1391 } else if (isSerif()) {
1392 i = 8;
1393 } else {
1394 i = 4;
1395 }
1396 if (isBold()) {
1397 i += 2;
1398 }
1399 if (isItalic()) {
1400 i += 1;
1401 }
1402 builtinFont = builtinFontSubst[i];
1403 // this is a kludge for broken PDF files that encode char 32
1404 // as .notdef
1405 if (builtinFont->getWidth(n: "space", w: &w)) {
1406 widths[32] = 0.001 * w;
1407 }
1408 for (int code = 0; code < 256; ++code) {
1409 if (enc[code] && builtinFont->getWidth(n: enc[code], w: &w)) {
1410 widths[code] = 0.001 * w;
1411 }
1412 }
1413 }
1414
1415 ok = true;
1416}
1417
1418Gfx8BitFont::~Gfx8BitFont()
1419{
1420 int i;
1421
1422 for (i = 0; i < 256; ++i) {
1423 if (encFree[i] && enc[i]) {
1424 gfree(p: enc[i]);
1425 }
1426 }
1427 ctu->decRefCnt();
1428}
1429
1430// This function is in part a derived work of the Adobe Glyph Mapping
1431// Convention: http://www.adobe.com/devnet/opentype/archives/glyph.html
1432// Algorithmic comments are excerpted from that document to aid
1433// maintainability.
1434static int parseCharName(char *charName, Unicode *uBuf, int uLen, bool names, bool ligatures, bool numeric, bool hex, bool variants)
1435{
1436 if (uLen <= 0) {
1437 error(category: errInternal, pos: -1,
1438 msg: "Zero-length output buffer (recursion overflow?) in "
1439 "parseCharName, component \"{0:s}\"",
1440 charName);
1441 return 0;
1442 }
1443 // Step 1: drop all the characters from the glyph name starting with the
1444 // first occurrence of a period (U+002E FULL STOP), if any.
1445 if (variants) {
1446 char *var_part = strchr(s: charName, c: '.');
1447 if (var_part == charName) {
1448 return 0; // .notdef or similar
1449 } else if (var_part != nullptr) {
1450 // parse names of the form 7.oldstyle, P.swash, s.sc, etc.
1451 char *main_part = copyString(s: charName, n: var_part - charName);
1452 bool namesRecurse = true, variantsRecurse = false;
1453 int n = parseCharName(charName: main_part, uBuf, uLen, names: namesRecurse, ligatures, numeric, hex, variants: variantsRecurse);
1454 gfree(p: main_part);
1455 return n;
1456 }
1457 }
1458 // Step 2: split the remaining string into a sequence of components, using
1459 // underscore (U+005F LOW LINE) as the delimiter.
1460 if (ligatures && strchr(s: charName, c: '_')) {
1461 // parse names of the form A_a (e.g. f_i, T_h, l_quotesingle)
1462 char *lig_part, *lig_end, *lig_copy;
1463 int n = 0, m;
1464 lig_part = lig_copy = copyString(s: charName);
1465 do {
1466 if ((lig_end = strchr(s: lig_part, c: '_'))) {
1467 *lig_end = '\0';
1468 }
1469 if (lig_part[0] != '\0') {
1470 bool namesRecurse = true, ligaturesRecurse = false;
1471 if ((m = parseCharName(charName: lig_part, uBuf: uBuf + n, uLen: uLen - n, names: namesRecurse, ligatures: ligaturesRecurse, numeric, hex, variants))) {
1472 n += m;
1473 } else {
1474 error(category: errSyntaxWarning, pos: -1,
1475 msg: "Could not parse ligature component \"{0:s}\" of \"{1:s}\" in "
1476 "parseCharName",
1477 lig_part, charName);
1478 }
1479 }
1480 if (lig_end) {
1481 lig_part = lig_end + 1;
1482 }
1483 } while (lig_end && n < uLen);
1484 gfree(p: lig_copy);
1485 return n;
1486 }
1487 // Step 3: map each component to a character string according to the
1488 // procedure below, and concatenate those strings; the result is the
1489 // character string to which the glyph name is mapped.
1490 // 3.1. if the font is Zapf Dingbats (PostScript FontName ZapfDingbats), and
1491 // the component is in the ZapfDingbats list, then map it to the
1492 // corresponding character in that list.
1493 // 3.2. otherwise, if the component is in the Adobe Glyph List, then map it
1494 // to the corresponding character in that list.
1495 if (names && (uBuf[0] = globalParams->mapNameToUnicodeText(charName))) {
1496 return 1;
1497 }
1498 unsigned int n = strlen(s: charName);
1499 // 3.3. otherwise, if the component is of the form "uni" (U+0075 U+006E
1500 // U+0069) followed by a sequence of uppercase hexadecimal digits (0 .. 9,
1501 // A .. F, i.e. U+0030 .. U+0039, U+0041 .. U+0046), the length of that
1502 // sequence is a multiple of four, and each group of four digits represents
1503 // a number in the set {0x0000 .. 0xD7FF, 0xE000 .. 0xFFFF}, then interpret
1504 // each such number as a Unicode scalar value and map the component to the
1505 // string made of those scalar values. Note that the range and digit length
1506 // restrictions mean that the "uni" prefix can be used only with Unicode
1507 // values from the Basic Multilingual Plane (BMP).
1508 if (n >= 7 && (n % 4) == 3 && !strncmp(s1: charName, s2: "uni", n: 3)) {
1509 int i;
1510 unsigned int m;
1511 for (i = 0, m = 3; i < uLen && m < n; m += 4) {
1512 if (isxdigit(charName[m]) && isxdigit(charName[m + 1]) && isxdigit(charName[m + 2]) && isxdigit(charName[m + 3])) {
1513 unsigned int u;
1514 sscanf(s: charName + m, format: "%4x", &u);
1515 if (u <= 0xD7FF || (0xE000 <= u && u <= 0xFFFF)) {
1516 uBuf[i++] = u;
1517 }
1518 }
1519 }
1520 return i;
1521 }
1522 // 3.4. otherwise, if the component is of the form "u" (U+0075) followed by
1523 // a sequence of four to six uppercase hexadecimal digits {0 .. 9, A .. F}
1524 // (U+0030 .. U+0039, U+0041 .. U+0046), and those digits represent a
1525 // number in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then interpret this
1526 // number as a Unicode scalar value and map the component to the string
1527 // made of this scalar value.
1528 if (n >= 5 && n <= 7 && charName[0] == 'u' && isxdigit(charName[1]) && isxdigit(charName[2]) && isxdigit(charName[3]) && isxdigit(charName[4]) && (n <= 5 || isxdigit(charName[5])) && (n <= 6 || isxdigit(charName[6]))) {
1529 unsigned int u;
1530 sscanf(s: charName + 1, format: "%x", &u);
1531 if (u <= 0xD7FF || (0xE000 <= u && u <= 0x10FFFF)) {
1532 uBuf[0] = u;
1533 return 1;
1534 }
1535 }
1536 // Not in Adobe Glyph Mapping convention: look for names like xx
1537 // or Axx and parse for hex or decimal values.
1538 if (numeric && parseNumericName(s: charName, hex, u: uBuf)) {
1539 return 1;
1540 }
1541 // 3.5. otherwise, map the component to the empty string
1542 return 0;
1543}
1544
1545int Gfx8BitFont::getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const
1546{
1547 CharCode c;
1548
1549 *code = c = (CharCode)(*s & 0xff);
1550 *uLen = ctu->mapToUnicode(c, u);
1551 *dx = widths[c];
1552 *dy = *ox = *oy = 0;
1553 return 1;
1554}
1555
1556const CharCodeToUnicode *Gfx8BitFont::getToUnicode() const
1557{
1558 return ctu;
1559}
1560
1561int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff)
1562{
1563 int *map;
1564 int cmapPlatform, cmapEncoding;
1565 int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
1566 bool useMacRoman, useUnicode;
1567 char *charName;
1568 Unicode u;
1569 int code, i, n;
1570
1571 map = (int *)gmallocn(count: 256, size: sizeof(int));
1572 for (i = 0; i < 256; ++i) {
1573 map[i] = 0;
1574 }
1575
1576 // To match up with the Adobe-defined behaviour, we choose a cmap
1577 // like this:
1578 // 1. If the PDF font has an encoding:
1579 // 1a. If the TrueType font has a Microsoft Unicode
1580 // cmap or a non-Microsoft Unicode cmap, use it, and use the
1581 // Unicode indexes, not the char codes.
1582 // 1b. If the PDF font specified MacRomanEncoding and the
1583 // TrueType font has a Macintosh Roman cmap, use it, and
1584 // reverse map the char names through MacRomanEncoding to
1585 // get char codes.
1586 // 1c. If the PDF font is symbolic and the TrueType font has a
1587 // Microsoft Symbol cmap, use it, and use char codes
1588 // directly (possibly with an offset of 0xf000).
1589 // 1d. If the TrueType font has a Macintosh Roman cmap, use it,
1590 // as in case 1a.
1591 // 2. If the PDF font does not have an encoding or the PDF font is
1592 // symbolic:
1593 // 2a. If the TrueType font has a Macintosh Roman cmap, use it,
1594 // and use char codes directly (possibly with an offset of
1595 // 0xf000).
1596 // 2b. If the TrueType font has a Microsoft Symbol cmap, use it,
1597 // and use char codes directly (possible with an offset of
1598 // 0xf000).
1599 // 3. If none of these rules apply, use the first cmap and hope for
1600 // the best (this shouldn't happen).
1601 unicodeCmap = macRomanCmap = msSymbolCmap = -1;
1602 for (i = 0; i < ff->getNumCmaps(); ++i) {
1603 cmapPlatform = ff->getCmapPlatform(i);
1604 cmapEncoding = ff->getCmapEncoding(i);
1605 if ((cmapPlatform == 3 && cmapEncoding == 1) || cmapPlatform == 0) {
1606 unicodeCmap = i;
1607 } else if (cmapPlatform == 1 && cmapEncoding == 0) {
1608 macRomanCmap = i;
1609 } else if (cmapPlatform == 3 && cmapEncoding == 0) {
1610 msSymbolCmap = i;
1611 }
1612 }
1613 cmap = 0;
1614 useMacRoman = false;
1615 useUnicode = false;
1616 if (hasEncoding || type == fontType1) {
1617 if (unicodeCmap >= 0) {
1618 cmap = unicodeCmap;
1619 useUnicode = true;
1620 } else if (usesMacRomanEnc && macRomanCmap >= 0) {
1621 cmap = macRomanCmap;
1622 useMacRoman = true;
1623 } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) {
1624 cmap = msSymbolCmap;
1625 } else if ((flags & fontSymbolic) && macRomanCmap >= 0) {
1626 cmap = macRomanCmap;
1627 } else if (macRomanCmap >= 0) {
1628 cmap = macRomanCmap;
1629 useMacRoman = true;
1630 }
1631 } else {
1632 if (msSymbolCmap >= 0) {
1633 cmap = msSymbolCmap;
1634 } else if (macRomanCmap >= 0) {
1635 cmap = macRomanCmap;
1636 }
1637 }
1638
1639 // reverse map the char names through MacRomanEncoding, then map the
1640 // char codes through the cmap
1641 if (useMacRoman) {
1642 for (i = 0; i < 256; ++i) {
1643 if ((charName = enc[i])) {
1644 if ((code = globalParams->getMacRomanCharCode(charName))) {
1645 map[i] = ff->mapCodeToGID(i: cmap, c: code);
1646 }
1647 } else {
1648 map[i] = -1;
1649 }
1650 }
1651
1652 // map Unicode through the cmap
1653 } else if (useUnicode) {
1654 const Unicode *uAux;
1655 for (i = 0; i < 256; ++i) {
1656 if (((charName = enc[i]) && (u = globalParams->mapNameToUnicodeAll(charName)))) {
1657 map[i] = ff->mapCodeToGID(i: cmap, c: u);
1658 } else {
1659 n = ctu->mapToUnicode(c: (CharCode)i, u: &uAux);
1660 if (n > 0) {
1661 map[i] = ff->mapCodeToGID(i: cmap, c: uAux[0]);
1662 } else {
1663 map[i] = -1;
1664 }
1665 }
1666 }
1667
1668 // map the char codes through the cmap, possibly with an offset of
1669 // 0xf000
1670 } else {
1671 for (i = 0; i < 256; ++i) {
1672 if (!(map[i] = ff->mapCodeToGID(i: cmap, c: i))) {
1673 map[i] = ff->mapCodeToGID(i: cmap, c: 0xf000 + i);
1674 }
1675 }
1676 }
1677
1678 // try the TrueType 'post' table to handle any unmapped characters
1679 for (i = 0; i < 256; ++i) {
1680 if (map[i] <= 0 && (charName = enc[i])) {
1681 map[i] = ff->mapNameToGID(name: charName);
1682 }
1683 }
1684
1685 return map;
1686}
1687
1688Dict *Gfx8BitFont::getCharProcs()
1689{
1690 return charProcs.isDict() ? charProcs.getDict() : nullptr;
1691}
1692
1693Object Gfx8BitFont::getCharProc(int code)
1694{
1695 if (enc[code] && charProcs.isDict()) {
1696 return charProcs.dictLookup(key: enc[code]);
1697 } else {
1698 return Object(objNull);
1699 }
1700}
1701
1702Object Gfx8BitFont::getCharProcNF(int code)
1703{
1704 if (enc[code] && charProcs.isDict()) {
1705 return charProcs.dictLookupNF(key: enc[code]).copy();
1706 } else {
1707 return Object(objNull);
1708 }
1709}
1710
1711Dict *Gfx8BitFont::getResources()
1712{
1713 return resources.isDict() ? resources.getDict() : nullptr;
1714}
1715
1716//------------------------------------------------------------------------
1717// GfxCIDFont
1718//------------------------------------------------------------------------
1719
1720struct cmpWidthExcepFunctor
1721{
1722 bool operator()(const GfxFontCIDWidthExcep w1, const GfxFontCIDWidthExcep w2) { return w1.first < w2.first; }
1723};
1724
1725struct cmpWidthExcepVFunctor
1726{
1727 bool operator()(const GfxFontCIDWidthExcepV &w1, const GfxFontCIDWidthExcepV &w2) { return w1.first < w2.first; }
1728};
1729
1730GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, std::optional<std::string> &&nameA, GfxFontType typeA, Ref embFontIDA, Dict *fontDict) : GfxFont(tagA, idA, std::move(nameA), typeA, embFontIDA)
1731{
1732 Dict *desFontDict;
1733 Object desFontDictObj;
1734 Object obj1, obj2, obj3, obj4, obj5, obj6;
1735 int c1, c2;
1736 int excepsSize;
1737
1738 ascent = 0.95;
1739 descent = -0.35;
1740 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
1741 collection = nullptr;
1742 ctu = nullptr;
1743 ctuUsesCharCode = true;
1744 widths.defWidth = 1.0;
1745 widths.defHeight = -1.0;
1746 widths.defVY = 0.880;
1747 widths.exceps = nullptr;
1748 widths.nExceps = 0;
1749 widths.excepsV = nullptr;
1750 widths.nExcepsV = 0;
1751 cidToGID = nullptr;
1752 cidToGIDLen = 0;
1753
1754 // get the descendant font
1755 obj1 = fontDict->lookup(key: "DescendantFonts");
1756 if (!obj1.isArray() || obj1.arrayGetLength() == 0) {
1757 error(category: errSyntaxError, pos: -1, msg: "Missing or empty DescendantFonts entry in Type 0 font");
1758 return;
1759 }
1760 desFontDictObj = obj1.arrayGet(i: 0);
1761 if (!desFontDictObj.isDict()) {
1762 error(category: errSyntaxError, pos: -1, msg: "Bad descendant font in Type 0 font");
1763 return;
1764 }
1765 desFontDict = desFontDictObj.getDict();
1766
1767 // get info from font descriptor
1768 readFontDescriptor(xref, fontDict: desFontDict);
1769
1770 //----- encoding info -----
1771
1772 // char collection
1773 obj1 = desFontDict->lookup(key: "CIDSystemInfo");
1774 if (obj1.isDict()) {
1775 obj2 = obj1.dictLookup(key: "Registry");
1776 obj3 = obj1.dictLookup(key: "Ordering");
1777 if (!obj2.isString() || !obj3.isString()) {
1778 error(category: errSyntaxError, pos: -1, msg: "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
1779 error(category: errSyntaxError, pos: -1, msg: "Assuming Adobe-Identity for character collection");
1780 obj2 = Object(new GooString("Adobe"));
1781 obj3 = Object(new GooString("Identity"));
1782 }
1783 collection = obj2.getString()->copy()->append(c: '-')->append(str: obj3.getString());
1784 } else {
1785 error(category: errSyntaxError, pos: -1, msg: "Missing CIDSystemInfo dictionary in Type 0 descendant font");
1786 error(category: errSyntaxError, pos: -1, msg: "Assuming Adobe-Identity for character collection");
1787 collection = new GooString("Adobe-Identity");
1788 }
1789
1790 // look for a ToUnicode CMap
1791 if (!(ctu = readToUnicodeCMap(fontDict, nBits: 16, ctu: nullptr))) {
1792 ctuUsesCharCode = false;
1793
1794 // use an identity mapping for the "Adobe-Identity" and
1795 // "Adobe-UCS" collections
1796 if (!collection->cmp(sA: "Adobe-Identity") || !collection->cmp(sA: "Adobe-UCS")) {
1797 ctu = CharCodeToUnicode::makeIdentityMapping();
1798 } else {
1799 // look for a user-supplied .cidToUnicode file
1800 if (!(ctu = globalParams->getCIDToUnicode(collection))) {
1801 // I'm not completely sure that this is the best thing to do
1802 // but it seems to produce better results when the .cidToUnicode
1803 // files from the poppler-data package are missing. At least
1804 // we know that assuming the Identity mapping is definitely wrong.
1805 // -- jrmuizel
1806 static const char *knownCollections[] = {
1807 "Adobe-CNS1", "Adobe-GB1", "Adobe-Japan1", "Adobe-Japan2", "Adobe-Korea1",
1808 };
1809 for (const char *knownCollection : knownCollections) {
1810 if (collection->cmp(sA: knownCollection) == 0) {
1811 error(category: errSyntaxError, pos: -1, msg: "Missing language pack for '{0:t}' mapping", collection);
1812 return;
1813 }
1814 }
1815 error(category: errSyntaxError, pos: -1, msg: "Unknown character collection '{0:t}'", collection);
1816 // fall-through, assuming the Identity mapping -- this appears
1817 // to match Adobe's behavior
1818 }
1819 }
1820 }
1821
1822 // encoding (i.e., CMap)
1823 obj1 = fontDict->lookup(key: "Encoding");
1824 if (obj1.isNull()) {
1825 error(category: errSyntaxError, pos: -1, msg: "Missing Encoding entry in Type 0 font");
1826 return;
1827 }
1828 if (!(cMap = CMap::parse(cache: nullptr, collectionA: collection, obj: &obj1))) {
1829 return;
1830 }
1831 if (cMap->getCMapName()) {
1832 encodingName = cMap->getCMapName()->toStr();
1833 } else {
1834 encodingName = "Custom";
1835 }
1836
1837 // CIDToGIDMap (for embedded TrueType fonts)
1838 obj1 = desFontDict->lookup(key: "CIDToGIDMap");
1839 if (obj1.isStream()) {
1840 cidToGIDLen = 0;
1841 unsigned int i = 64;
1842 cidToGID = (int *)gmallocn(count: i, size: sizeof(int));
1843 obj1.streamReset();
1844 while ((c1 = obj1.streamGetChar()) != EOF && (c2 = obj1.streamGetChar()) != EOF) {
1845 if (cidToGIDLen == i) {
1846 i *= 2;
1847 cidToGID = (int *)greallocn(p: cidToGID, count: i, size: sizeof(int));
1848 }
1849 cidToGID[cidToGIDLen++] = (c1 << 8) + c2;
1850 }
1851 } else if (!obj1.isName(nameA: "Identity") && !obj1.isNull()) {
1852 error(category: errSyntaxError, pos: -1, msg: "Invalid CIDToGIDMap entry in CID font");
1853 }
1854
1855 //----- character metrics -----
1856
1857 // default char width
1858 obj1 = desFontDict->lookup(key: "DW");
1859 if (obj1.isInt()) {
1860 widths.defWidth = obj1.getInt() * 0.001;
1861 }
1862
1863 // char width exceptions
1864 obj1 = desFontDict->lookup(key: "W");
1865 if (obj1.isArray()) {
1866 excepsSize = 0;
1867 int i = 0;
1868 while (i + 1 < obj1.arrayGetLength()) {
1869 obj2 = obj1.arrayGet(i);
1870 obj3 = obj1.arrayGet(i: i + 1);
1871 if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
1872 obj4 = obj1.arrayGet(i: i + 2);
1873 if (obj4.isNum()) {
1874 if (widths.nExceps == excepsSize) {
1875 excepsSize += 16;
1876 widths.exceps = (GfxFontCIDWidthExcep *)greallocn(p: widths.exceps, count: excepsSize, size: sizeof(GfxFontCIDWidthExcep));
1877 }
1878 widths.exceps[widths.nExceps].first = obj2.getInt();
1879 widths.exceps[widths.nExceps].last = obj3.getInt();
1880 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1881 ++widths.nExceps;
1882 } else {
1883 error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font");
1884 }
1885 i += 3;
1886 } else if (obj2.isInt() && obj3.isArray()) {
1887 if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
1888 excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
1889 widths.exceps = (GfxFontCIDWidthExcep *)greallocn(p: widths.exceps, count: excepsSize, size: sizeof(GfxFontCIDWidthExcep));
1890 }
1891 int j = obj2.getInt();
1892 if (likely(j < INT_MAX - obj3.arrayGetLength())) {
1893 for (int k = 0; k < obj3.arrayGetLength(); ++k) {
1894 obj4 = obj3.arrayGet(i: k);
1895 if (obj4.isNum()) {
1896 widths.exceps[widths.nExceps].first = j;
1897 widths.exceps[widths.nExceps].last = j;
1898 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1899 ++j;
1900 ++widths.nExceps;
1901 } else {
1902 error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font");
1903 }
1904 }
1905 }
1906 i += 2;
1907 } else {
1908 error(category: errSyntaxError, pos: -1, msg: "Bad widths array in Type 0 font");
1909 ++i;
1910 }
1911 }
1912 std::sort(first: widths.exceps, last: widths.exceps + widths.nExceps, comp: cmpWidthExcepFunctor());
1913 }
1914
1915 // default metrics for vertical font
1916 obj1 = desFontDict->lookup(key: "DW2");
1917 if (obj1.isArray() && obj1.arrayGetLength() == 2) {
1918 obj2 = obj1.arrayGet(i: 0);
1919 if (obj2.isNum()) {
1920 widths.defVY = obj2.getNum() * 0.001;
1921 }
1922 obj2 = obj1.arrayGet(i: 1);
1923 if (obj2.isNum()) {
1924 widths.defHeight = obj2.getNum() * 0.001;
1925 }
1926 }
1927
1928 // char metric exceptions for vertical font
1929 obj1 = desFontDict->lookup(key: "W2");
1930 if (obj1.isArray()) {
1931 excepsSize = 0;
1932 int i = 0;
1933 while (i + 1 < obj1.arrayGetLength()) {
1934 obj2 = obj1.arrayGet(i);
1935 obj3 = obj1.arrayGet(i: i + 1);
1936 if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
1937 if ((obj4 = obj1.arrayGet(i: i + 2), obj4.isNum()) && (obj5 = obj1.arrayGet(i: i + 3), obj5.isNum()) && (obj6 = obj1.arrayGet(i: i + 4), obj6.isNum())) {
1938 if (widths.nExcepsV == excepsSize) {
1939 excepsSize += 16;
1940 widths.excepsV = (GfxFontCIDWidthExcepV *)greallocn(p: widths.excepsV, count: excepsSize, size: sizeof(GfxFontCIDWidthExcepV));
1941 }
1942 widths.excepsV[widths.nExcepsV].first = obj2.getInt();
1943 widths.excepsV[widths.nExcepsV].last = obj3.getInt();
1944 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1945 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1946 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1947 ++widths.nExcepsV;
1948 } else {
1949 error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font");
1950 }
1951 i += 5;
1952 } else if (obj2.isInt() && obj3.isArray()) {
1953 if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
1954 excepsSize = (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
1955 widths.excepsV = (GfxFontCIDWidthExcepV *)greallocn(p: widths.excepsV, count: excepsSize, size: sizeof(GfxFontCIDWidthExcepV));
1956 }
1957 int j = obj2.getInt();
1958 for (int k = 0; k < obj3.arrayGetLength(); k += 3) {
1959 if ((obj4 = obj3.arrayGet(i: k), obj4.isNum()) && (obj5 = obj3.arrayGet(i: k + 1), obj5.isNum()) && (obj6 = obj3.arrayGet(i: k + 2), obj6.isNum())) {
1960 widths.excepsV[widths.nExcepsV].first = j;
1961 widths.excepsV[widths.nExcepsV].last = j;
1962 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1963 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1964 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1965 ++j;
1966 ++widths.nExcepsV;
1967 } else {
1968 error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font");
1969 }
1970 }
1971 i += 2;
1972 } else {
1973 error(category: errSyntaxError, pos: -1, msg: "Bad widths (W2) array in Type 0 font");
1974 ++i;
1975 }
1976 }
1977 std::sort(first: widths.excepsV, last: widths.excepsV + widths.nExcepsV, comp: cmpWidthExcepVFunctor());
1978 }
1979
1980 ok = true;
1981}
1982
1983GfxCIDFont::~GfxCIDFont()
1984{
1985 if (collection) {
1986 delete collection;
1987 }
1988 if (ctu) {
1989 ctu->decRefCnt();
1990 }
1991 gfree(p: widths.exceps);
1992 gfree(p: widths.excepsV);
1993 if (cidToGID) {
1994 gfree(p: cidToGID);
1995 }
1996}
1997
1998int GfxCIDFont::getNextChar(const char *s, int len, CharCode *code, Unicode const **u, int *uLen, double *dx, double *dy, double *ox, double *oy) const
1999{
2000 CID cid;
2001 CharCode dummy;
2002 double w, h, vx, vy;
2003 int n, a, b, m;
2004
2005 if (!cMap) {
2006 *code = 0;
2007 *uLen = 0;
2008 *dx = *dy = *ox = *oy = 0;
2009 return 1;
2010 }
2011
2012 *code = (CharCode)(cid = cMap->getCID(s, len, c: &dummy, nUsed: &n));
2013 if (ctu) {
2014 if (hasToUnicode) {
2015 int i = 0, c = 0;
2016 while (i < n) {
2017 c = (c << 8) + (s[i] & 0xff);
2018 ++i;
2019 }
2020 *uLen = ctu->mapToUnicode(c, u);
2021 } else {
2022 *uLen = ctu->mapToUnicode(c: cid, u);
2023 }
2024 } else {
2025 *uLen = 0;
2026 }
2027
2028 // horizontal
2029 if (cMap->getWMode() == 0) {
2030 w = getWidth(cid);
2031 h = vx = vy = 0;
2032
2033 // vertical
2034 } else {
2035 w = 0;
2036 h = widths.defHeight;
2037 vx = getWidth(cid) / 2;
2038 vy = widths.defVY;
2039 if (widths.nExcepsV > 0 && cid >= widths.excepsV[0].first) {
2040 a = 0;
2041 b = widths.nExcepsV;
2042 // invariant: widths.excepsV[a].first <= cid < widths.excepsV[b].first
2043 while (b - a > 1) {
2044 m = (a + b) / 2;
2045 if (widths.excepsV[m].last <= cid) {
2046 a = m;
2047 } else {
2048 b = m;
2049 }
2050 }
2051 if (cid <= widths.excepsV[a].last) {
2052 h = widths.excepsV[a].height;
2053 vx = widths.excepsV[a].vx;
2054 vy = widths.excepsV[a].vy;
2055 }
2056 }
2057 }
2058
2059 *dx = w;
2060 *dy = h;
2061 *ox = vx;
2062 *oy = vy;
2063
2064 return n;
2065}
2066
2067int GfxCIDFont::getWMode() const
2068{
2069 return cMap ? cMap->getWMode() : 0;
2070}
2071
2072const CharCodeToUnicode *GfxCIDFont::getToUnicode() const
2073{
2074 return ctu;
2075}
2076
2077const GooString *GfxCIDFont::getCollection() const
2078{
2079 return cMap ? cMap->getCollection() : nullptr;
2080}
2081
2082int GfxCIDFont::mapCodeToGID(FoFiTrueType *ff, int cmapi, Unicode unicode, bool wmode)
2083{
2084 unsigned short gid = ff->mapCodeToGID(i: cmapi, c: unicode);
2085 if (wmode) {
2086 unsigned short vgid = ff->mapToVertGID(orgGID: gid);
2087 if (vgid != 0) {
2088 gid = vgid;
2089 }
2090 }
2091 return gid;
2092}
2093
2094int *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *codeToGIDLen)
2095{
2096#define N_UCS_CANDIDATES 2
2097 /* space characters */
2098 static const unsigned long spaces[] = { 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x00A0, 0x200B, 0x2060, 0x3000, 0xFEFF, 0 };
2099 static const char *adobe_cns1_cmaps[] = { "UniCNS-UTF32-V", "UniCNS-UCS2-V", "UniCNS-UTF32-H", "UniCNS-UCS2-H", nullptr };
2100 static const char *adobe_gb1_cmaps[] = { "UniGB-UTF32-V", "UniGB-UCS2-V", "UniGB-UTF32-H", "UniGB-UCS2-H", nullptr };
2101 static const char *adobe_japan1_cmaps[] = { "UniJIS-UTF32-V", "UniJIS-UCS2-V", "UniJIS-UTF32-H", "UniJIS-UCS2-H", nullptr };
2102 static const char *adobe_japan2_cmaps[] = { "UniHojo-UTF32-V", "UniHojo-UCS2-V", "UniHojo-UTF32-H", "UniHojo-UCS2-H", nullptr };
2103 static const char *adobe_korea1_cmaps[] = { "UniKS-UTF32-V", "UniKS-UCS2-V", "UniKS-UTF32-H", "UniKS-UCS2-H", nullptr };
2104 static struct CMapListEntry
2105 {
2106 const char *collection;
2107 const char *scriptTag;
2108 const char *languageTag;
2109 const char *toUnicodeMap;
2110 const char **CMaps;
2111 } CMapList[] = { {
2112 .collection: "Adobe-CNS1",
2113 .scriptTag: "hani",
2114 .languageTag: "CHN ",
2115 .toUnicodeMap: "Adobe-CNS1-UCS2",
2116 .CMaps: adobe_cns1_cmaps,
2117 },
2118 {
2119 .collection: "Adobe-GB1",
2120 .scriptTag: "hani",
2121 .languageTag: "CHN ",
2122 .toUnicodeMap: "Adobe-GB1-UCS2",
2123 .CMaps: adobe_gb1_cmaps,
2124 },
2125 {
2126 .collection: "Adobe-Japan1",
2127 .scriptTag: "kana",
2128 .languageTag: "JAN ",
2129 .toUnicodeMap: "Adobe-Japan1-UCS2",
2130 .CMaps: adobe_japan1_cmaps,
2131 },
2132 {
2133 .collection: "Adobe-Japan2",
2134 .scriptTag: "kana",
2135 .languageTag: "JAN ",
2136 .toUnicodeMap: "Adobe-Japan2-UCS2",
2137 .CMaps: adobe_japan2_cmaps,
2138 },
2139 {
2140 .collection: "Adobe-Korea1",
2141 .scriptTag: "hang",
2142 .languageTag: "KOR ",
2143 .toUnicodeMap: "Adobe-Korea1-UCS2",
2144 .CMaps: adobe_korea1_cmaps,
2145 },
2146 { .collection: nullptr, .scriptTag: nullptr, .languageTag: nullptr, .toUnicodeMap: nullptr, .CMaps: nullptr } };
2147 Unicode *humap = nullptr;
2148 Unicode *vumap = nullptr;
2149 Unicode *tumap = nullptr;
2150 int *codeToGID = nullptr;
2151 int i;
2152 unsigned long code;
2153 int wmode;
2154 const char **cmapName;
2155 CMapListEntry *lp;
2156 int cmap;
2157 int cmapPlatform, cmapEncoding;
2158 Ref embID;
2159
2160 *codeToGIDLen = 0;
2161 if (!ctu || !getCollection()) {
2162 return nullptr;
2163 }
2164
2165 if (getEmbeddedFontID(embID: &embID)) {
2166 if (getCollection()->cmp(sA: "Adobe-Identity") == 0) {
2167 return nullptr;
2168 }
2169
2170 /* if this font is embedded font,
2171 * CIDToGIDMap should be embedded in PDF file
2172 * and already set. So return it.
2173 */
2174 *codeToGIDLen = getCIDToGIDLen();
2175 return getCIDToGID();
2176 }
2177
2178 /* we use only unicode cmap */
2179 cmap = -1;
2180 for (i = 0; i < ff->getNumCmaps(); ++i) {
2181 cmapPlatform = ff->getCmapPlatform(i);
2182 cmapEncoding = ff->getCmapEncoding(i);
2183 if (cmapPlatform == 3 && cmapEncoding == 10) {
2184 /* UCS-4 */
2185 cmap = i;
2186 /* use UCS-4 cmap */
2187 break;
2188 } else if (cmapPlatform == 3 && cmapEncoding == 1) {
2189 /* Unicode */
2190 cmap = i;
2191 } else if (cmapPlatform == 0 && cmap < 0) {
2192 cmap = i;
2193 }
2194 }
2195 if (cmap < 0) {
2196 return nullptr;
2197 }
2198
2199 wmode = getWMode();
2200 for (lp = CMapList; lp->collection != nullptr; lp++) {
2201 if (strcmp(s1: lp->collection, s2: getCollection()->c_str()) == 0) {
2202 break;
2203 }
2204 }
2205 const unsigned int n = 65536;
2206 humap = new Unicode[n * N_UCS_CANDIDATES];
2207 memset(s: humap, c: 0, n: sizeof(Unicode) * n * N_UCS_CANDIDATES);
2208 if (lp->collection != nullptr) {
2209 CharCodeToUnicode *tctu;
2210 GooString tname(lp->toUnicodeMap);
2211
2212 if ((tctu = CharCodeToUnicode::parseCMapFromFile(fileName: &tname, nBits: 16)) != nullptr) {
2213 tumap = new Unicode[n];
2214 CharCode cid;
2215 for (cid = 0; cid < n; cid++) {
2216 int len;
2217 const Unicode *ucodes;
2218
2219 len = tctu->mapToUnicode(c: cid, u: &ucodes);
2220 if (len == 1) {
2221 tumap[cid] = ucodes[0];
2222 } else {
2223 /* if not single character, ignore it */
2224 tumap[cid] = 0;
2225 }
2226 }
2227 delete tctu;
2228 }
2229 vumap = new Unicode[n];
2230 memset(s: vumap, c: 0, n: sizeof(Unicode) * n);
2231 for (cmapName = lp->CMaps; *cmapName != nullptr; cmapName++) {
2232 GooString cname(*cmapName);
2233
2234 std::shared_ptr<CMap> cnameCMap;
2235 if ((cnameCMap = globalParams->getCMap(collection: getCollection(), cMapName: &cname)) != nullptr) {
2236 if (cnameCMap->getWMode()) {
2237 cnameCMap->setReverseMap(rmap: vumap, rmapSize: n, ncand: 1);
2238 } else {
2239 cnameCMap->setReverseMap(rmap: humap, rmapSize: n, N_UCS_CANDIDATES);
2240 }
2241 }
2242 }
2243 ff->setupGSUB(scriptName: lp->scriptTag, languageName: lp->languageTag);
2244 } else {
2245 if (getCollection()->cmp(sA: "Adobe-Identity") == 0) {
2246 error(category: errSyntaxError, pos: -1, msg: "non-embedded font using identity encoding: {0:s}", name ? name->c_str() : "(null)");
2247 } else {
2248 error(category: errSyntaxError, pos: -1, msg: "Unknown character collection {0:t}\n", getCollection());
2249 }
2250 if (ctu) {
2251 CharCode cid;
2252 for (cid = 0; cid < n; cid++) {
2253 const Unicode *ucode;
2254
2255 if (ctu->mapToUnicode(c: cid, u: &ucode)) {
2256 humap[cid * N_UCS_CANDIDATES] = ucode[0];
2257 } else {
2258 humap[cid * N_UCS_CANDIDATES] = 0;
2259 }
2260 for (i = 1; i < N_UCS_CANDIDATES; i++) {
2261 humap[cid * N_UCS_CANDIDATES + i] = 0;
2262 }
2263 }
2264 }
2265 }
2266 // map CID -> Unicode -> GID
2267 codeToGID = (int *)gmallocn(count: n, size: sizeof(int));
2268 for (code = 0; code < n; ++code) {
2269 Unicode unicode;
2270 unsigned long gid;
2271
2272 unicode = 0;
2273 gid = 0;
2274 if (humap != nullptr) {
2275 for (i = 0; i < N_UCS_CANDIDATES && gid == 0 && (unicode = humap[code * N_UCS_CANDIDATES + i]) != 0; i++) {
2276 gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: false);
2277 }
2278 }
2279 if (gid == 0 && vumap != nullptr) {
2280 unicode = vumap[code];
2281 if (unicode != 0) {
2282 gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: true);
2283 if (gid == 0 && tumap != nullptr) {
2284 if ((unicode = tumap[code]) != 0) {
2285 gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: true);
2286 }
2287 }
2288 }
2289 }
2290 if (gid == 0 && tumap != nullptr) {
2291 if ((unicode = tumap[code]) != 0) {
2292 gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode: false);
2293 }
2294 }
2295 if (gid == 0) {
2296 /* special handling space characters */
2297 const unsigned long *p;
2298
2299 if (humap != nullptr) {
2300 unicode = humap[code];
2301 }
2302 if (unicode != 0) {
2303 /* check if code is space character , so map code to 0x0020 */
2304 for (p = spaces; *p != 0; p++) {
2305 if (*p == unicode) {
2306 unicode = 0x20;
2307 gid = mapCodeToGID(ff, cmapi: cmap, unicode, wmode);
2308 break;
2309 }
2310 }
2311 }
2312 }
2313 codeToGID[code] = gid;
2314 }
2315 *codeToGIDLen = n;
2316 if (humap != nullptr) {
2317 delete[] humap;
2318 }
2319 if (tumap != nullptr) {
2320 delete[] tumap;
2321 }
2322 if (vumap != nullptr) {
2323 delete[] vumap;
2324 }
2325 return codeToGID;
2326}
2327
2328double GfxCIDFont::getWidth(CID cid) const
2329{
2330 double w;
2331 int a, b, m;
2332
2333 w = widths.defWidth;
2334 if (widths.nExceps > 0 && cid >= widths.exceps[0].first) {
2335 a = 0;
2336 b = widths.nExceps;
2337 // invariant: widths.exceps[a].first <= cid < widths.exceps[b].first
2338 while (b - a > 1) {
2339 m = (a + b) / 2;
2340 if (widths.exceps[m].first <= cid) {
2341 a = m;
2342 } else {
2343 b = m;
2344 }
2345 }
2346 if (cid <= widths.exceps[a].last) {
2347 w = widths.exceps[a].width;
2348 }
2349 }
2350 return w;
2351}
2352
2353double GfxCIDFont::getWidth(char *s, int len) const
2354{
2355 int nUsed;
2356 CharCode c;
2357
2358 CID cid = cMap->getCID(s, len, c: &c, nUsed: &nUsed);
2359 return getWidth(cid);
2360}
2361
2362//------------------------------------------------------------------------
2363// GfxFontDict
2364//------------------------------------------------------------------------
2365
2366GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict)
2367{
2368 Ref r;
2369
2370 fonts.resize(new_size: fontDict->getLength());
2371 for (std::size_t i = 0; i < fonts.size(); ++i) {
2372 const Object &obj1 = fontDict->getValNF(i);
2373 Object obj2 = obj1.fetch(xref);
2374 if (obj2.isDict()) {
2375 if (obj1.isRef()) {
2376 r = obj1.getRef();
2377 } else if (fontDictRef) {
2378 // legal generation numbers are five digits, so we use a
2379 // 6-digit number here
2380 r.gen = 100000 + fontDictRef->num;
2381 r.num = i;
2382 } else {
2383 // no indirect reference for this font, or for the containing
2384 // font dict, so hash the font and use that
2385 r.gen = 100000;
2386 r.num = hashFontObject(obj: &obj2);
2387 }
2388 fonts[i] = GfxFont::makeFont(xref, tagA: fontDict->getKey(i), idA: r, fontDict: obj2.getDict());
2389 if (fonts[i] && !fonts[i]->isOk()) {
2390 // XXX: it may be meaningful to distinguish between
2391 // NULL and !isOk() so that when we do lookups
2392 // we can tell the difference between a missing font
2393 // and a font that is just !isOk()
2394 fonts[i].reset();
2395 }
2396 } else {
2397 error(category: errSyntaxError, pos: -1, msg: "font resource is not a dictionary");
2398 fonts[i] = nullptr;
2399 }
2400 }
2401}
2402
2403std::shared_ptr<GfxFont> GfxFontDict::lookup(const char *tag) const
2404{
2405 for (const auto &font : fonts) {
2406 if (font && font->matches(tagA: tag)) {
2407 return font;
2408 }
2409 }
2410 return nullptr;
2411}
2412
// FNV-1a hash
//
// Incremental 32-bit hash: bytes are fed in one at a time (or as a buffer)
// and the running state can be folded down to a non-negative int.
class FNVHash
{
public:
    // starts from the 32-bit FNV offset basis
    FNVHash() : h(2166136261U) { }

    // Mixes one byte into the state: xor first, then multiply by the FNV
    // prime.  The mask keeps a negative char from sign-extending.
    void hash(char c) { h = (h ^ (c & 0xff)) * 16777619; }

    // Mixes the first n bytes at p into the state, in order; n <= 0 is a no-op.
    void hash(const char *p, int n)
    {
        for (int remaining = n; remaining > 0; --remaining, ++p) {
            hash(*p);
        }
    }

    // Folds the top bit into the low bits and returns a 31-bit value.
    int get31()
    {
        const unsigned int folded = h ^ (h >> 31);
        return folded & 0x7fffffff;
    }

private:
    unsigned int h;
};
2438
2439int GfxFontDict::hashFontObject(Object *obj)
2440{
2441 FNVHash h;
2442
2443 hashFontObject1(obj, h: &h);
2444 return h.get31();
2445}
2446
2447void GfxFontDict::hashFontObject1(const Object *obj, FNVHash *h)
2448{
2449 const GooString *s;
2450 const char *p;
2451 double r;
2452 int n, i;
2453
2454 switch (obj->getType()) {
2455 case objBool:
2456 h->hash(c: 'b');
2457 h->hash(c: obj->getBool() ? 1 : 0);
2458 break;
2459 case objInt:
2460 h->hash(c: 'i');
2461 n = obj->getInt();
2462 h->hash(p: (char *)&n, n: sizeof(int));
2463 break;
2464 case objReal:
2465 h->hash(c: 'r');
2466 r = obj->getReal();
2467 h->hash(p: (char *)&r, n: sizeof(double));
2468 break;
2469 case objString:
2470 h->hash(c: 's');
2471 s = obj->getString();
2472 h->hash(p: s->c_str(), n: s->getLength());
2473 break;
2474 case objName:
2475 h->hash(c: 'n');
2476 p = obj->getName();
2477 h->hash(p, n: (int)strlen(s: p));
2478 break;
2479 case objNull:
2480 h->hash(c: 'z');
2481 break;
2482 case objArray:
2483 h->hash(c: 'a');
2484 n = obj->arrayGetLength();
2485 h->hash(p: (char *)&n, n: sizeof(int));
2486 for (i = 0; i < n; ++i) {
2487 const Object &obj2 = obj->arrayGetNF(i);
2488 hashFontObject1(obj: &obj2, h);
2489 }
2490 break;
2491 case objDict:
2492 h->hash(c: 'd');
2493 n = obj->dictGetLength();
2494 h->hash(p: (char *)&n, n: sizeof(int));
2495 for (i = 0; i < n; ++i) {
2496 p = obj->dictGetKey(i);
2497 h->hash(p, n: (int)strlen(s: p));
2498 const Object &obj2 = obj->dictGetValNF(i);
2499 hashFontObject1(obj: &obj2, h);
2500 }
2501 break;
2502 case objStream:
2503 // this should never happen - streams must be indirect refs
2504 break;
2505 case objRef:
2506 h->hash(c: 'f');
2507 n = obj->getRefNum();
2508 h->hash(p: (char *)&n, n: sizeof(int));
2509 n = obj->getRefGen();
2510 h->hash(p: (char *)&n, n: sizeof(int));
2511 break;
2512 default:
2513 h->hash(c: 'u');
2514 break;
2515 }
2516}
2517

source code of poppler/poppler/GfxFont.cc