1//========================================================================
2//
3// CharCodeToUnicode.cc
4//
5// Copyright 2001-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2006, 2008-2010, 2012, 2018-2022, 2024 Albert Astals Cid <aacid@kde.org>
17// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
18// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
19// Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
20// Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
21// Copyright (C) 2010 William Bader <williambader@hotmail.com>
22// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
23// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
24// Copyright (C) 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
25// Copyright (C) 2014 Jiri Slaby <jirislaby@gmail.com>
26// Copyright (C) 2015 Marek Kasik <mkasik@redhat.com>
27// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
28// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
29// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
30// Copyright (C) 2019 <corentinf@free.fr>
31// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
32//
33// To see a description of the changes please see the Changelog file that
34// came with your tarball or type make ChangeLog if you are building from git
35//
36//========================================================================
37
38#include <config.h>
39
40#include <cstdio>
41#include <cstring>
42#include <functional>
43#include "goo/glibc.h"
44#include "goo/gmem.h"
45#include "goo/gfile.h"
46#include "goo/GooLikely.h"
47#include "goo/GooString.h"
48#include "Error.h"
49#include "GlobalParams.h"
50#include "PSTokenizer.h"
51#include "CharCodeToUnicode.h"
52#include "UTF.h"
53
54//------------------------------------------------------------------------
55
56//------------------------------------------------------------------------
57
// Character source for PSTokenizer that reads from an in-memory,
// NUL-terminated buffer.  <data> is the address of the caller's read
// cursor (a pointer to the next unread byte).  Each call hands out one
// byte as an unsigned value and advances the cursor; once the cursor
// sits on the terminating NUL, EOF is returned and the cursor is left
// where it is.
static int getCharFromString(void *data)
{
    unsigned char **cursor = static_cast<unsigned char **>(data);
    const unsigned char next = **cursor;

    if (next == 0) {
        return EOF;
    }
    ++*cursor;
    return next;
}
72
// Character source for PSTokenizer that reads from a stdio stream.
// <data> is the FILE* to read from; the result is whatever fgetc()
// yields for it (the next byte, or EOF).
static int getCharFromFile(void *data)
{
    FILE *const stream = static_cast<FILE *>(data);
    return fgetc(stream);
}
77
78//------------------------------------------------------------------------
79
// Value of each byte when read as a hexadecimal digit, or -1 if the
// byte is not one of 0-9, A-F, a-f.  Indexed by the unsigned byte value.
static const int hexCharVals[256] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  -1, -1, -1, -1, -1, -1, // 3x
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 // Fx
};

// Parse a <len>-byte hex string <s> into *<val>.  Returns false on
// error, in which case *<val> is set to 0.  An empty string (<len> <= 0)
// parses successfully as 0.
//
// The value is accumulated in an unsigned integer: strings longer than
// the width of unsigned int simply keep their low-order bits instead of
// overflowing a signed accumulator (which would be undefined behaviour).
static bool parseHex(const char *s, int len, unsigned int *val)
{
    unsigned int v = 0;

    for (int i = 0; i < len; ++i) {
        // Mask to 0..255 so that a negative char cannot index before
        // the start of the table.
        const int x = hexCharVals[s[i] & 0xff];
        if (x < 0) {
            *val = 0;
            return false;
        }
        v = (v << 4) + static_cast<unsigned int>(x);
    }
    *val = v;
    return true;
}
116
117//------------------------------------------------------------------------
118
119CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping()
120{
121 CharCodeToUnicode *ctu = new CharCodeToUnicode();
122 ctu->isIdentity = true;
123 ctu->map.resize(new_size: 1, x: 0);
124 return ctu;
125}
126CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(const char *fileName, const GooString *collection)
127{
128 FILE *f;
129 CharCode size;
130 char buf[64];
131 Unicode u;
132
133 if (!(f = openFile(path: fileName, mode: "r"))) {
134 error(category: errIO, pos: -1, msg: "Couldn't open cidToUnicode file '{0:s}'", fileName);
135 return nullptr;
136 }
137
138 size = 32768;
139 std::vector<Unicode> mapA;
140 mapA.resize(new_size: size, x: 0);
141 CharCode mapLenA = 0;
142
143 while (getLine(buf, size: sizeof(buf), f)) {
144 if (mapLenA == size) {
145 size *= 2;
146 mapA.resize(new_size: size);
147 }
148 if (sscanf(s: buf, format: "%x", &u) == 1) {
149 mapA[mapLenA] = u;
150 } else {
151 error(category: errSyntaxWarning, pos: -1, msg: "Bad line ({0:d}) in cidToUnicode file '{1:s}'", (int)(mapLenA + 1), fileName);
152 mapA[mapLenA] = 0;
153 }
154 ++mapLenA;
155 }
156 fclose(stream: f);
157 mapA.resize(new_size: mapLenA);
158
159 return new CharCodeToUnicode(collection->toStr(), std::move(mapA), {});
160}
161
162CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode)
163{
164 std::vector<Unicode> data(toUnicode, toUnicode + 256);
165 return new CharCodeToUnicode({}, std::move(data), {});
166}
167
168CharCodeToUnicode *CharCodeToUnicode::parseCMap(const GooString *buf, int nBits)
169{
170 CharCodeToUnicode *ctu;
171
172 ctu = new CharCodeToUnicode(std::optional<std::string>());
173 const char *p = buf->c_str();
174 if (!ctu->parseCMap1(getCharFunc: &getCharFromString, data: &p, nBits)) {
175 delete ctu;
176 return nullptr;
177 }
178 return ctu;
179}
180
181CharCodeToUnicode *CharCodeToUnicode::parseCMapFromFile(const GooString *fileName, int nBits)
182{
183 CharCodeToUnicode *ctu;
184 FILE *f;
185
186 ctu = new CharCodeToUnicode(std::optional<std::string>());
187 if ((f = globalParams->findToUnicodeFile(name: fileName))) {
188 if (!ctu->parseCMap1(getCharFunc: &getCharFromFile, data: f, nBits)) {
189 delete ctu;
190 fclose(stream: f);
191 return nullptr;
192 }
193 } else {
194 error(category: errSyntaxError, pos: -1, msg: "Couldn't find ToUnicode CMap file for '{0:t}'", fileName);
195 }
196 return ctu;
197}
198
199void CharCodeToUnicode::mergeCMap(const GooString *buf, int nBits)
200{
201 const char *p = buf->c_str();
202 parseCMap1(getCharFunc: &getCharFromString, data: &p, nBits);
203}
204
205bool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data, int nBits)
206{
207 PSTokenizer *pst;
208 char tok1[256], tok2[256], tok3[256];
209 int n1, n2, n3;
210 CharCode i;
211 CharCode maxCode, code1, code2;
212 GooString *name;
213 FILE *f;
214
215 bool ok = false;
216 maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
217 pst = new PSTokenizer(getCharFunc, data);
218 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
219 while (pst->getToken(buf: tok2, size: sizeof(tok2), length: &n2)) {
220 if (!strcmp(s1: tok2, s2: "usecmap")) {
221 if (tok1[0] == '/') {
222 name = new GooString(tok1 + 1);
223 if ((f = globalParams->findToUnicodeFile(name))) {
224 if (parseCMap1(getCharFunc: &getCharFromFile, data: f, nBits)) {
225 ok = true;
226 }
227 fclose(stream: f);
228 } else {
229 error(category: errSyntaxError, pos: -1, msg: "Couldn't find ToUnicode CMap file for '{0:t}'", name);
230 }
231 delete name;
232 }
233 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
234 } else if (!strcmp(s1: tok2, s2: "beginbfchar")) {
235 while (pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1)) {
236 if (!strcmp(s1: tok1, s2: "endbfchar")) {
237 break;
238 }
239 if (!pst->getToken(buf: tok2, size: sizeof(tok2), length: &n2) || !strcmp(s1: tok2, s2: "endbfchar")) {
240 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfchar block in ToUnicode CMap");
241 break;
242 }
243 if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
244 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfchar block in ToUnicode CMap");
245 continue;
246 }
247 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
248 if (!parseHex(s: tok1 + 1, len: n1 - 2, val: &code1)) {
249 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfchar block in ToUnicode CMap");
250 continue;
251 }
252 if (code1 > maxCode) {
253 error(category: errSyntaxWarning, pos: -1, msg: "Invalid entry in bfchar block in ToUnicode CMap");
254 }
255 addMapping(code: code1, uStr: tok2 + 1, n: n2 - 2, offset: 0);
256 ok = true;
257 }
258 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
259 } else if (!strcmp(s1: tok2, s2: "beginbfrange")) {
260 while (pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1)) {
261 if (!strcmp(s1: tok1, s2: "endbfrange")) {
262 break;
263 }
264 if (!pst->getToken(buf: tok2, size: sizeof(tok2), length: &n2) || !strcmp(s1: tok2, s2: "endbfrange") || !pst->getToken(buf: tok3, size: sizeof(tok3), length: &n3) || !strcmp(s1: tok3, s2: "endbfrange")) {
265 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfrange block in ToUnicode CMap");
266 break;
267 }
268 if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
269 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfrange block in ToUnicode CMap");
270 continue;
271 }
272 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
273 if (!parseHex(s: tok1 + 1, len: n1 - 2, val: &code1) || !parseHex(s: tok2 + 1, len: n2 - 2, val: &code2)) {
274 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfrange block in ToUnicode CMap");
275 continue;
276 }
277 if (code1 > maxCode || code2 > maxCode) {
278 error(category: errSyntaxWarning, pos: -1, msg: "Invalid entry in bfrange block in ToUnicode CMap");
279 if (code1 > maxCode) {
280 code1 = maxCode;
281 }
282 if (code2 > maxCode) {
283 code2 = maxCode;
284 }
285 }
286 if (!strcmp(s1: tok3, s2: "[")) {
287 i = 0;
288 while (pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1) && code1 + i <= code2) {
289 if (!strcmp(s1: tok1, s2: "]")) {
290 break;
291 }
292 if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
293 tok1[n1 - 1] = '\0';
294 addMapping(code: code1 + i, uStr: tok1 + 1, n: n1 - 2, offset: 0);
295 ok = true;
296 } else {
297 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfrange block in ToUnicode CMap");
298 }
299 ++i;
300 }
301 } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
302 tok3[n3 - 1] = '\0';
303 for (i = 0; code1 <= code2; ++code1, ++i) {
304 addMapping(code: code1, uStr: tok3 + 1, n: n3 - 2, offset: i);
305 ok = true;
306 }
307
308 } else {
309 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in bfrange block in ToUnicode CMap");
310 }
311 }
312 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
313 } else if (!strcmp(s1: tok2, s2: "begincidchar")) {
314 // the begincidchar operator is not allowed in ToUnicode CMaps,
315 // but some buggy PDF generators incorrectly use
316 // code-to-CID-type CMaps here
317 error(category: errSyntaxWarning, pos: -1, msg: "Invalid 'begincidchar' operator in ToUnicode CMap");
318 while (pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1)) {
319 if (!strcmp(s1: tok1, s2: "endcidchar")) {
320 break;
321 }
322 if (!pst->getToken(buf: tok2, size: sizeof(tok2), length: &n2) || !strcmp(s1: tok2, s2: "endcidchar")) {
323 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidchar block in ToUnicode CMap");
324 break;
325 }
326 if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) {
327 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidchar block in ToUnicode CMap");
328 continue;
329 }
330 tok1[n1 - 1] = '\0';
331 if (!parseHex(s: tok1 + 1, len: n1 - 2, val: &code1)) {
332 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidchar block in ToUnicode CMap");
333 continue;
334 }
335 if (code1 > maxCode) {
336 error(category: errSyntaxWarning, pos: -1, msg: "Invalid entry in cidchar block in ToUnicode CMap");
337 }
338 addMappingInt(code: code1, u: atoi(nptr: tok2));
339 ok = true;
340 }
341 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
342 } else if (!strcmp(s1: tok2, s2: "begincidrange")) {
343 // the begincidrange operator is not allowed in ToUnicode CMaps,
344 // but some buggy PDF generators incorrectly use
345 // code-to-CID-type CMaps here
346 error(category: errSyntaxWarning, pos: -1, msg: "Invalid 'begincidrange' operator in ToUnicode CMap");
347 while (pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1)) {
348 if (!strcmp(s1: tok1, s2: "endcidrange")) {
349 break;
350 }
351 if (!pst->getToken(buf: tok2, size: sizeof(tok2), length: &n2) || !strcmp(s1: tok2, s2: "endcidrange") || !pst->getToken(buf: tok3, size: sizeof(tok3), length: &n3) || !strcmp(s1: tok3, s2: "endcidrange")) {
352 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidrange block in ToUnicode CMap");
353 break;
354 }
355 if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
356 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidrange block in ToUnicode CMap");
357 continue;
358 }
359 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
360 if (!parseHex(s: tok1 + 1, len: n1 - 2, val: &code1) || !parseHex(s: tok2 + 1, len: n2 - 2, val: &code2)) {
361 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in cidrange block in ToUnicode CMap");
362 continue;
363 }
364 if (code1 > maxCode || code2 > maxCode) {
365 error(category: errSyntaxWarning, pos: -1, msg: "Invalid entry in cidrange block in ToUnicode CMap");
366 if (code2 > maxCode) {
367 code2 = maxCode;
368 }
369 }
370 for (i = atoi(nptr: tok3); code1 <= code2; ++code1, ++i) {
371 addMappingInt(code: code1, u: i);
372 ok = true;
373 }
374 }
375 pst->getToken(buf: tok1, size: sizeof(tok1), length: &n1);
376 } else {
377 strcpy(dest: tok1, src: tok2);
378 }
379 }
380 delete pst;
381 return ok;
382}
383
384void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n, int offset)
385{
386 Unicode u;
387 int j;
388
389 if (code > 0xffffff) {
390 // This is an arbitrary limit to avoid integer overflow issues.
391 // (I've seen CMaps with mappings for <ffffffff>.)
392 return;
393 }
394 if (code >= map.size()) {
395 size_t oldLen = map.size();
396 auto newLen = oldLen ? 2 * oldLen : 256;
397 if (code >= newLen) {
398 newLen = (code + 256) & ~255;
399 }
400 if (unlikely(code >= newLen)) {
401 error(category: errSyntaxWarning, pos: -1, msg: "Illegal code value in CharCodeToUnicode::addMapping");
402 return;
403 } else {
404 map.resize(new_size: newLen, x: 0);
405 }
406 }
407 if (n <= 4) {
408 if (!parseHex(s: uStr, len: n, val: &u)) {
409 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in ToUnicode CMap");
410 return;
411 }
412 map[code] = u + offset;
413 if (!UnicodeIsValid(ucs4: map[code])) {
414 map[code] = 0xfffd;
415 }
416 } else {
417 map[code] = 0;
418 int utf16Len = n / 4;
419 std::vector<Unicode> utf16(utf16Len);
420 utf16.resize(new_size: utf16Len);
421 for (j = 0; j < utf16Len; ++j) {
422 if (!parseHex(s: uStr + j * 4, len: 4, val: &utf16[j])) {
423 error(category: errSyntaxWarning, pos: -1, msg: "Illegal entry in ToUnicode CMap");
424 return;
425 }
426 }
427 utf16[utf16Len - 1] += offset;
428 sMap.push_back(x: { .c: code, .u: UTF16toUCS4(utf16: utf16.data(), utf16Len: utf16.size()) });
429 }
430}
431
432void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u)
433{
434 if (code > 0xffffff) {
435 // This is an arbitrary limit to avoid integer overflow issues.
436 // (I've seen CMaps with mappings for <ffffffff>.)
437 return;
438 }
439 if (code >= map.size()) {
440 size_t oldLen = map.size();
441 size_t newLen = oldLen ? 2 * oldLen : 256;
442 if (code >= newLen) {
443 newLen = (code + 256) & ~255;
444 }
445 map.resize(new_size: newLen, x: 0);
446 }
447 map[code] = u;
448}
449
450CharCodeToUnicode::CharCodeToUnicode()
451{
452 refCnt = 1;
453 isIdentity = false;
454}
455
456CharCodeToUnicode::CharCodeToUnicode(const std::optional<std::string> &tagA) : tag(tagA)
457{
458 map.resize(new_size: 256, x: 0);
459 refCnt = 1;
460 isIdentity = false;
461}
462CharCodeToUnicode::CharCodeToUnicode(const std::optional<std::string> &tagA, std::vector<Unicode> &&mapA, std::vector<CharCodeToUnicodeString> &&sMapA) : tag(tagA)
463{
464 map = std::move(mapA);
465 sMap = std::move(sMapA);
466 refCnt = 1;
467 isIdentity = false;
468}
469
470void CharCodeToUnicode::incRefCnt()
471{
472 ++refCnt;
473}
474
475void CharCodeToUnicode::decRefCnt()
476{
477 if (--refCnt == 0) {
478 delete this;
479 }
480}
481
482bool CharCodeToUnicode::match(const GooString *tagA)
483{
484 return tag && tag == tagA->toStr();
485}
486
487void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len)
488{
489 size_t i;
490 int j;
491
492 if (map.empty() || isIdentity) {
493 return;
494 }
495 if (len == 1) {
496 map[c] = u[0];
497 } else {
498 std::optional<std::reference_wrapper<CharCodeToUnicodeString>> element;
499 for (i = 0; i < sMap.size(); ++i) {
500 if (sMap[i].c == c) {
501 sMap[i].u.clear();
502 element = std::ref(t&: sMap[i]);
503 break;
504 }
505 }
506 if (!element) {
507 sMap.emplace_back();
508 element = std::ref(t&: sMap.back());
509 }
510 map[c] = 0;
511 element->get().c = c;
512 element->get().u.reserve(n: len);
513 for (j = 0; j < len; ++j) {
514 if (UnicodeIsValid(ucs4: u[j])) {
515 element->get().u.push_back(x: u[j]);
516 } else {
517 element->get().u.push_back(x: 0xfffd);
518 }
519 }
520 }
521}
522
523int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode const **u) const
524{
525 if (isIdentity) {
526 auto that = const_cast<CharCodeToUnicode *>(this);
527 that->map[0] = (Unicode)c;
528 *u = map.data();
529 return 1;
530 }
531 if (c >= map.size()) {
532 return 0;
533 }
534 if (map[c]) {
535 *u = &map[c];
536 return 1;
537 }
538 for (auto i = sMap.size(); i > 0; --i) { // in reverse so CMap takes precedence
539 if (sMap[i - 1].c == c) {
540 *u = sMap[i - 1].u.data();
541 return sMap[i - 1].u.size();
542 }
543 }
544 return 0;
545}
546
547int CharCodeToUnicode::mapToCharCode(const Unicode *u, CharCode *c, int usize) const
548{
549 // look for charcode in map
550 if (usize == 1 || (usize > 1 && !(*u & ~0xff))) {
551 if (isIdentity) {
552 *c = (CharCode)*u;
553 return 1;
554 }
555 for (CharCode i = 0; i < map.size(); i++) {
556 if (map[i] == *u) {
557 *c = i;
558 return 1;
559 }
560 }
561 *c = 'x';
562 } else {
563 size_t j;
564 // for each entry in the sMap
565 for (const auto &element : sMap) {
566 // if the entry's unicode length isn't the same are usize, the strings
567 // are obviously different
568 if (element.u.size() != size_t(usize)) {
569 continue;
570 }
571 // compare the string char by char
572 for (j = 0; j < element.u.size(); j++) {
573 if (element.u[j] != u[j]) {
574 break;
575 }
576 }
577
578 // we have the same strings
579 if (j == element.u.size()) {
580 *c = element.c;
581 return 1;
582 }
583 }
584 }
585 return 0;
586}
587
588//------------------------------------------------------------------------
589
590CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA)
591{
592 int i;
593
594 size = sizeA;
595 cache = (CharCodeToUnicode **)gmallocn(count: size, size: sizeof(CharCodeToUnicode *));
596 for (i = 0; i < size; ++i) {
597 cache[i] = nullptr;
598 }
599}
600
601CharCodeToUnicodeCache::~CharCodeToUnicodeCache()
602{
603 int i;
604
605 for (i = 0; i < size; ++i) {
606 if (cache[i]) {
607 cache[i]->decRefCnt();
608 }
609 }
610 gfree(p: cache);
611}
612
613CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(const GooString *tag)
614{
615 CharCodeToUnicode *ctu;
616 int i, j;
617
618 if (cache[0] && cache[0]->match(tagA: tag)) {
619 cache[0]->incRefCnt();
620 return cache[0];
621 }
622 for (i = 1; i < size; ++i) {
623 if (cache[i] && cache[i]->match(tagA: tag)) {
624 ctu = cache[i];
625 for (j = i; j >= 1; --j) {
626 cache[j] = cache[j - 1];
627 }
628 cache[0] = ctu;
629 ctu->incRefCnt();
630 return ctu;
631 }
632 }
633 return nullptr;
634}
635
636void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu)
637{
638 int i;
639
640 if (cache[size - 1]) {
641 cache[size - 1]->decRefCnt();
642 }
643 for (i = size - 1; i >= 1; --i) {
644 cache[i] = cache[i - 1];
645 }
646 cache[0] = ctu;
647 ctu->incRefCnt();
648}
649

// Source: poppler/poppler/CharCodeToUnicode.cc