//========================================================================
//
// CharCodeToUnicode.h
//
// Mapping from character codes to Unicode.
//
// Copyright 2001-2003 Glyph & Cog, LLC
//
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
// Copyright (C) 2008, 2011, 2012, 2018, 2019, 2021, 2022 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2019 <corentinf@free.fr>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
31
32#ifndef CHARCODETOUNICODE_H
33#define CHARCODETOUNICODE_H
34
#include <atomic>
#include <optional>
#include <string>
#include <vector>

#include "poppler-config.h"
#include "CharTypes.h"
41
// Forward declaration: this header only refers to GooString through
// pointers, so the full class definition is not needed here.
class GooString;
43
44//------------------------------------------------------------------------
45
46class CharCodeToUnicode
47{
48 friend class UnicodeToCharCode;
49
50public:
51 // Create an identity mapping (Unicode = CharCode).
52 static CharCodeToUnicode *makeIdentityMapping();
53
54 // Read the CID-to-Unicode mapping for <collection> from the file
55 // specified by <fileName>. Sets the initial reference count to 1.
56 // Returns NULL on failure.
57 static CharCodeToUnicode *parseCIDToUnicode(const char *fileName, const GooString *collection);
58
59 // Create the CharCode-to-Unicode mapping for an 8-bit font.
60 // <toUnicode> is an array of 256 Unicode indexes. Sets the initial
61 // reference count to 1.
62 static CharCodeToUnicode *make8BitToUnicode(Unicode *toUnicode);
63
64 // Parse a ToUnicode CMap for an 8- or 16-bit font.
65 static CharCodeToUnicode *parseCMap(const GooString *buf, int nBits);
66 static CharCodeToUnicode *parseCMapFromFile(const GooString *fileName, int nBits);
67
68 // Parse a ToUnicode CMap for an 8- or 16-bit font, merging it into
69 // <this>.
70 void mergeCMap(const GooString *buf, int nBits);
71
72 ~CharCodeToUnicode() = default;
73
74 CharCodeToUnicode(const CharCodeToUnicode &) = delete;
75 CharCodeToUnicode &operator=(const CharCodeToUnicode &) = delete;
76
77 void incRefCnt();
78 void decRefCnt();
79
80 // Return true if this mapping matches the specified <tagA>.
81 bool match(const GooString *tagA);
82
83 // Set the mapping for <c>.
84 void setMapping(CharCode c, Unicode *u, int len);
85
86 // Map a CharCode to Unicode. Returns a pointer in u to internal storage
87 // so never store the pointers it returns, just the data, otherwise
88 // your pointed values might get changed by future calls
89 int mapToUnicode(CharCode c, Unicode const **u) const;
90
91 // Map a Unicode to CharCode.
92 int mapToCharCode(const Unicode *u, CharCode *c, int usize) const;
93
94private:
95 struct CharCodeToUnicodeString
96 {
97 CharCode c;
98 std::vector<Unicode> u;
99 };
100 bool parseCMap1(int (*getCharFunc)(void *), void *data, int nBits);
101 void addMapping(CharCode code, char *uStr, int n, int offset);
102 void addMappingInt(CharCode code, Unicode u);
103 CharCodeToUnicode();
104 explicit CharCodeToUnicode(const std::optional<std::string> &tagA);
105 CharCodeToUnicode(const std::optional<std::string> &tagA, std::vector<Unicode> &&mapA, std::vector<CharCodeToUnicodeString> &&sMapA);
106
107 const std::optional<std::string> tag;
108 std::vector<Unicode> map;
109 std::vector<CharCodeToUnicodeString> sMap;
110 std::atomic_int refCnt;
111 bool isIdentity;
112};
113
114//------------------------------------------------------------------------
115
116class CharCodeToUnicodeCache
117{
118public:
119 explicit CharCodeToUnicodeCache(int sizeA);
120 ~CharCodeToUnicodeCache();
121
122 CharCodeToUnicodeCache(const CharCodeToUnicodeCache &) = delete;
123 CharCodeToUnicodeCache &operator=(const CharCodeToUnicodeCache &) = delete;
124
125 // Get the CharCodeToUnicode object for <tag>. Increments its
126 // reference count; there will be one reference for the cache plus
127 // one for the caller of this function. Returns NULL on failure.
128 CharCodeToUnicode *getCharCodeToUnicode(const GooString *tag);
129
130 // Insert <ctu> into the cache, in the most-recently-used position.
131 void add(CharCodeToUnicode *ctu);
132
133private:
134 CharCodeToUnicode **cache;
135 int size;
136};
137
138#endif
139

// Source: poppler/poppler/CharCodeToUnicode.h