1 | //======================================================================== |
2 | // |
3 | // CharCodeToUnicode.h |
4 | // |
5 | // Mapping from character codes to Unicode. |
6 | // |
7 | // Copyright 2001-2003 Glyph & Cog, LLC |
8 | // |
9 | //======================================================================== |
10 | |
11 | //======================================================================== |
12 | // |
13 | // Modified under the Poppler project - http://poppler.freedesktop.org |
14 | // |
15 | // All changes made under the Poppler project to this file are licensed |
16 | // under GPL version 2 or later |
17 | // |
18 | // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org> |
19 | // Copyright (C) 2007 Koji Otani <sho@bbr.jp> |
20 | // Copyright (C) 2008, 2011, 2012, 2018, 2019, 2021, 2022 Albert Astals Cid <aacid@kde.org> |
21 | // Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com> |
22 | // Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich |
23 | // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de> |
24 | // Copyright (C) 2019 <corentinf@free.fr> |
25 | // Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <sune@vuorela.dk> |
26 | // |
27 | // To see a description of the changes please see the Changelog file that |
28 | // came with your tarball or type make ChangeLog if you are building from git |
29 | // |
30 | //======================================================================== |
31 | |
32 | #ifndef CHARCODETOUNICODE_H |
33 | #define CHARCODETOUNICODE_H |
34 | |
#include <atomic>
#include <optional>
#include <string>
#include <vector>

#include "poppler-config.h"
#include "CharTypes.h"
41 | |
// Forward declaration: this header only refers to GooString through
// pointers (const GooString *), so the full definition is not needed here.
class GooString;
43 | |
44 | //------------------------------------------------------------------------ |
45 | |
46 | class CharCodeToUnicode |
47 | { |
48 | friend class UnicodeToCharCode; |
49 | |
50 | public: |
51 | // Create an identity mapping (Unicode = CharCode). |
52 | static CharCodeToUnicode *makeIdentityMapping(); |
53 | |
54 | // Read the CID-to-Unicode mapping for <collection> from the file |
55 | // specified by <fileName>. Sets the initial reference count to 1. |
56 | // Returns NULL on failure. |
57 | static CharCodeToUnicode *parseCIDToUnicode(const char *fileName, const GooString *collection); |
58 | |
59 | // Create the CharCode-to-Unicode mapping for an 8-bit font. |
60 | // <toUnicode> is an array of 256 Unicode indexes. Sets the initial |
61 | // reference count to 1. |
62 | static CharCodeToUnicode *make8BitToUnicode(Unicode *toUnicode); |
63 | |
64 | // Parse a ToUnicode CMap for an 8- or 16-bit font. |
65 | static CharCodeToUnicode *parseCMap(const GooString *buf, int nBits); |
66 | static CharCodeToUnicode *parseCMapFromFile(const GooString *fileName, int nBits); |
67 | |
68 | // Parse a ToUnicode CMap for an 8- or 16-bit font, merging it into |
69 | // <this>. |
70 | void mergeCMap(const GooString *buf, int nBits); |
71 | |
72 | ~CharCodeToUnicode() = default; |
73 | |
74 | CharCodeToUnicode(const CharCodeToUnicode &) = delete; |
75 | CharCodeToUnicode &operator=(const CharCodeToUnicode &) = delete; |
76 | |
77 | void incRefCnt(); |
78 | void decRefCnt(); |
79 | |
80 | // Return true if this mapping matches the specified <tagA>. |
81 | bool match(const GooString *tagA); |
82 | |
83 | // Set the mapping for <c>. |
84 | void setMapping(CharCode c, Unicode *u, int len); |
85 | |
86 | // Map a CharCode to Unicode. Returns a pointer in u to internal storage |
87 | // so never store the pointers it returns, just the data, otherwise |
88 | // your pointed values might get changed by future calls |
89 | int mapToUnicode(CharCode c, Unicode const **u) const; |
90 | |
91 | // Map a Unicode to CharCode. |
92 | int mapToCharCode(const Unicode *u, CharCode *c, int usize) const; |
93 | |
94 | private: |
95 | struct CharCodeToUnicodeString |
96 | { |
97 | CharCode c; |
98 | std::vector<Unicode> u; |
99 | }; |
100 | bool parseCMap1(int (*getCharFunc)(void *), void *data, int nBits); |
101 | void addMapping(CharCode code, char *uStr, int n, int offset); |
102 | void addMappingInt(CharCode code, Unicode u); |
103 | CharCodeToUnicode(); |
104 | explicit CharCodeToUnicode(const std::optional<std::string> &tagA); |
105 | CharCodeToUnicode(const std::optional<std::string> &tagA, std::vector<Unicode> &&mapA, std::vector<CharCodeToUnicodeString> &&sMapA); |
106 | |
107 | const std::optional<std::string> tag; |
108 | std::vector<Unicode> map; |
109 | std::vector<CharCodeToUnicodeString> sMap; |
110 | std::atomic_int refCnt; |
111 | bool isIdentity; |
112 | }; |
113 | |
114 | //------------------------------------------------------------------------ |
115 | |
116 | class CharCodeToUnicodeCache |
117 | { |
118 | public: |
119 | explicit CharCodeToUnicodeCache(int sizeA); |
120 | ~CharCodeToUnicodeCache(); |
121 | |
122 | CharCodeToUnicodeCache(const CharCodeToUnicodeCache &) = delete; |
123 | CharCodeToUnicodeCache &operator=(const CharCodeToUnicodeCache &) = delete; |
124 | |
125 | // Get the CharCodeToUnicode object for <tag>. Increments its |
126 | // reference count; there will be one reference for the cache plus |
127 | // one for the caller of this function. Returns NULL on failure. |
128 | CharCodeToUnicode *getCharCodeToUnicode(const GooString *tag); |
129 | |
130 | // Insert <ctu> into the cache, in the most-recently-used position. |
131 | void add(CharCodeToUnicode *ctu); |
132 | |
133 | private: |
134 | CharCodeToUnicode **cache; |
135 | int size; |
136 | }; |
137 | |
138 | #endif |
139 | |