1//========================================================================
2//
3// UnicodeMap.cc
4//
5// Copyright 2001-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
17// Copyright (C) 2017-2020, 2022 Albert Astals Cid <aacid@kde.org>
18// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
19// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
20// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
22// Copyright (C) 2019 Volker Krause <vkrause@kde.org>
23//
24// To see a description of the changes please see the Changelog file that
25// came with your tarball or type make ChangeLog if you are building from git
26//
27//========================================================================
28
29#include <config.h>
30
31#include <cstdio>
32#include <cstring>
33#include "goo/glibc.h"
34#include "goo/gmem.h"
35#include "goo/gfile.h"
36#include "goo/GooString.h"
37#include "Error.h"
38#include "GlobalParams.h"
39#include "UnicodeMap.h"
40
41//------------------------------------------------------------------------
42
43#define maxExtCode 16
44
45struct UnicodeMapExt
46{
47 Unicode u; // Unicode char
48 char code[maxExtCode];
49 unsigned int nBytes;
50};
51
52//------------------------------------------------------------------------
53
54std::unique_ptr<UnicodeMap> UnicodeMap::parse(const std::string &encodingNameA)
55{
56 FILE *f;
57 UnicodeMapRange *range;
58 UnicodeMapExt *eMap;
59 int size, eMapsSize;
60 char buf[256];
61 int line, nBytes, i;
62 char *tok1, *tok2, *tok3;
63 char *tokptr;
64
65 if (!(f = globalParams->getUnicodeMapFile(encodingName: encodingNameA))) {
66 error(category: errSyntaxError, pos: -1, msg: "Couldn't find unicodeMap file for the '{0:s}' encoding", encodingNameA.c_str());
67 return {};
68 }
69
70 auto map = std::unique_ptr<UnicodeMap>(new UnicodeMap(encodingNameA));
71
72 size = 8;
73 UnicodeMapRange *customRanges = (UnicodeMapRange *)gmallocn(count: size, size: sizeof(UnicodeMapRange));
74 eMapsSize = 0;
75
76 line = 1;
77 while (getLine(buf, size: sizeof(buf), f)) {
78 if ((tok1 = strtok_r(s: buf, delim: " \t\r\n", save_ptr: &tokptr)) && (tok2 = strtok_r(s: nullptr, delim: " \t\r\n", save_ptr: &tokptr))) {
79 if (!(tok3 = strtok_r(s: nullptr, delim: " \t\r\n", save_ptr: &tokptr))) {
80 tok3 = tok2;
81 tok2 = tok1;
82 }
83 nBytes = strlen(s: tok3) / 2;
84 if (nBytes <= 4) {
85 if (map->len == size) {
86 size *= 2;
87 customRanges = (UnicodeMapRange *)greallocn(p: customRanges, count: size, size: sizeof(UnicodeMapRange));
88 }
89 range = &customRanges[map->len];
90 sscanf(s: tok1, format: "%x", &range->start);
91 sscanf(s: tok2, format: "%x", &range->end);
92 sscanf(s: tok3, format: "%x", &range->code);
93 range->nBytes = nBytes;
94 ++map->len;
95 } else if (tok2 == tok1) {
96 if (map->eMapsLen == eMapsSize) {
97 eMapsSize += 16;
98 map->eMaps = (UnicodeMapExt *)greallocn(p: map->eMaps, count: eMapsSize, size: sizeof(UnicodeMapExt));
99 }
100 eMap = &map->eMaps[map->eMapsLen];
101 sscanf(s: tok1, format: "%x", &eMap->u);
102 for (i = 0; i < nBytes; ++i) {
103 unsigned int x;
104 sscanf(s: tok3 + i * 2, format: "%2x", &x);
105 eMap->code[i] = (char)x;
106 }
107 eMap->nBytes = nBytes;
108 ++map->eMapsLen;
109 } else {
110 error(category: errSyntaxError, pos: -1, msg: "Bad line ({0:d}) in unicodeMap file for the '{1:s}' encoding", line, encodingNameA.c_str());
111 }
112 } else {
113 error(category: errSyntaxError, pos: -1, msg: "Bad line ({0:d}) in unicodeMap file for the '{1:s}' encoding", line, encodingNameA.c_str());
114 }
115 ++line;
116 }
117
118 fclose(stream: f);
119
120 map->ranges = customRanges;
121 return map;
122}
123
124UnicodeMap::UnicodeMap(const std::string &encodingNameA)
125{
126 encodingName = encodingNameA;
127 unicodeOut = false;
128 kind = unicodeMapUser;
129 ranges = nullptr;
130 len = 0;
131 eMaps = nullptr;
132 eMapsLen = 0;
133}
134
135UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA, const UnicodeMapRange *rangesA, int lenA)
136{
137 encodingName = encodingNameA;
138 unicodeOut = unicodeOutA;
139 kind = unicodeMapResident;
140 ranges = rangesA;
141 len = lenA;
142 eMaps = nullptr;
143 eMapsLen = 0;
144}
145
146UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA, UnicodeMapFunc funcA)
147{
148 encodingName = encodingNameA;
149 unicodeOut = unicodeOutA;
150 kind = unicodeMapFunc;
151 func = funcA;
152 eMaps = nullptr;
153 eMapsLen = 0;
154}
155
156UnicodeMap::~UnicodeMap()
157{
158 if (kind == unicodeMapUser && ranges) {
159 gfree(p: const_cast<UnicodeMapRange *>(ranges));
160 }
161 if (eMaps) {
162 gfree(p: eMaps);
163 }
164}
165
166UnicodeMap::UnicodeMap(UnicodeMap &&other) noexcept : encodingName { std::move(other.encodingName) }, kind { other.kind }, unicodeOut { other.unicodeOut }, len { other.len }, eMaps { other.eMaps }, eMapsLen { other.eMapsLen }
167{
168 switch (kind) {
169 case unicodeMapUser:
170 case unicodeMapResident:
171 ranges = other.ranges;
172 other.ranges = nullptr;
173 break;
174 case unicodeMapFunc:
175 func = other.func;
176 break;
177 }
178 other.eMaps = nullptr;
179}
180
181UnicodeMap &UnicodeMap::operator=(UnicodeMap &&other) noexcept
182{
183 if (this != &other) {
184 swap(other);
185 }
186 return *this;
187}
188
189void UnicodeMap::swap(UnicodeMap &other) noexcept
190{
191 using std::swap;
192 swap(lhs&: encodingName, rhs&: other.encodingName);
193 swap(a&: unicodeOut, b&: other.unicodeOut);
194 switch (kind) {
195 case unicodeMapUser:
196 case unicodeMapResident:
197 switch (other.kind) {
198 case unicodeMapUser:
199 case unicodeMapResident:
200 swap(a&: ranges, b&: other.ranges);
201 break;
202 case unicodeMapFunc: {
203 const auto tmp = ranges;
204 func = other.func;
205 other.ranges = tmp;
206 break;
207 }
208 }
209 break;
210 case unicodeMapFunc:
211 switch (other.kind) {
212 case unicodeMapUser:
213 case unicodeMapResident: {
214 const auto tmp = func;
215 ranges = other.ranges;
216 other.func = tmp;
217 break;
218 }
219 case unicodeMapFunc:
220 swap(a&: func, b&: other.func);
221 break;
222 }
223 break;
224 }
225 swap(a&: kind, b&: other.kind);
226 swap(a&: len, b&: other.len);
227 swap(a&: eMaps, b&: other.eMaps);
228 swap(a&: eMapsLen, b&: other.eMapsLen);
229}
230
231bool UnicodeMap::match(const std::string &encodingNameA) const
232{
233 return encodingName == encodingNameA;
234}
235
236int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) const
237{
238 int a, b, m, n, i, j;
239 unsigned int code;
240
241 if (kind == unicodeMapFunc) {
242 return (*func)(u, buf, bufSize);
243 }
244
245 a = 0;
246 b = len;
247 if (u >= ranges[a].start) {
248 // invariant: ranges[a].start <= u < ranges[b].start
249 while (b - a > 1) {
250 m = (a + b) / 2;
251 if (u >= ranges[m].start) {
252 a = m;
253 } else if (u < ranges[m].start) {
254 b = m;
255 }
256 }
257 if (u <= ranges[a].end) {
258 n = ranges[a].nBytes;
259 if (n > bufSize) {
260 return 0;
261 }
262 code = ranges[a].code + (u - ranges[a].start);
263 for (i = n - 1; i >= 0; --i) {
264 buf[i] = (char)(code & 0xff);
265 code >>= 8;
266 }
267 return n;
268 }
269 }
270
271 for (i = 0; i < eMapsLen; ++i) {
272 if (eMaps[i].u == u) {
273 n = eMaps[i].nBytes;
274 for (j = 0; j < n; ++j) {
275 buf[j] = eMaps[i].code[j];
276 }
277 return n;
278 }
279 }
280
281 return 0;
282}
283
284//------------------------------------------------------------------------
285
286UnicodeMapCache::UnicodeMapCache() { }
287
288const UnicodeMap *UnicodeMapCache::getUnicodeMap(const std::string &encodingName)
289{
290 for (const std::unique_ptr<UnicodeMap> &map : cache) {
291 if (map->match(encodingNameA: encodingName)) {
292 return map.get();
293 }
294 }
295 std::unique_ptr<UnicodeMap> map = UnicodeMap::parse(encodingNameA: encodingName);
296 if (map) {
297 UnicodeMap *m = map.get();
298 cache.emplace_back(args: std::move(map));
299 return m;
300 }
301 return nullptr;
302}
303

// Source: poppler/poppler/UnicodeMap.cc