UnicodeMap.cc source code [poppler/poppler/UnicodeMap.cc]

1	//========================================================================
2	//
3	// UnicodeMap.cc
4	//
5	// Copyright 2001-2003 Glyph & Cog, LLC
6	//
7	//========================================================================
8
9	//========================================================================
10	//
11	// Modified under the Poppler project - http://poppler.freedesktop.org
12	//
13	// All changes made under the Poppler project to this file are licensed
14	// under GPL version 2 or later
15	//
16	// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
17	// Copyright (C) 2017-2020, 2022 Albert Astals Cid <aacid@kde.org>
18	// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
19	// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
20	// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21	// Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
22	// Copyright (C) 2019 Volker Krause <vkrause@kde.org>
23	//
24	// To see a description of the changes please see the Changelog file that
25	// came with your tarball or type make ChangeLog if you are building from git
26	//
27	//========================================================================
28
29	#include <config.h>
30
31	#include <cstdio>
32	#include <cstring>
33	#include "goo/glibc.h"
34	#include "goo/gmem.h"
35	#include "goo/gfile.h"
36	#include "goo/GooString.h"
37	#include "Error.h"
38	#include "GlobalParams.h"
39	#include "UnicodeMap.h"
40
41	//------------------------------------------------------------------------
42
43	#define maxExtCode 16
44
45	struct UnicodeMapExt
46	{
47	Unicode u; // Unicode char
48	char code[maxExtCode];
49	unsigned int nBytes;
50	};
51
52	//------------------------------------------------------------------------
53
54	std::unique_ptr<UnicodeMap> UnicodeMap::parse(const std::string &encodingNameA)
55	{
56	FILE *f;
57	UnicodeMapRange *range;
58	UnicodeMapExt *eMap;
59	int size, eMapsSize;
60	char buf[`256`];
61	int line, nBytes, i;
62	char tok1, tok2, *tok3;
63	char *tokptr;
64
65	if (!(f = globalParams ->getUnicodeMapFile(encodingName: encodingNameA))) {
66	error(category: errSyntaxError, pos: -`1`, msg: "Couldn't find unicodeMap file for the '{0:s}' encoding", encodingNameA.c_str());
67	return {};
68	}
69
70	auto map = std::unique_ptr<UnicodeMap>(new UnicodeMap (encodingNameA));
71
72	size = `8`;
73	UnicodeMapRange customRanges = (UnicodeMapRange )gmallocn(count: size, size: sizeof(UnicodeMapRange));
74	eMapsSize = `0`;
75
76	line = `1`;
77	while (getLine(buf, size: sizeof(buf), f)) {
78	if ((tok1 = strtok_r(s: buf, delim: " \t\r\n", save_ptr: &tokptr)) && (tok2 = strtok_r(s: nullptr, delim: " \t\r\n", save_ptr: &tokptr))) {
79	if (!(tok3 = strtok_r(s: nullptr, delim: " \t\r\n", save_ptr: &tokptr))) {
80	tok3 = tok2;
81	tok2 = tok1;
82	}
83	nBytes = strlen(s: tok3) / `2`;
84	if (nBytes <= `4`) {
85	if (map ->len == size) {
86	size *= `2`;
87	customRanges = (UnicodeMapRange )greallocn(p: customRanges, count: size, size: sizeof*(UnicodeMapRange));
88	}
89	range = &customRanges[map ->len];
90	sscanf(s: tok1, format: "%x", &range->start);
91	sscanf(s: tok2, format: "%x", &range->end);
92	sscanf(s: tok3, format: "%x", &range->code);
93	range->nBytes = nBytes;
94	++map ->len;
95	} else if (tok2 == tok1) {
96	if (map ->eMapsLen == eMapsSize) {
97	eMapsSize += `16`;
98	map ->eMaps = (UnicodeMapExt )greallocn(p: map ->eMaps, count: eMapsSize, size: sizeof*(UnicodeMapExt));
99	}
100	eMap = &map ->eMaps[map ->eMapsLen];
101	sscanf(s: tok1, format: "%x", &eMap->u);
102	for (i = `0`; i < nBytes; ++i) {
103	unsigned int x;
104	sscanf(s: tok3 + i * `2`, format: "%2x", &x);
105	eMap->code[i] = (char)x;
106	}
107	eMap->nBytes = nBytes;
108	++map ->eMapsLen;
109	} else {
110	error(category: errSyntaxError, pos: -`1`, msg: "Bad line ({0:d}) in unicodeMap file for the '{1:s}' encoding", line, encodingNameA.c_str());
111	}
112	} else {
113	error(category: errSyntaxError, pos: -`1`, msg: "Bad line ({0:d}) in unicodeMap file for the '{1:s}' encoding", line, encodingNameA.c_str());
114	}
115	++line;
116	}
117
118	fclose(stream: f);
119
120	map ->ranges = customRanges;
121	return map;
122	}
123
124	UnicodeMap::UnicodeMap(const std::string &encodingNameA)
125	{
126	encodingName = encodingNameA;
127	unicodeOut = false;
128	kind = unicodeMapUser;
129	ranges = nullptr;
130	len = `0`;
131	eMaps = nullptr;
132	eMapsLen = `0`;
133	}
134
135	UnicodeMap::UnicodeMap(const char encodingNameA, bool* unicodeOutA, const UnicodeMapRange rangesA, int* lenA)
136	{
137	encodingName = encodingNameA;
138	unicodeOut = unicodeOutA;
139	kind = unicodeMapResident;
140	ranges = rangesA;
141	len = lenA;
142	eMaps = nullptr;
143	eMapsLen = `0`;
144	}
145
146	UnicodeMap::UnicodeMap(const char encodingNameA, bool* unicodeOutA, UnicodeMapFunc funcA)
147	{
148	encodingName = encodingNameA;
149	unicodeOut = unicodeOutA;
150	kind = unicodeMapFunc;
151	func = funcA;
152	eMaps = nullptr;
153	eMapsLen = `0`;
154	}
155
156	UnicodeMap::~UnicodeMap()
157	{
158	if (kind == unicodeMapUser && ranges) {
159	gfree(p: const_cast<UnicodeMapRange *>(ranges));
160	}
161	if (eMaps) {
162	gfree(p: eMaps);
163	}
164	}
165
166	UnicodeMap::UnicodeMap(UnicodeMap &&other) noexcept : encodingName { std::move(other.encodingName) }, kind { other.kind }, unicodeOut { other.unicodeOut }, len { other.len }, eMaps { other.eMaps }, eMapsLen { other.eMapsLen }
167	{
168	switch (kind) {
169	case unicodeMapUser:
170	case unicodeMapResident:
171	ranges = other.ranges;
172	other.ranges = nullptr;
173	break;
174	case unicodeMapFunc:
175	func = other.func;
176	break;
177	}
178	other.eMaps = nullptr;
179	}
180
181	UnicodeMap &UnicodeMap::operator=(UnicodeMap &&other) noexcept
182	{
183	if (this != &other) {
184	swap(other);
185	}
186	return *this;
187	}
188
189	void UnicodeMap::swap(UnicodeMap &other) noexcept
190	{
191	using std::swap;
192	swap(lhs&: encodingName, rhs&: other.encodingName);
193	swap(a&: unicodeOut, b&: other.unicodeOut);
194	switch (kind) {
195	case unicodeMapUser:
196	case unicodeMapResident:
197	switch (other.kind) {
198	case unicodeMapUser:
199	case unicodeMapResident:
200	swap(a&: ranges, b&: other.ranges);
201	break;
202	case unicodeMapFunc: {
203	const auto tmp = ranges;
204	func = other.func;
205	other.ranges = tmp;
206	break;
207	}
208	}
209	break;
210	case unicodeMapFunc:
211	switch (other.kind) {
212	case unicodeMapUser:
213	case unicodeMapResident: {
214	const auto tmp = func;
215	ranges = other.ranges;
216	other.func = tmp;
217	break;
218	}
219	case unicodeMapFunc:
220	swap(a&: func, b&: other.func);
221	break;
222	}
223	break;
224	}
225	swap(a&: kind, b&: other.kind);
226	swap(a&: len, b&: other.len);
227	swap(a&: eMaps, b&: other.eMaps);
228	swap(a&: eMapsLen, b&: other.eMapsLen);
229	}
230
231	bool UnicodeMap::match(const std::string &encodingNameA) const
232	{
233	return encodingName == encodingNameA;
234	}
235
236	int UnicodeMap::mapUnicode(Unicode u, char buf, int* bufSize) const
237	{
238	int a, b, m, n, i, j;
239	unsigned int code;
240
241	if (kind == unicodeMapFunc) {
242	return (*func)(u, buf, bufSize);
243	}
244
245	a = `0`;
246	b = len;
247	if (u >= ranges[a].start) {
248	// invariant: ranges[a].start <= u < ranges[b].start
249	while (b - a > `1`) {
250	m = (a + b) / `2`;
251	if (u >= ranges[m].start) {
252	a = m;
253	} else if (u < ranges[m].start) {
254	b = m;
255	}
256	}
257	if (u <= ranges[a].end) {
258	n = ranges[a].nBytes;
259	if (n > bufSize) {
260	return `0`;
261	}
262	code = ranges[a].code + (u - ranges[a].start);
263	for (i = n - `1`; i >= `0`; --i) {
264	buf[i] = (char)(code & `0xff`);
265	code >>= `8`;
266	}
267	return n;
268	}
269	}
270
271	for (i = `0`; i < eMapsLen; ++i) {
272	if (eMaps[i].u == u) {
273	n = eMaps[i].nBytes;
274	for (j = `0`; j < n; ++j) {
275	buf[j] = eMaps[i].code[j];
276	}
277	return n;
278	}
279	}
280
281	return `0`;
282	}
283
284	//------------------------------------------------------------------------
285
286	UnicodeMapCache::UnicodeMapCache() { }
287
288	const UnicodeMap UnicodeMapCache::getUnicodeMap(const* std::string &encodingName)
289	{
290	for (const std::unique_ptr<UnicodeMap> &map : cache) {
291	if (map ->match(encodingNameA: encodingName)) {
292	return map.get();
293	}
294	}
295	std::unique_ptr<UnicodeMap> map = UnicodeMap::parse(encodingNameA: encodingName);
296	if (map) {
297	UnicodeMap *m = map.get();
298	cache.emplace_back(args: std::move(map));
299	return m;
300	}
301	return nullptr;
302	}
303

source code of poppler/poppler/UnicodeMap.cc