1 | //======================================================================== |
2 | // |
3 | // FoFiType1.cc |
4 | // |
5 | // Copyright 1999-2003 Glyph & Cog, LLC |
6 | // |
7 | //======================================================================== |
8 | |
9 | //======================================================================== |
10 | // |
11 | // Modified under the Poppler project - http://poppler.freedesktop.org |
12 | // |
13 | // All changes made under the Poppler project to this file are licensed |
14 | // under GPL version 2 or later |
15 | // |
16 | // Copyright (C) 2005, 2008, 2010, 2018, 2021-2023 Albert Astals Cid <aacid@kde.org> |
17 | // Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com> |
18 | // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net> |
19 | // Copyright (C) 2014 Carlos Garcia Campos <carlosgc@gnome.org> |
20 | // Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com> |
21 | // Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr> |
22 | // Copyright (C) 2022 Oliver Sander <oliver.sander@tu-dresden.de> |
23 | // |
24 | // To see a description of the changes please see the Changelog file that |
25 | // came with your tarball or type make ChangeLog if you are building from git |
26 | // |
27 | //======================================================================== |
28 | |
29 | #include <config.h> |
30 | |
31 | #include <charconv> |
32 | #include <optional> |
33 | |
34 | #include <cstdlib> |
35 | #include <cstring> |
36 | #include <climits> |
37 | #include "goo/glibc.h" |
38 | #include "goo/gmem.h" |
39 | #include "goo/GooLikely.h" |
40 | #include "FoFiEncodings.h" |
41 | #include "FoFiType1.h" |
42 | #include "poppler/Error.h" |
43 | |
44 | //------------------------------------------------------------------------ |
45 | // FoFiType1 |
46 | //------------------------------------------------------------------------ |
47 | |
48 | FoFiType1 *FoFiType1::make(const unsigned char *fileA, int lenA) |
49 | { |
50 | return new FoFiType1(fileA, lenA, false); |
51 | } |
52 | |
53 | FoFiType1::FoFiType1(const unsigned char *fileA, int lenA, bool freeFileDataA) : FoFiBase(fileA, lenA, freeFileDataA) |
54 | { |
55 | encoding = nullptr; |
56 | parsed = false; |
57 | undoPFB(); |
58 | } |
59 | |
60 | FoFiType1::~FoFiType1() |
61 | { |
62 | if (encoding && encoding != fofiType1StandardEncoding) { |
63 | for (int i = 0; i < 256; ++i) { |
64 | gfree(p: encoding[i]); |
65 | } |
66 | gfree(p: encoding); |
67 | } |
68 | } |
69 | |
70 | std::string FoFiType1::getName() |
71 | { |
72 | if (!parsed) { |
73 | parse(); |
74 | } |
75 | return name; |
76 | } |
77 | |
78 | char **FoFiType1::getEncoding() |
79 | { |
80 | if (!parsed) { |
81 | parse(); |
82 | } |
83 | return encoding; |
84 | } |
85 | |
86 | void FoFiType1::writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const |
87 | { |
88 | char buf[512]; |
89 | char *line, *line2, *p; |
90 | int i; |
91 | |
92 | // copy everything up to the encoding |
93 | for (line = (char *)file; line && strncmp(s1: line, s2: "/Encoding" , n: 9); line = getNextLine(line)) { |
94 | ; |
95 | } |
96 | if (!line) { |
97 | // no encoding - just copy the whole font file |
98 | (*outputFunc)(outputStream, (char *)file, len); |
99 | return; |
100 | } |
101 | (*outputFunc)(outputStream, (char *)file, line - (char *)file); |
102 | |
103 | // write the new encoding |
104 | (*outputFunc)(outputStream, "/Encoding 256 array\n" , 20); |
105 | (*outputFunc)(outputStream, "0 1 255 {1 index exch /.notdef put} for\n" , 40); |
106 | for (i = 0; i < 256; ++i) { |
107 | if (newEncoding[i]) { |
108 | sprintf(s: buf, format: "dup %d /%s put\n" , i, newEncoding[i]); |
109 | (*outputFunc)(outputStream, buf, strlen(s: buf)); |
110 | } |
111 | } |
112 | (*outputFunc)(outputStream, "readonly def\n" , 13); |
113 | |
114 | // find the end of the encoding data |
115 | //~ this ought to parse PostScript tokens |
116 | if (!strncmp(s1: line, s2: "/Encoding StandardEncoding def" , n: 30)) { |
117 | line = getNextLine(line); |
118 | } else { |
119 | // skip "/Encoding" + one whitespace char, |
120 | // then look for 'def' preceded by PostScript whitespace |
121 | p = line + 10; |
122 | line = nullptr; |
123 | for (; p < (char *)file + len; ++p) { |
124 | if ((*p == ' ' || *p == '\t' || *p == '\x0a' || *p == '\x0d' || *p == '\x0c' || *p == '\0') && p + 4 <= (char *)file + len && !strncmp(s1: p + 1, s2: "def" , n: 3)) { |
125 | line = p + 4; |
126 | break; |
127 | } |
128 | } |
129 | } |
130 | |
131 | // some fonts have two /Encoding entries in their dictionary, so we |
132 | // check for a second one here |
133 | if (line) { |
134 | for (line2 = line, i = 0; i < 20 && line2 && strncmp(s1: line2, s2: "/Encoding" , n: 9); line2 = getNextLine(line: line2), ++i) { |
135 | ; |
136 | } |
137 | if (i < 20 && line2) { |
138 | (*outputFunc)(outputStream, line, line2 - line); |
139 | if (!strncmp(s1: line2, s2: "/Encoding StandardEncoding def" , n: 30)) { |
140 | line = getNextLine(line: line2); |
141 | } else { |
142 | // skip "/Encoding" + one whitespace char, |
143 | // then look for 'def' preceded by PostScript whitespace |
144 | p = line2 + 10; |
145 | line = nullptr; |
146 | for (; p < (char *)file + len; ++p) { |
147 | if ((*p == ' ' || *p == '\t' || *p == '\x0a' || *p == '\x0d' || *p == '\x0c' || *p == '\0') && p + 4 <= (char *)file + len && !strncmp(s1: p + 1, s2: "def" , n: 3)) { |
148 | line = p + 4; |
149 | break; |
150 | } |
151 | } |
152 | } |
153 | } |
154 | |
155 | // copy everything after the encoding |
156 | if (line) { |
157 | (*outputFunc)(outputStream, line, ((char *)file + len) - line); |
158 | } |
159 | } |
160 | } |
161 | |
162 | char *FoFiType1::getNextLine(char *line) const |
163 | { |
164 | while (line < (char *)file + len && *line != '\x0a' && *line != '\x0d') { |
165 | ++line; |
166 | } |
167 | if (line < (char *)file + len && *line == '\x0d') { |
168 | ++line; |
169 | } |
170 | if (line < (char *)file + len && *line == '\x0a') { |
171 | ++line; |
172 | } |
173 | if (line >= (char *)file + len) { |
174 | return nullptr; |
175 | } |
176 | return line; |
177 | } |
178 | |
// Characters that separate tokens in the font data.
static const char tokenSeparators[] = " \t\n\r";

// Splits a character buffer into tokens delimited by runs of
// tokenSeparators. The tokenizer does not own the buffer: the returned
// views point into the data passed to the constructor, which must
// outlive them.
class FoFiType1Tokenizer
{
public:
    explicit FoFiType1Tokenizer(std::string_view stringViewA) : stringView(stringViewA) { }

    // Return the next token, or an empty optional once the input is
    // exhausted. A returned token is never empty: trailing separators at
    // the end of the input yield "no token" rather than an empty one.
    std::optional<std::string_view> getToken()
    {
        const auto length = stringView.length();
        if (currentPos >= length) {
            return {};
        }

        // skip any run of contiguous separators
        const auto tokenStart = stringView.find_first_not_of(tokenSeparators, currentPos);
        if (tokenStart == std::string_view::npos) {
            // only separators were left
            currentPos = length;
            return {};
        }

        const auto tokenEnd = stringView.find_first_of(tokenSeparators, tokenStart);
        if (tokenEnd == std::string_view::npos) {
            // the last token runs to the end of the input
            currentPos = length;
            return stringView.substr(tokenStart);
        }

        // resume just past the separator that ended this token
        currentPos = tokenEnd + 1;
        return stringView.substr(tokenStart, tokenEnd - tokenStart);
    }

private:
    std::string_view::size_type currentPos = 0;
    const std::string_view stringView;
};
216 | |
217 | void FoFiType1::parse() |
218 | { |
219 | FoFiType1Tokenizer tokenizer(std::string_view(reinterpret_cast<const char *>(file), len)); |
220 | while (name.empty() || !encoding) { |
221 | const std::optional<std::string_view> token = tokenizer.getToken(); |
222 | |
223 | if (!token) { |
224 | break; |
225 | } |
226 | |
227 | if (name.empty() && token == "/FontName" ) { |
228 | const std::optional<std::string_view> fontNameToken = tokenizer.getToken(); |
229 | if (!fontNameToken) { |
230 | break; |
231 | } |
232 | |
233 | // Skip the / |
234 | name = fontNameToken->substr(pos: 1); |
235 | |
236 | } else if (!encoding && token == "/Encoding" ) { |
237 | const std::optional<std::string_view> token2 = tokenizer.getToken(); |
238 | if (!token2) { |
239 | break; |
240 | } |
241 | |
242 | const std::optional<std::string_view> token3 = tokenizer.getToken(); |
243 | if (!token3) { |
244 | break; |
245 | } |
246 | |
247 | if (token2 == "StandardEncoding" && token3 == "def" ) { |
248 | encoding = (char **)fofiType1StandardEncoding; |
249 | } else if (token2 == "256" && token3 == "array" ) { |
250 | encoding = (char **)gmallocn(count: 256, size: sizeof(char *)); |
251 | for (int j = 0; j < 256; ++j) { |
252 | encoding[j] = nullptr; |
253 | } |
254 | |
255 | while (true) { |
256 | const std::optional<std::string_view> encodingToken = tokenizer.getToken(); |
257 | if (!encodingToken) { |
258 | break; |
259 | } |
260 | |
261 | if (encodingToken == "dup" ) { |
262 | std::optional<std::string_view> codeToken = tokenizer.getToken(); |
263 | if (!codeToken) { |
264 | break; |
265 | } |
266 | |
267 | std::optional<std::string_view> nameToken; |
268 | // Sometimes font data has code and name together without spacing i.e. 33/exclam |
269 | // if that happens don't call getToken again and just split codeToken in 2 |
270 | const auto slashPositionInCodeToken = codeToken->find(c: '/'); |
271 | if (slashPositionInCodeToken != std::string_view::npos) { |
272 | nameToken = codeToken->substr(pos: slashPositionInCodeToken, n: codeToken->length() - slashPositionInCodeToken); |
273 | codeToken = codeToken->substr(pos: 0, n: slashPositionInCodeToken); |
274 | } else { |
275 | nameToken = tokenizer.getToken(); |
276 | } |
277 | |
278 | if (!nameToken) { |
279 | break; |
280 | } |
281 | |
282 | int code = 0; |
283 | if (codeToken->length() > 2 && codeToken->at(pos: 0) == '8' && codeToken->at(pos: 1) == '#') { |
284 | std::from_chars(first: codeToken->data() + 2, last: codeToken->data() + codeToken->length(), value&: code, base: 8); |
285 | } else { |
286 | std::from_chars(first: codeToken->data(), last: codeToken->data() + codeToken->length(), value&: code); |
287 | } |
288 | |
289 | if (code >= 0 && code < 256 && nameToken->length() > 1) { |
290 | gfree(p: encoding[code]); |
291 | encoding[code] = copyString(s: nameToken->data() + 1, n: nameToken->length() - 1); |
292 | } |
293 | |
294 | } else if (encodingToken == "def" ) { |
295 | break; |
296 | } |
297 | } |
298 | } |
299 | } |
300 | } |
301 | |
302 | parsed = true; |
303 | } |
304 | |
305 | // Undo the PFB encoding, i.e., remove the PFB headers. |
306 | void FoFiType1::undoPFB() |
307 | { |
308 | bool ok; |
309 | unsigned char *file2; |
310 | int pos1, pos2, type; |
311 | unsigned int segLen; |
312 | |
313 | ok = true; |
314 | if (getU8(pos: 0, ok: &ok) != 0x80 || !ok) { |
315 | return; |
316 | } |
317 | file2 = (unsigned char *)gmalloc(size: len); |
318 | pos1 = pos2 = 0; |
319 | while (getU8(pos: pos1, ok: &ok) == 0x80 && ok) { |
320 | type = getU8(pos: pos1 + 1, ok: &ok); |
321 | if (type < 1 || type > 2 || !ok) { |
322 | break; |
323 | } |
324 | segLen = getU32LE(pos: pos1 + 2, ok: &ok); |
325 | pos1 += 6; |
326 | if (!ok || !checkRegion(pos: pos1, size: segLen)) { |
327 | break; |
328 | } |
329 | memcpy(dest: file2 + pos2, src: file + pos1, n: segLen); |
330 | pos1 += segLen; |
331 | pos2 += segLen; |
332 | } |
333 | if (freeFileData) { |
334 | gfree(p: (char *)file); |
335 | } |
336 | file = file2; |
337 | freeFileData = true; |
338 | len = pos2; |
339 | } |
340 | |