1//========================================================================
2//
3// FoFiType1.cc
4//
5// Copyright 1999-2003 Glyph & Cog, LLC
6//
7//========================================================================
8
9//========================================================================
10//
11// Modified under the Poppler project - http://poppler.freedesktop.org
12//
13// All changes made under the Poppler project to this file are licensed
14// under GPL version 2 or later
15//
16// Copyright (C) 2005, 2008, 2010, 2018, 2021-2023 Albert Astals Cid <aacid@kde.org>
17// Copyright (C) 2005 Kristian Høgsberg <krh@redhat.com>
18// Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
19// Copyright (C) 2014 Carlos Garcia Campos <carlosgc@gnome.org>
20// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
21// Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
22// Copyright (C) 2022 Oliver Sander <oliver.sander@tu-dresden.de>
23//
24// To see a description of the changes please see the Changelog file that
25// came with your tarball or type make ChangeLog if you are building from git
26//
27//========================================================================
28
29#include <config.h>
30
31#include <charconv>
32#include <optional>
33
34#include <cstdlib>
35#include <cstring>
36#include <climits>
37#include "goo/glibc.h"
38#include "goo/gmem.h"
39#include "goo/GooLikely.h"
40#include "FoFiEncodings.h"
41#include "FoFiType1.h"
42#include "poppler/Error.h"
43
44//------------------------------------------------------------------------
45// FoFiType1
46//------------------------------------------------------------------------
47
48FoFiType1 *FoFiType1::make(const unsigned char *fileA, int lenA)
49{
50 return new FoFiType1(fileA, lenA, false);
51}
52
53FoFiType1::FoFiType1(const unsigned char *fileA, int lenA, bool freeFileDataA) : FoFiBase(fileA, lenA, freeFileDataA)
54{
55 encoding = nullptr;
56 parsed = false;
57 undoPFB();
58}
59
60FoFiType1::~FoFiType1()
61{
62 if (encoding && encoding != fofiType1StandardEncoding) {
63 for (int i = 0; i < 256; ++i) {
64 gfree(p: encoding[i]);
65 }
66 gfree(p: encoding);
67 }
68}
69
70std::string FoFiType1::getName()
71{
72 if (!parsed) {
73 parse();
74 }
75 return name;
76}
77
78char **FoFiType1::getEncoding()
79{
80 if (!parsed) {
81 parse();
82 }
83 return encoding;
84}
85
86void FoFiType1::writeEncoded(const char **newEncoding, FoFiOutputFunc outputFunc, void *outputStream) const
87{
88 char buf[512];
89 char *line, *line2, *p;
90 int i;
91
92 // copy everything up to the encoding
93 for (line = (char *)file; line && strncmp(s1: line, s2: "/Encoding", n: 9); line = getNextLine(line)) {
94 ;
95 }
96 if (!line) {
97 // no encoding - just copy the whole font file
98 (*outputFunc)(outputStream, (char *)file, len);
99 return;
100 }
101 (*outputFunc)(outputStream, (char *)file, line - (char *)file);
102
103 // write the new encoding
104 (*outputFunc)(outputStream, "/Encoding 256 array\n", 20);
105 (*outputFunc)(outputStream, "0 1 255 {1 index exch /.notdef put} for\n", 40);
106 for (i = 0; i < 256; ++i) {
107 if (newEncoding[i]) {
108 sprintf(s: buf, format: "dup %d /%s put\n", i, newEncoding[i]);
109 (*outputFunc)(outputStream, buf, strlen(s: buf));
110 }
111 }
112 (*outputFunc)(outputStream, "readonly def\n", 13);
113
114 // find the end of the encoding data
115 //~ this ought to parse PostScript tokens
116 if (!strncmp(s1: line, s2: "/Encoding StandardEncoding def", n: 30)) {
117 line = getNextLine(line);
118 } else {
119 // skip "/Encoding" + one whitespace char,
120 // then look for 'def' preceded by PostScript whitespace
121 p = line + 10;
122 line = nullptr;
123 for (; p < (char *)file + len; ++p) {
124 if ((*p == ' ' || *p == '\t' || *p == '\x0a' || *p == '\x0d' || *p == '\x0c' || *p == '\0') && p + 4 <= (char *)file + len && !strncmp(s1: p + 1, s2: "def", n: 3)) {
125 line = p + 4;
126 break;
127 }
128 }
129 }
130
131 // some fonts have two /Encoding entries in their dictionary, so we
132 // check for a second one here
133 if (line) {
134 for (line2 = line, i = 0; i < 20 && line2 && strncmp(s1: line2, s2: "/Encoding", n: 9); line2 = getNextLine(line: line2), ++i) {
135 ;
136 }
137 if (i < 20 && line2) {
138 (*outputFunc)(outputStream, line, line2 - line);
139 if (!strncmp(s1: line2, s2: "/Encoding StandardEncoding def", n: 30)) {
140 line = getNextLine(line: line2);
141 } else {
142 // skip "/Encoding" + one whitespace char,
143 // then look for 'def' preceded by PostScript whitespace
144 p = line2 + 10;
145 line = nullptr;
146 for (; p < (char *)file + len; ++p) {
147 if ((*p == ' ' || *p == '\t' || *p == '\x0a' || *p == '\x0d' || *p == '\x0c' || *p == '\0') && p + 4 <= (char *)file + len && !strncmp(s1: p + 1, s2: "def", n: 3)) {
148 line = p + 4;
149 break;
150 }
151 }
152 }
153 }
154
155 // copy everything after the encoding
156 if (line) {
157 (*outputFunc)(outputStream, line, ((char *)file + len) - line);
158 }
159 }
160}
161
162char *FoFiType1::getNextLine(char *line) const
163{
164 while (line < (char *)file + len && *line != '\x0a' && *line != '\x0d') {
165 ++line;
166 }
167 if (line < (char *)file + len && *line == '\x0d') {
168 ++line;
169 }
170 if (line < (char *)file + len && *line == '\x0a') {
171 ++line;
172 }
173 if (line >= (char *)file + len) {
174 return nullptr;
175 }
176 return line;
177}
178
// Characters that separate tokens: space, tab, line feed, carriage return.
static const char tokenSeparators[] = " \t\n\r";

// Splits a character buffer into separator-delimited tokens.
//
// The tokenizer does not own the data: the buffer referenced by the
// string_view must outlive the tokenizer and every token it returns.
class FoFiType1Tokenizer
{
public:
    explicit FoFiType1Tokenizer(std::string_view stringViewA) : stringView(stringViewA) { }

    // Returns the next non-empty token, or an empty optional once only
    // separators (or nothing) remain. Runs of separators are skipped, so a
    // returned token is never empty.
    std::optional<std::string_view> getToken()
    {
        // find_first_not_of() returns npos both when currentPos is already at
        // the end and when only separators are left.
        const std::string_view::size_type tokenStart = stringView.find_first_not_of(tokenSeparators, currentPos);
        if (tokenStart == std::string_view::npos) {
            currentPos = stringView.length();
            return {};
        }

        const std::string_view::size_type tokenEnd = stringView.find_first_of(tokenSeparators, tokenStart);
        if (tokenEnd == std::string_view::npos) {
            // last token runs to the end of the data
            currentPos = stringView.length();
            return stringView.substr(tokenStart);
        }

        // resume after the separator that terminated this token
        currentPos = tokenEnd + 1;
        return stringView.substr(tokenStart, tokenEnd - tokenStart);
    }

private:
    std::string_view::size_type currentPos = 0;
    const std::string_view stringView;
};
216
217void FoFiType1::parse()
218{
219 FoFiType1Tokenizer tokenizer(std::string_view(reinterpret_cast<const char *>(file), len));
220 while (name.empty() || !encoding) {
221 const std::optional<std::string_view> token = tokenizer.getToken();
222
223 if (!token) {
224 break;
225 }
226
227 if (name.empty() && token == "/FontName") {
228 const std::optional<std::string_view> fontNameToken = tokenizer.getToken();
229 if (!fontNameToken) {
230 break;
231 }
232
233 // Skip the /
234 name = fontNameToken->substr(pos: 1);
235
236 } else if (!encoding && token == "/Encoding") {
237 const std::optional<std::string_view> token2 = tokenizer.getToken();
238 if (!token2) {
239 break;
240 }
241
242 const std::optional<std::string_view> token3 = tokenizer.getToken();
243 if (!token3) {
244 break;
245 }
246
247 if (token2 == "StandardEncoding" && token3 == "def") {
248 encoding = (char **)fofiType1StandardEncoding;
249 } else if (token2 == "256" && token3 == "array") {
250 encoding = (char **)gmallocn(count: 256, size: sizeof(char *));
251 for (int j = 0; j < 256; ++j) {
252 encoding[j] = nullptr;
253 }
254
255 while (true) {
256 const std::optional<std::string_view> encodingToken = tokenizer.getToken();
257 if (!encodingToken) {
258 break;
259 }
260
261 if (encodingToken == "dup") {
262 std::optional<std::string_view> codeToken = tokenizer.getToken();
263 if (!codeToken) {
264 break;
265 }
266
267 std::optional<std::string_view> nameToken;
268 // Sometimes font data has code and name together without spacing i.e. 33/exclam
269 // if that happens don't call getToken again and just split codeToken in 2
270 const auto slashPositionInCodeToken = codeToken->find(c: '/');
271 if (slashPositionInCodeToken != std::string_view::npos) {
272 nameToken = codeToken->substr(pos: slashPositionInCodeToken, n: codeToken->length() - slashPositionInCodeToken);
273 codeToken = codeToken->substr(pos: 0, n: slashPositionInCodeToken);
274 } else {
275 nameToken = tokenizer.getToken();
276 }
277
278 if (!nameToken) {
279 break;
280 }
281
282 int code = 0;
283 if (codeToken->length() > 2 && codeToken->at(pos: 0) == '8' && codeToken->at(pos: 1) == '#') {
284 std::from_chars(first: codeToken->data() + 2, last: codeToken->data() + codeToken->length(), value&: code, base: 8);
285 } else {
286 std::from_chars(first: codeToken->data(), last: codeToken->data() + codeToken->length(), value&: code);
287 }
288
289 if (code >= 0 && code < 256 && nameToken->length() > 1) {
290 gfree(p: encoding[code]);
291 encoding[code] = copyString(s: nameToken->data() + 1, n: nameToken->length() - 1);
292 }
293
294 } else if (encodingToken == "def") {
295 break;
296 }
297 }
298 }
299 }
300 }
301
302 parsed = true;
303}
304
305// Undo the PFB encoding, i.e., remove the PFB headers.
306void FoFiType1::undoPFB()
307{
308 bool ok;
309 unsigned char *file2;
310 int pos1, pos2, type;
311 unsigned int segLen;
312
313 ok = true;
314 if (getU8(pos: 0, ok: &ok) != 0x80 || !ok) {
315 return;
316 }
317 file2 = (unsigned char *)gmalloc(size: len);
318 pos1 = pos2 = 0;
319 while (getU8(pos: pos1, ok: &ok) == 0x80 && ok) {
320 type = getU8(pos: pos1 + 1, ok: &ok);
321 if (type < 1 || type > 2 || !ok) {
322 break;
323 }
324 segLen = getU32LE(pos: pos1 + 2, ok: &ok);
325 pos1 += 6;
326 if (!ok || !checkRegion(pos: pos1, size: segLen)) {
327 break;
328 }
329 memcpy(dest: file2 + pos2, src: file + pos1, n: segLen);
330 pos1 += segLen;
331 pos2 += segLen;
332 }
333 if (freeFileData) {
334 gfree(p: (char *)file);
335 }
336 file = file2;
337 freeFileData = true;
338 len = pos2;
339}
340

source code of poppler/fofi/FoFiType1.cc