//
// GooString.cc
//
// Simple variable-length string type.
//
// Copyright 1996-2003 Glyph & Cog, LLC
//
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
// Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2008-2011, 2016-2018, 2022 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2011 Kenji Uno <ku@digitaldolphins.jp>
// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
// Copyright (C) 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2012 Pino Toscano <pino@kde.org>
// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
// Copyright (C) 2015 William Bader <williambader@hotmail.com>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
// Copyright (C) 2018 Greg Knight <lyngvi@gmail.com>
// Copyright (C) 2019, 2022-2024 Oliver Sander <oliver.sander@tu-dresden.de>
// Copyright (C) 2023 Even Rouault <even.rouault@mines-paris.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
40#include <config.h>
42#include <algorithm>
43#include <cassert>
44#include <cctype>
45#include <cmath>
46#include <cstddef>
47#include <cstdlib>
48#include <cstring>
49#include <limits>
51#include "gmem.h"
52#include "Error.h"
53#include "GooString.h"
57namespace {
59union GooStringFormatArg {
60 int i;
61 unsigned int ui;
62 long l;
63 unsigned long ul;
64 long long ll;
65 unsigned long long ull;
66 double f;
67 char c;
68 char *s;
69 GooString *gs;
72enum GooStringFormatType
74 fmtIntDecimal,
75 fmtIntHex,
76 fmtIntHexUpper,
77 fmtIntOctal,
78 fmtIntBinary,
79 fmtUIntDecimal,
80 fmtUIntHex,
81 fmtUIntHexUpper,
82 fmtUIntOctal,
83 fmtUIntBinary,
84 fmtLongDecimal,
85 fmtLongHex,
86 fmtLongHexUpper,
87 fmtLongOctal,
88 fmtLongBinary,
89 fmtULongDecimal,
90 fmtULongHex,
91 fmtULongHexUpper,
92 fmtULongOctal,
93 fmtULongBinary,
94 fmtLongLongDecimal,
95 fmtLongLongHex,
96 fmtLongLongHexUpper,
97 fmtLongLongOctal,
98 fmtLongLongBinary,
99 fmtULongLongDecimal,
100 fmtULongLongHex,
101 fmtULongLongHexUpper,
102 fmtULongLongOctal,
103 fmtULongLongBinary,
104 fmtDouble,
105 fmtDoubleTrimSmallAware,
106 fmtDoubleTrim,
107 fmtChar,
108 fmtString,
109 fmtGooString,
110 fmtSpace
113const char *const formatStrings[] = { "d", "x", "X", "o", "b", "ud", "ux", "uX", "uo", "ub", "ld", "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo",
114 "ulb", "lld", "llx", "llX", "llo", "llb", "ulld", "ullx", "ullX", "ullo", "ullb", "f", "gs", "g", "c", "s", "t", "w", nullptr };
116void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);
118void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase = false);
120void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);
122void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len);
128std::unique_ptr<GooString> GooString::format(const char *fmt, ...)
130 auto s = std::make_unique<GooString>();
132 va_list argList;
133 va_start(argList, fmt);
134 s->appendfv(fmt, argList);
135 va_end(argList);
137 return s;
140std::unique_ptr<GooString> GooString::formatv(const char *fmt, va_list argList)
142 auto s = std::make_unique<GooString>();
144 s->appendfv(fmt, argList);
146 return s;
149GooString *GooString::appendf(const char *fmt, ...)
151 va_list argList;
152 va_start(argList, fmt);
153 appendfv(fmt, argList);
154 va_end(argList);
156 return this;
159GooString *GooString::appendfv(const char *fmt, va_list argList)
161 GooStringFormatArg *args;
162 int argsLen, argsSize;
163 GooStringFormatArg arg;
164 int idx, width, prec;
165 bool reverseAlign, zeroFill;
166 GooStringFormatType ft;
167 char buf[65];
168 int len, i;
169 const char *p0, *p1;
170 const char *str;
171 GooStringFormatArg argsBuf[8];
173 argsLen = 0;
174 argsSize = sizeof(argsBuf) / sizeof(argsBuf[0]);
175 args = argsBuf;
177 p0 = fmt;
178 while (*p0) {
179 if (*p0 == '{') {
180 ++p0;
181 if (*p0 == '{') {
182 ++p0;
183 append(c: '{');
184 } else {
186 // parse the format string
187 if (!(*p0 >= '0' && *p0 <= '9')) {
188 break;
189 }
190 idx = *p0 - '0';
191 for (++p0; *p0 >= '0' && *p0 <= '9'; ++p0) {
192 idx = 10 * idx + (*p0 - '0');
193 }
194 if (*p0 != ':') {
195 break;
196 }
197 ++p0;
198 if (*p0 == '-') {
199 reverseAlign = true;
200 ++p0;
201 } else {
202 reverseAlign = false;
203 }
204 width = 0;
205 zeroFill = *p0 == '0';
206 for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
207 width = 10 * width + (*p0 - '0');
208 }
209 if (width < 0) {
210 width = 0;
211 }
212 if (*p0 == '.') {
213 ++p0;
214 prec = 0;
215 for (; *p0 >= '0' && *p0 <= '9'; ++p0) {
216 prec = 10 * prec + (*p0 - '0');
217 }
218 } else {
219 prec = 0;
220 }
221 for (ft = (GooStringFormatType)0; formatStrings[ft]; ft = (GooStringFormatType)(ft + 1)) {
222 if (!strncmp(s1: p0, s2: formatStrings[ft], n: strlen(s: formatStrings[ft]))) {
223 break;
224 }
225 }
226 if (!formatStrings[ft]) {
227 break;
228 }
229 p0 += strlen(s: formatStrings[ft]);
230 if (*p0 != '}') {
231 break;
232 }
233 ++p0;
235 // fetch the argument
236 if (idx > argsLen) {
237 break;
238 }
239 if (idx == argsLen) {
240 if (argsLen == argsSize) {
241 argsSize *= 2;
242 if (args == argsBuf) {
243 args = (GooStringFormatArg *)gmallocn(count: argsSize, size: sizeof(GooStringFormatArg));
244 memcpy(dest: args, src: argsBuf, n: argsLen * sizeof(GooStringFormatArg));
245 } else {
246 args = (GooStringFormatArg *)greallocn(p: args, count: argsSize, size: sizeof(GooStringFormatArg));
247 }
248 }
249 switch (ft) {
250 case fmtIntDecimal:
251 case fmtIntHex:
252 case fmtIntHexUpper:
253 case fmtIntOctal:
254 case fmtIntBinary:
255 case fmtSpace:
256 args[argsLen].i = va_arg(argList, int);
257 break;
258 case fmtUIntDecimal:
259 case fmtUIntHex:
260 case fmtUIntHexUpper:
261 case fmtUIntOctal:
262 case fmtUIntBinary:
263 args[argsLen].ui = va_arg(argList, unsigned int);
264 break;
265 case fmtLongDecimal:
266 case fmtLongHex:
267 case fmtLongHexUpper:
268 case fmtLongOctal:
269 case fmtLongBinary:
270 args[argsLen].l = va_arg(argList, long);
271 break;
272 case fmtULongDecimal:
273 case fmtULongHex:
274 case fmtULongHexUpper:
275 case fmtULongOctal:
276 case fmtULongBinary:
277 args[argsLen].ul = va_arg(argList, unsigned long);
278 break;
279 case fmtLongLongDecimal:
280 case fmtLongLongHex:
281 case fmtLongLongHexUpper:
282 case fmtLongLongOctal:
283 case fmtLongLongBinary:
284 args[argsLen].ll = va_arg(argList, long long);
285 break;
286 case fmtULongLongDecimal:
287 case fmtULongLongHex:
288 case fmtULongLongHexUpper:
289 case fmtULongLongOctal:
290 case fmtULongLongBinary:
291 args[argsLen].ull = va_arg(argList, unsigned long long);
292 break;
293 case fmtDouble:
294 case fmtDoubleTrim:
295 case fmtDoubleTrimSmallAware:
296 args[argsLen].f = va_arg(argList, double);
297 break;
298 case fmtChar:
299 args[argsLen].c = (char)va_arg(argList, int);
300 break;
301 case fmtString:
302 args[argsLen].s = va_arg(argList, char *);
303 break;
304 case fmtGooString:
305 args[argsLen].gs = va_arg(argList, GooString *);
306 break;
307 }
308 ++argsLen;
309 }
311 // format the argument
312 arg = args[idx];
313 switch (ft) {
314 case fmtIntDecimal:
315 formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
316 break;
317 case fmtIntHex:
318 formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
319 break;
320 case fmtIntHexUpper:
321 formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
322 break;
323 case fmtIntOctal:
324 formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
325 break;
326 case fmtIntBinary:
327 formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
328 break;
329 case fmtUIntDecimal:
330 formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
331 break;
332 case fmtUIntHex:
333 formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
334 break;
335 case fmtUIntHexUpper:
336 formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
337 break;
338 case fmtUIntOctal:
339 formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
340 break;
341 case fmtUIntBinary:
342 formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
343 break;
344 case fmtLongDecimal:
345 formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
346 break;
347 case fmtLongHex:
348 formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
349 break;
350 case fmtLongHexUpper:
351 formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
352 break;
353 case fmtLongOctal:
354 formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
355 break;
356 case fmtLongBinary:
357 formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
358 break;
359 case fmtULongDecimal:
360 formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
361 break;
362 case fmtULongHex:
363 formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
364 break;
365 case fmtULongHexUpper:
366 formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
367 break;
368 case fmtULongOctal:
369 formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
370 break;
371 case fmtULongBinary:
372 formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
373 break;
374 case fmtLongLongDecimal:
375 formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
376 break;
377 case fmtLongLongHex:
378 formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
379 break;
380 case fmtLongLongHexUpper:
381 formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
382 break;
383 case fmtLongLongOctal:
384 formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
385 break;
386 case fmtLongLongBinary:
387 formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
388 break;
389 case fmtULongLongDecimal:
390 formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: 10, p: &str, len: &len);
391 break;
392 case fmtULongLongHex:
393 formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len);
394 break;
395 case fmtULongLongHexUpper:
396 formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: 16, p: &str, len: &len, upperCase: true);
397 break;
398 case fmtULongLongOctal:
399 formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: 8, p: &str, len: &len);
400 break;
401 case fmtULongLongBinary:
402 formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: 2, p: &str, len: &len);
403 break;
404 case fmtDouble:
405 formatDouble(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: false, p: &str, len: &len);
406 break;
407 case fmtDoubleTrim:
408 formatDouble(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: true, p: &str, len: &len);
409 break;
410 case fmtDoubleTrimSmallAware:
411 formatDoubleSmallAware(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: true, p: &str, len: &len);
412 break;
413 case fmtChar:
414 buf[0] = arg.c;
415 str = buf;
416 len = 1;
417 reverseAlign = !reverseAlign;
418 break;
419 case fmtString: {
420 str = arg.s;
421 const size_t strlen_str = strlen(s: str);
422 if (strlen_str > static_cast<size_t>(std::numeric_limits<int>::max())) {
423 error(category: errSyntaxWarning, pos: 0, msg: "String truncated to INT_MAX bytes");
424 len = std::numeric_limits<int>::max();
425 } else {
426 len = static_cast<int>(strlen_str);
427 }
428 reverseAlign = !reverseAlign;
429 break;
430 }
431 case fmtGooString:
432 if (arg.gs) {
433 str = arg.gs->c_str();
434 len = arg.gs->getLength();
435 } else {
436 str = "(null)";
437 len = 6;
438 }
439 reverseAlign = !reverseAlign;
440 break;
441 case fmtSpace:
442 str = buf;
443 len = 0;
444 width = arg.i;
445 break;
446 }
448 // append the formatted arg, handling width and alignment
449 if (!reverseAlign && len < width) {
450 for (i = len; i < width; ++i) {
451 append(c: ' ');
452 }
453 }
454 append(str, lengthA: len);
455 if (reverseAlign && len < width) {
456 for (i = len; i < width; ++i) {
457 append(c: ' ');
458 }
459 }
460 }
462 } else if (*p0 == '}') {
463 ++p0;
464 if (*p0 == '}') {
465 ++p0;
466 }
467 append(c: '}');
469 } else {
470 for (p1 = p0 + 1; *p1 && *p1 != '{' && *p1 != '}'; ++p1) {
471 ;
472 }
473 append(str: p0, lengthA: p1 - p0);
474 p0 = p1;
475 }
476 }
478 if (args != argsBuf) {
479 gfree(p: args);
480 }
482 return this;
namespace {

// Digit characters for bases up to 16 (16 digits plus the terminating NUL).
const char lowerCaseDigits[17] = "0123456789abcdef";
const char upperCaseDigits[17] = "0123456789ABCDEF";

// Formats the signed integer x in the given base, right-justified in buf.
//
// buf/bufSize  scratch buffer; the text is written at its end, no NUL added.
// zeroFill     pad with leading zeros up to width (the sign counts towards
//              the width).
// base         numeric base; the digit tables cover bases up to 16.
// p, len       out: start of the text inside buf and its length.
// upperCase    use 'A'-'F' instead of 'a'-'f'.
//
// Digits that do not fit into buf are silently dropped from the front; one
// byte is always kept free for the sign of a negative value.
void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    const bool neg = x < 0;
    const int start = neg ? 1 : 0;

    // Take the magnitude in unsigned arithmetic: "-x" would overflow (and be
    // undefined behaviour) when x is the most negative long long.
    unsigned long long abs_x = static_cast<unsigned long long>(x);
    if (neg) {
        abs_x = 0ULL - abs_x;
    }

    int i = bufSize;
    if (abs_x == 0) {
        buf[--i] = '0';
    } else {
        while (i > start && abs_x) {
            buf[--i] = vals[abs_x % base];
            abs_x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > start && j < width - start; ++j) {
            buf[--i] = '0';
        }
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Unsigned counterpart of formatInt(); same parameters, no sign handling.
void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;

    int i = bufSize;
    if (x == 0) {
        buf[--i] = '0';
    } else {
        while (i > 0 && x) {
            buf[--i] = vals[x % base];
            x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > 0 && j < width; ++j) {
            buf[--i] = '0';
        }
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Formats x in fixed-point notation with prec decimals, right-justified in
// buf (no NUL terminator). The value is rounded half away from zero at the
// last decimal. With trim set, trailing zero decimals are dropped, and so is
// the decimal point when no decimals remain. The result is reported through
// *p (start of the text) and *len (its length).
void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    const bool neg = x < 0;
    if (neg) {
        x = -x;
    }
    // Scale so that all requested decimals become integer digits, then peel
    // the digits off from least to most significant.
    x = std::floor(x * std::pow(10.0, prec) + 0.5);

    int i = bufSize;
    bool started = !trim; // false while trailing zeros are still being skipped
    for (int j = 0; j < prec && i > 1; ++j) {
        const double x2 = std::floor(0.1 * (x + 0.5));
        const int d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
        if (started || d != 0) {
            buf[--i] = '0' + d;
            started = true;
        }
        x = x2;
    }
    if (i > 1 && started) {
        buf[--i] = '.';
    }
    // Integer part; always at least one digit. The "i > 1" bound keeps
    // buf[0] free for the sign.
    if (i > 1) {
        do {
            const double x2 = std::floor(0.1 * (x + 0.5));
            const int d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
            buf[--i] = '0' + d;
            x = x2;
        } while (i > 1 && x);
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Like formatDouble(), but for |x| < 0.1 the precision is raised by one for
// every leading zero decimal (capped at 16), so that small values still keep
// about prec significant digits.
void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    // The loop body never runs for |x| >= 0.1, so both cases share one call.
    for (double absX = std::fabs(x); absX < 0.1 && prec < 16; absX *= 10) {
        ++prec;
    }
    formatDouble(x, buf, bufSize, prec, trim, p, len);
}

}
602GooString *GooString::lowerCase()
604 lowerCase(s&: *this);
605 return this;
608void GooString::lowerCase(std::string &s)
610 for (auto &c : s) {
611 if (std::isupper(c)) {
612 c = std::tolower(c: c);
613 }
614 }
617std::string GooString::toLowerCase(const std::string &s)
619 std::string newString = s;
620 lowerCase(s&: newString);
621 return s;

// source code of poppler/goo/GooString.cc