GooString.cc source code [poppler/goo/GooString.cc]

1	//========================================================================
2	//
3	// GooString.cc
4	//
5	// Simple variable-length string type.
6	//
7	// Copyright 1996-2003 Glyph & Cog, LLC
8	//
9	//========================================================================
10
11	//========================================================================
12	//
13	// Modified under the Poppler project - http://poppler.freedesktop.org
14	//
15	// All changes made under the Poppler project to this file are licensed
16	// under GPL version 2 or later
17	//
18	// Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
19	// Copyright (C) 2006 Krzysztof Kowalczyk <kkowalczyk@gmail.com>
20	// Copyright (C) 2007 Jeff Muizelaar <jeff@infidigm.net>
21	// Copyright (C) 2008-2011, 2016-2018, 2022 Albert Astals Cid <aacid@kde.org>
22	// Copyright (C) 2011 Kenji Uno <ku@digitaldolphins.jp>
23	// Copyright (C) 2012, 2013 Fabio D'Urso <fabiodurso@hotmail.it>
24	// Copyright (C) 2012, 2017 Adrian Johnson <ajohnson@redneon.com>
25	// Copyright (C) 2012 Pino Toscano <pino@kde.org>
26	// Copyright (C) 2013 Jason Crain <jason@aquaticape.us>
27	// Copyright (C) 2015 William Bader <williambader@hotmail.com>
28	// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
29	// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
30	// Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
31	// Copyright (C) 2018 Greg Knight <lyngvi@gmail.com>
32	// Copyright (C) 2019, 2022-2024 Oliver Sander <oliver.sander@tu-dresden.de>
33	// Copyright (C) 2023 Even Rouault <even.rouault@mines-paris.org>
34	//
35	// To see a description of the changes please see the Changelog file that
36	// came with your tarball or type make ChangeLog if you are building from git
37	//
38	//========================================================================
39
40	#include <config.h>
41
42	#include <algorithm>
43	#include <cassert>
44	#include <cctype>
45	#include <cmath>
46	#include <cstddef>
47	#include <cstdlib>
48	#include <cstring>
49	#include <limits>
50
51	#include "gmem.h"
52	#include "Error.h"
53	#include "GooString.h"
54
55	//------------------------------------------------------------------------
56
57	namespace {
58
59	union GooStringFormatArg {
60	int i;
61	unsigned int ui;
62	long l;
63	unsigned long ul;
64	long long ll;
65	unsigned long long ull;
66	double f;
67	char c;
68	char *s;
69	GooString *gs;
70	};
71
72	enum GooStringFormatType
73	{
74	fmtIntDecimal,
75	fmtIntHex,
76	fmtIntHexUpper,
77	fmtIntOctal,
78	fmtIntBinary,
79	fmtUIntDecimal,
80	fmtUIntHex,
81	fmtUIntHexUpper,
82	fmtUIntOctal,
83	fmtUIntBinary,
84	fmtLongDecimal,
85	fmtLongHex,
86	fmtLongHexUpper,
87	fmtLongOctal,
88	fmtLongBinary,
89	fmtULongDecimal,
90	fmtULongHex,
91	fmtULongHexUpper,
92	fmtULongOctal,
93	fmtULongBinary,
94	fmtLongLongDecimal,
95	fmtLongLongHex,
96	fmtLongLongHexUpper,
97	fmtLongLongOctal,
98	fmtLongLongBinary,
99	fmtULongLongDecimal,
100	fmtULongLongHex,
101	fmtULongLongHexUpper,
102	fmtULongLongOctal,
103	fmtULongLongBinary,
104	fmtDouble,
105	fmtDoubleTrimSmallAware,
106	fmtDoubleTrim,
107	fmtChar,
108	fmtString,
109	fmtGooString,
110	fmtSpace
111	};
112
113	const char *const formatStrings[] = { "d", "x", "X", "o", "b", "ud", "ux", "uX", "uo", "ub", "ld", "lx", "lX", "lo", "lb", "uld", "ulx", "ulX", "ulo",
114	"ulb", "lld", "llx", "llX", "llo", "llb", "ulld", "ullx", "ullX", "ullo", "ullb", "f", "gs", "g", "c", "s", "t", "w", nullptr };
115
116	void formatInt(long long x, char buf, int* bufSize, bool zeroFill, int width, int base, const char *p, int* len, bool* upperCase = false);
117
118	void formatUInt(unsigned long long x, char buf, int* bufSize, bool zeroFill, int width, int base, const char *p, int* len, bool* upperCase = false);
119
120	void formatDouble(double x, char buf, int* bufSize, int prec, bool trim, const char *p, int* *len);
121
122	void formatDoubleSmallAware(double x, char buf, int* bufSize, int prec, bool trim, const char *p, int* *len);
123
124	}
125
126	//------------------------------------------------------------------------
127
128	std::unique_ptr<GooString> GooString::format(const char *fmt, ...)
129	{
130	auto s = std::make_unique<GooString>();
131
132	va_list argList;
133	va_start(argList, fmt);
134	s ->appendfv(fmt, argList);
135	va_end(argList);
136
137	return s;
138	}
139
140	std::unique_ptr<GooString> GooString::formatv(const char *fmt, va_list argList)
141	{
142	auto s = std::make_unique<GooString>();
143
144	s ->appendfv(fmt, argList);
145
146	return s;
147	}
148
149	GooString GooString::appendf(const* char *fmt, ...)
150	{
151	va_list argList;
152	va_start(argList, fmt);
153	appendfv(fmt, argList);
154	va_end(argList);
155
156	return this;
157	}
158
159	GooString GooString::appendfv(const* char *fmt, va_list argList)
160	{
161	GooStringFormatArg *args;
162	int argsLen, argsSize;
163	GooStringFormatArg arg;
164	int idx, width, prec;
165	bool reverseAlign, zeroFill;
166	GooStringFormatType ft;
167	char buf[`65`];
168	int len, i;
169	const char p0, p1;
170	const char *str;
171	GooStringFormatArg argsBuf[`8`];
172
173	argsLen = `0`;
174	argsSize = sizeof(argsBuf) / sizeof(argsBuf[`0`]);
175	args = argsBuf;
176
177	p0 = fmt;
178	while (*p0) {
179	if (*p0 == `'{'`) {
180	++p0;
181	if (*p0 == `'{'`) {
182	++p0;
183	append(c: `'{'`);
184	} else {
185
186	// parse the format string
187	if (!(p0 >= `'0'` && p0 <= `'9'`)) {
188	break;
189	}
190	idx = *p0 - `'0'`;
191	for (++p0; p0 >= `'0'` && p0 <= `'9'`; ++p0) {
192	idx = `10` * idx + (*p0 - `'0'`);
193	}
194	if (*p0 != `':'`) {
195	break;
196	}
197	++p0;
198	if (*p0 == `'-'`) {
199	reverseAlign = true;
200	++p0;
201	} else {
202	reverseAlign = false;
203	}
204	width = `0`;
205	zeroFill = *p0 == `'0'`;
206	for (; p0 >= `'0'` && p0 <= `'9'`; ++p0) {
207	width = `10` * width + (*p0 - `'0'`);
208	}
209	if (width < `0`) {
210	width = `0`;
211	}
212	if (*p0 == `'.'`) {
213	++p0;
214	prec = `0`;
215	for (; p0 >= `'0'` && p0 <= `'9'`; ++p0) {
216	prec = `10` * prec + (*p0 - `'0'`);
217	}
218	} else {
219	prec = `0`;
220	}
221	for (ft = (GooStringFormatType)`0`; formatStrings[ft]; ft = (GooStringFormatType)(ft + `1`)) {
222	if (!strncmp(s1: p0, s2: formatStrings[ft], n: strlen(s: formatStrings[ft]))) {
223	break;
224	}
225	}
226	if (!formatStrings[ft]) {
227	break;
228	}
229	p0 += strlen(s: formatStrings[ft]);
230	if (*p0 != `'}'`) {
231	break;
232	}
233	++p0;
234
235	// fetch the argument
236	if (idx > argsLen) {
237	break;
238	}
239	if (idx == argsLen) {
240	if (argsLen == argsSize) {
241	argsSize *= `2`;
242	if (args == argsBuf) {
243	args = (GooStringFormatArg )gmallocn(count: argsSize, size: sizeof*(GooStringFormatArg));
244	memcpy(dest: args, src: argsBuf, n: argsLen * sizeof(GooStringFormatArg));
245	} else {
246	args = (GooStringFormatArg )greallocn(p: args, count: argsSize, size: sizeof*(GooStringFormatArg));
247	}
248	}
249	switch (ft) {
250	case fmtIntDecimal:
251	case fmtIntHex:
252	case fmtIntHexUpper:
253	case fmtIntOctal:
254	case fmtIntBinary:
255	case fmtSpace:
256	args[argsLen].i = va_arg(argList, int);
257	break;
258	case fmtUIntDecimal:
259	case fmtUIntHex:
260	case fmtUIntHexUpper:
261	case fmtUIntOctal:
262	case fmtUIntBinary:
263	args[argsLen].ui = va_arg(argList, unsigned int);
264	break;
265	case fmtLongDecimal:
266	case fmtLongHex:
267	case fmtLongHexUpper:
268	case fmtLongOctal:
269	case fmtLongBinary:
270	args[argsLen].l = va_arg(argList, long);
271	break;
272	case fmtULongDecimal:
273	case fmtULongHex:
274	case fmtULongHexUpper:
275	case fmtULongOctal:
276	case fmtULongBinary:
277	args[argsLen].ul = va_arg(argList, unsigned long);
278	break;
279	case fmtLongLongDecimal:
280	case fmtLongLongHex:
281	case fmtLongLongHexUpper:
282	case fmtLongLongOctal:
283	case fmtLongLongBinary:
284	args[argsLen].ll = va_arg(argList, long long);
285	break;
286	case fmtULongLongDecimal:
287	case fmtULongLongHex:
288	case fmtULongLongHexUpper:
289	case fmtULongLongOctal:
290	case fmtULongLongBinary:
291	args[argsLen].ull = va_arg(argList, unsigned long long);
292	break;
293	case fmtDouble:
294	case fmtDoubleTrim:
295	case fmtDoubleTrimSmallAware:
296	args[argsLen].f = va_arg(argList, double);
297	break;
298	case fmtChar:
299	args[argsLen].c = (char)va_arg(argList, int);
300	break;
301	case fmtString:
302	args[argsLen].s = va_arg(argList, char *);
303	break;
304	case fmtGooString:
305	args[argsLen].gs = va_arg(argList, GooString *);
306	break;
307	}
308	++argsLen;
309	}
310
311	// format the argument
312	arg = args[idx];
313	switch (ft) {
314	case fmtIntDecimal:
315	formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
316	break;
317	case fmtIntHex:
318	formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
319	break;
320	case fmtIntHexUpper:
321	formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
322	break;
323	case fmtIntOctal:
324	formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
325	break;
326	case fmtIntBinary:
327	formatInt(x: arg.i, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
328	break;
329	case fmtUIntDecimal:
330	formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
331	break;
332	case fmtUIntHex:
333	formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
334	break;
335	case fmtUIntHexUpper:
336	formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
337	break;
338	case fmtUIntOctal:
339	formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
340	break;
341	case fmtUIntBinary:
342	formatUInt(x: arg.ui, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
343	break;
344	case fmtLongDecimal:
345	formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
346	break;
347	case fmtLongHex:
348	formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
349	break;
350	case fmtLongHexUpper:
351	formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
352	break;
353	case fmtLongOctal:
354	formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
355	break;
356	case fmtLongBinary:
357	formatInt(x: arg.l, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
358	break;
359	case fmtULongDecimal:
360	formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
361	break;
362	case fmtULongHex:
363	formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
364	break;
365	case fmtULongHexUpper:
366	formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
367	break;
368	case fmtULongOctal:
369	formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
370	break;
371	case fmtULongBinary:
372	formatUInt(x: arg.ul, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
373	break;
374	case fmtLongLongDecimal:
375	formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
376	break;
377	case fmtLongLongHex:
378	formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
379	break;
380	case fmtLongLongHexUpper:
381	formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
382	break;
383	case fmtLongLongOctal:
384	formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
385	break;
386	case fmtLongLongBinary:
387	formatInt(x: arg.ll, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
388	break;
389	case fmtULongLongDecimal:
390	formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: `10`, p: &str, len: &len);
391	break;
392	case fmtULongLongHex:
393	formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len);
394	break;
395	case fmtULongLongHexUpper:
396	formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: `16`, p: &str, len: &len, upperCase: true);
397	break;
398	case fmtULongLongOctal:
399	formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: `8`, p: &str, len: &len);
400	break;
401	case fmtULongLongBinary:
402	formatUInt(x: arg.ull, buf, bufSize: sizeof(buf), zeroFill, width, base: `2`, p: &str, len: &len);
403	break;
404	case fmtDouble:
405	formatDouble(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: false, p: &str, len: &len);
406	break;
407	case fmtDoubleTrim:
408	formatDouble(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: true, p: &str, len: &len);
409	break;
410	case fmtDoubleTrimSmallAware:
411	formatDoubleSmallAware(x: arg.f, buf, bufSize: sizeof(buf), prec, trim: true, p: &str, len: &len);
412	break;
413	case fmtChar:
414	buf[`0`] = arg.c;
415	str = buf;
416	len = `1`;
417	reverseAlign = !reverseAlign;
418	break;
419	case fmtString: {
420	str = arg.s;
421	const size_t strlen_str = strlen(s: str);
422	if (strlen_str > static_cast<size_t>(std::numeric_limits<int>::max())) {
423	error(category: errSyntaxWarning, pos: `0`, msg: "String truncated to INT_MAX bytes");
424	len = std::numeric_limits<int>::max();
425	} else {
426	len = static_cast<int>(strlen_str);
427	}
428	reverseAlign = !reverseAlign;
429	break;
430	}
431	case fmtGooString:
432	if (arg.gs) {
433	str = arg.gs->c_str();
434	len = arg.gs->getLength();
435	} else {
436	str = "(null)";
437	len = `6`;
438	}
439	reverseAlign = !reverseAlign;
440	break;
441	case fmtSpace:
442	str = buf;
443	len = `0`;
444	width = arg.i;
445	break;
446	}
447
448	// append the formatted arg, handling width and alignment
449	if (!reverseAlign && len < width) {
450	for (i = len; i < width; ++i) {
451	append(c: `' '`);
452	}
453	}
454	append(str, lengthA: len);
455	if (reverseAlign && len < width) {
456	for (i = len; i < width; ++i) {
457	append(c: `' '`);
458	}
459	}
460	}
461
462	} else if (*p0 == `'}'`) {
463	++p0;
464	if (*p0 == `'}'`) {
465	++p0;
466	}
467	append(c: `'}'`);
468
469	} else {
470	for (p1 = p0 + `1`; p1 && p1 != `'{'` && *p1 != `'}'`; ++p1) {
471	;
472	}
473	append(str: p0, lengthA: p1 - p0);
474	p0 = p1;
475	}
476	}
477
478	if (args != argsBuf) {
479	gfree(p: args);
480	}
481
482	return this;
483	}
484
namespace {

const char lowerCaseDigits[17] = "0123456789abcdef";
const char upperCaseDigits[17] = "0123456789ABCDEF";

// Formats the signed integer x in the given base.
//
// The characters are written right-aligned into buf[0..bufSize) without a
// terminating NUL; *p receives the address of the first character and *len
// the number of characters.  With zeroFill, zeros are inserted between the
// sign and the digits until the result (sign included) is width characters
// long.  Digits that do not fit into the buffer are dropped.
void formatInt(long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    const bool neg = x < 0;
    // Negate in unsigned arithmetic: -x would overflow for the most negative
    // long long.
    unsigned long long abs_x = neg ? 0ULL - static_cast<unsigned long long>(x) : static_cast<unsigned long long>(x);
    // number of leading bytes to keep free for the sign
    const int start = neg ? 1 : 0;
    int i = bufSize;

    if (abs_x == 0) {
        buf[--i] = '0';
    } else {
        while (i > start && abs_x) {
            buf[--i] = vals[abs_x % base];
            abs_x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > start && j < width - start; ++j) {
            buf[--i] = '0';
        }
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Unsigned counterpart of formatInt(): same buffer layout and zero-fill
// behaviour, but without a sign.
void formatUInt(unsigned long long x, char *buf, int bufSize, bool zeroFill, int width, int base, const char **p, int *len, bool upperCase)
{
    const char *vals = upperCase ? upperCaseDigits : lowerCaseDigits;
    int i = bufSize;

    if (x == 0) {
        buf[--i] = '0';
    } else {
        while (i > 0 && x) {
            buf[--i] = vals[x % base];
            x /= base;
        }
    }
    if (zeroFill) {
        for (int j = bufSize - i; i > 0 && j < width; ++j) {
            buf[--i] = '0';
        }
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Formats x in fixed-point notation with prec decimals, rounding half away
// from zero.
//
// The characters are written right-aligned into buf[0..bufSize) without a
// terminating NUL; *p receives the address of the first character and *len
// the number of characters.  With trim, trailing zeros of the fraction are
// omitted, and so is the decimal point when no fraction digit remains.
// NOTE(review): there is no special handling for non-finite x.
void formatDouble(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    const bool neg = x < 0;
    if (neg) {
        x = -x;
    }
    // Scale to an integral value; the digits are then peeled off from the
    // least significant end.
    x = std::floor(x * std::pow(10.0, prec) + 0.5);

    int i = bufSize;
    // When trimming, digits are suppressed until the first non-zero one.
    bool started = !trim;
    double x2;
    int d;

    // fraction digits; "i > 1" keeps buf[0] free for the sign
    for (int j = 0; j < prec && i > 1; ++j) {
        x2 = std::floor(0.1 * (x + 0.5));
        d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
        if (started || d != 0) {
            buf[--i] = '0' + d;
            started = true;
        }
        x = x2;
    }
    if (i > 1 && started) {
        buf[--i] = '.';
    }
    // integer digits; at least one is always written
    if (i > 1) {
        do {
            x2 = std::floor(0.1 * (x + 0.5));
            d = static_cast<int>(std::floor(x - 10 * x2 + 0.5));
            buf[--i] = '0' + d;
            x = x2;
        } while (i > 1 && x);
    }
    if (neg) {
        buf[--i] = '-';
    }
    *p = buf + i;
    *len = bufSize - i;
}

// Like formatDouble(), but for |x| < 0.1 the precision is raised by one for
// every factor of ten that |x| lies below 0.1, up to a precision of 16, so
// that small values do not collapse to zero.
void formatDoubleSmallAware(double x, char *buf, int bufSize, int prec, bool trim, const char **p, int *len)
{
    // The loop body never runs for |x| >= 0.1, so no separate branch is
    // needed for that case.
    for (double absX = std::fabs(x); absX < 0.1 && prec < 16; absX *= 10) {
        ++prec;
    }
    formatDouble(x, buf, bufSize, prec, trim, p, len);
}

}
601
602	GooString *GooString::lowerCase()
603	{
604	lowerCase(s&: *this);
605	return this;
606	}
607
608	void GooString::lowerCase(std::string &s)
609	{
610	for (auto &c : s) {
611	if (std::isupper(c)) {
612	c = std::tolower(c: c);
613	}
614	}
615	}
616
617	std::string GooString::toLowerCase(const std::string &s)
618	{
619	std::string newString = s;
620	lowerCase(s&: newString);
621	return s;
622	}
623

source code of poppler/goo/GooString.cc