buffer.cc source code [libcody/buffer.cc]

1	// CODYlib -- mode:c++ --
2	// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org
3	// License: Apache v2.0
4
5	// Cody
6	#include "internal.hh"
7	// C++
8	#include <algorithm>
9	// C
10	#include <cstring>
11	// OS
12	#include <unistd.h>
13	#include <cerrno>
14
15	// MessageBuffer code
16
17	// Lines consist of words and end with a NEWLINE (0xa) char
18	// Whitespace characters are TAB (0x9) and SPACE (0x20)
19	// Words consist of non-whitespace chars separated by whitespace.
20	// Multiple lines in one transaction are indicated by ending non-final
21	// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE
22	// (so a continued line ends with SPACE SEMICOLON NEWLINE)
23	// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting.
24	// Quoting with '...'
25	// Anything outside of [-+_/%.a-zA-Z0-9] needs quoting
26	// Inside quotes, control chars (< SPACE), DEL, \' and \\ need escaping.
27	// Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>?
28	// Spaces separate words, UTF8 encoding for non-ascii chars
29
30	namespace Cody {
31	namespace Detail {
32
33	static const char CONTINUE = S2C(s: u8";");
34
35	void MessageBuffer::BeginLine ()
36	{
37	if (!buffer.empty ())
38	{
39	// Terminate the previous line with a continuation
40	buffer.reserve (n: buffer.size () + `3`);
41	buffer.push_back (x: S2C(s: u8" "));
42	buffer.push_back (x: CONTINUE);
43	buffer.push_back (x: S2C(s: u8"\n"));
44	}
45	lastBol = buffer.size ();
46	}
47
48	// QUOTE means 'maybe quote', we search it for quote-needing chars
49
50	void MessageBuffer::Append (char const str, bool* quote, size_t len)
51	{
52	if (len == ~size_t (`0`))
53	len = strlen (s: str);
54
55	if (!len && !quote)
56	return;
57
58	// We want to quote characters outside of [-+_A-Za-z0-9/%.], anything
59	// that could remotely be shell-active. UTF8 encoding for non-ascii.
60	if (quote && len)
61	{
62	quote = false;
63	// Scan looking for quote-needing characters. We could just
64	// append until we find one, but that's probably confusing
65	for (size_t ix = len; ix--;)
66	{
67	unsigned char c = (unsigned char)str[ix];
68	if (!((c >= S2C(s: u8"a") && c <= S2C(s: u8"z"))
69	\|\| (c >= S2C(s: u8"A") && c <= S2C(s: u8"Z"))
70	\|\| (c >= S2C(s: u8"0") && c <= S2C(s: u8"9"))
71	\|\| c == S2C(s: u8"-") \|\| c == S2C(s: u8"+") \|\| c == S2C(s: u8"_")
72	\|\| c == S2C(s: u8"/") \|\| c == S2C(s: u8"%") \|\| c == S2C(s: u8".")))
73	{
74	quote = true;
75	break;
76	}
77	}
78	}
79
80	// Maximal length of appended string
81	buffer.reserve (n: buffer.size () + len * (quote ? `3` : `1`) + `2`);
82
83	if (quote)
84	buffer.push_back (x: S2C(s: u8"'"));
85
86	for (auto *end = str + len; str != end;)
87	{
88	auto *e = end;
89
90	if (quote)
91	// Look for next escape-needing char. More relaxed than
92	// the earlier needs-quoting check.
93	for (e = str; e != end; ++e)
94	{
95	unsigned char c = (unsigned char)*e;
96	if (c < S2C(s: u8" ") \|\| c == `0x7f`
97	\|\| c == S2C(s: u8"\\") \|\| c == S2C(s: u8"'"))
98	break;
99	}
100	buffer.insert (position: buffer.end (), first: str, last: e);
101	str = e;
102
103	if (str == end)
104	break;
105
106	buffer.push_back (x: S2C(s: u8"\\"));
107	switch (unsigned char c = (unsigned char)*str++)
108	{
109	case S2C(s: u8"\t"):
110	c = S2C(s: u8"t");
111	goto append;
112
113	case S2C(s: u8"\n"):
114	c = S2C(s: u8"n");
115	goto append;
116
117	case S2C(s: u8"'"):
118	case S2C(s: u8"\\"):
119	append:
120	buffer.push_back (x: c);
121	break;
122
123	default:
124	// Full-on escape. Use 2 lower-case hex chars
125	for (unsigned shift = `8`; shift;)
126	{
127	shift -= `4`;
128
129	char nibble = (c >> shift) & `0xf`;
130	nibble += S2C(s: u8"0");
131	if (nibble > S2C(s: u8"9"))
132	nibble += S2C(s: u8"a") - (S2C(s: u8"9") + `1`);
133	buffer.push_back (x: nibble);
134	}
135	}
136	}
137
138	if (quote)
139	buffer.push_back (x: S2C(s: u8"'"));
140	}
141
142	void MessageBuffer::Append (char c)
143	{
144	buffer.push_back (x: c);
145	}
146
147	void MessageBuffer::AppendInteger (unsigned u)
148	{
149	// Sigh, even though std::to_string is C++11, we support building on
150	// gcc 4.8, which is a C++11 compiler lacking std::to_string. so
151	// have something horrible.
152	std::string v (`20`, `0`);
153	size_t len = snprintf (s: const_cast<char *> (v.data ()), maxlen: v.size (), format: "%u", u);
154	v.erase (pos: len);
155
156	AppendWord (str: v);
157	}
158
159	int MessageBuffer::Write (int fd) noexcept
160	{
161	size_t limit = buffer.size () - lastBol;
162	ssize_t count = write (fd: fd, buf: &buffer.data ()[lastBol], n: limit);
163
164	int err = `0`;
165	if (count < `0`)
166	err = errno;
167	else
168	{
169	lastBol += count;
170	if (size_t (count) != limit)
171	err = EAGAIN;
172	}
173
174	if (err != EAGAIN && err != EINTR)
175	{
176	// Reset for next message
177	buffer.clear ();
178	lastBol = `0`;
179	}
180
181	return err;
182	}
183
184	int MessageBuffer::Read (int fd) noexcept
185	{
186	constexpr size_t blockSize = `200`;
187
188	size_t lwm = buffer.size ();
189	size_t hwm = buffer.capacity ();
190	if (hwm - lwm < blockSize / `2`)
191	hwm += blockSize;
192	buffer.resize (new_size: hwm);
193
194	auto iter = buffer.begin () + lwm;
195	ssize_t count = read (fd: fd, buf: &*iter, nbytes: hwm - lwm);
196	buffer.resize (new_size: lwm + (count >= `0` ? count : `0`));
197
198	if (count < `0`)
199	return errno;
200
201	if (!count)
202	// End of file
203	return -`1`;
204
205	bool more = true;
206	for (;;)
207	{
208	auto newline = std::find (first: iter, last: buffer.end (), val: S2C(s: u8"\n"));
209	if (newline == buffer.end ())
210	break;
211	more = newline != buffer.begin () && newline [-`1`] == CONTINUE;
212	iter = newline + `1`;
213
214	if (iter == buffer.end ())
215	break;
216
217	if (!more)
218	{
219	// There is no continuation, but there are chars after the
220	// newline. Truncate the buffer and return an error
221	buffer.resize (new_size: iter - buffer.begin ());
222	return EINVAL;
223	}
224	}
225
226	return more ? EAGAIN : `0`;
227	}
228
229	int MessageBuffer::Lex (std::vector<std::string> &result)
230	{
231	result.clear ();
232
233	if (IsAtEnd ())
234	return ENOENT;
235
236	Assert (buffer.back () == S2C(u8"\n"));
237
238	auto iter = buffer.begin () + lastBol;
239
240	for (std::string word = nullptr*;;)
241	{
242	char c = *iter;
243
244	++iter;
245	if (c == S2C(s: u8" ") \|\| c == S2C(s: u8"\t"))
246	{
247	word = nullptr;
248	continue;
249	}
250
251	if (c == S2C(s: u8"\n"))
252	break;
253
254	if (c == CONTINUE)
255	{
256	// Line continuation
257	if (word \|\| *iter != S2C(s: u8"\n"))
258	goto malformed;
259	++iter;
260	break;
261	}
262
263	if (c <= S2C(s: u8" ") \|\| c >= `0x7f`)
264	goto malformed;
265
266	if (!word)
267	{
268	result.emplace_back ();
269	word = &result.back ();
270	}
271
272	if (c == S2C(s: u8"'"))
273	{
274	// Quoted word
275	for (;;)
276	{
277	c = *iter;
278
279	if (c == S2C(s: u8"\n"))
280	{
281	malformed:;
282	result.clear ();
283	iter = std::find (first: iter, last: buffer.end (), val: S2C(s: u8"\n"));
284	auto back = iter;
285	if (back [-`1`] == CONTINUE && back [-`2`] == S2C(s: u8" "))
286	// Smells like a line continuation
287	back -= `2`;
288	result.emplace_back (args: &buffer [lastBol],
289	args: back - buffer.begin () - lastBol);
290	++iter;
291	lastBol = iter - buffer.begin ();
292	return EINVAL;
293	}
294
295	if (c < S2C(s: u8" ") \|\| c >= `0x7f`)
296	goto malformed;
297
298	++iter;
299	if (c == S2C(s: u8"'"))
300	break;
301
302	if (c == S2C(s: u8"\\"))
303	// escape
304	switch (c = *iter)
305	{
306	case S2C(s: u8"\\"):
307	case S2C(s: u8"'"):
308	++iter;
309	break;
310
311	case S2C(s: u8"n"):
312	c = S2C(s: u8"\n");
313	++iter;
314	break;
315
316	case S2C(s: u8"_"):
317	// We used to escape SPACE as \_, so accept that
318	c = S2C(s: u8" ");
319	++iter;
320	break;
321
322	case S2C(s: u8"t"):
323	c = S2C(s: u8"\t");
324	++iter;
325	break;
326
327	default:
328	{
329	unsigned v = `0`;
330	for (unsigned nibble = `0`; nibble != `2`; nibble++)
331	{
332	c = *iter;
333	if (c < S2C(s: u8"0"))
334	{
335	if (!nibble)
336	goto malformed;
337	break;
338	}
339	else if (c <= S2C(s: u8"9"))
340	c -= S2C(s: u8"0");
341	else if (c < S2C(s: u8"a"))
342	{
343	if (!nibble)
344	goto malformed;
345	break;
346	}
347	else if (c <= S2C(s: u8"f"))
348	c -= S2C(s: u8"a") - `10`;
349	else
350	{
351	if (!nibble)
352	goto malformed;
353	break;
354	}
355	++iter;
356	v = (v << `4`) \| c;
357	}
358	c = v;
359	}
360	}
361	word->push_back (c: c);
362	}
363	}
364	else
365	// Unquoted character
366	word->push_back (c: c);
367	}
368	lastBol = iter - buffer.begin ();
369	if (result.empty ())
370	return ENOENT;
371
372	return `0`;
373	}
374
375	void MessageBuffer::LexedLine (std::string &str)
376	{
377	if (lastBol)
378	{
379	size_t pos = lastBol - `1`;
380	for (; pos; pos--)
381	if (buffer [pos-`1`] == S2C(s: u8"\n"))
382	break;
383
384	size_t end = lastBol - `1`;
385	if (buffer [end-`1`] == CONTINUE && buffer [end-`2`] == S2C(s: u8" "))
386	// Strip line continuation
387	end -= `2`;
388	str.append (s: &buffer [pos], n: end - pos);
389	}
390	}
391	} // Detail
392	} // Cody
393

source code of libcody/buffer.cc