single_byte.rs source code [crates/encoding_rs-0.8.32/src/single_byte.rs]

// Copyright Mozilla Foundation. See the COPYRIGHT
// file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
9
10	use super::*;
11	use crate::ascii::*;
12	use crate::data::position;
13	use crate::handles::*;
14	use crate::variant::*;
15
/// Decoder for a single-byte encoding driven by a 128-entry lookup table.
pub struct SingleByteDecoder {
    /// Mapping for the bytes `0x80..=0xFF`, indexed by `byte - 0x80`.
    /// Each entry is the UTF-16 code unit the byte decodes to; an entry of
    /// `0` marks a byte that has no mapping and is reported as malformed.
    table: &'static [u16; 128],
}
19
20	impl SingleByteDecoder {
21	pub fn new(data: &'static [u16; `128`]) -> VariantDecoder {
22	VariantDecoder::SingleByte(SingleByteDecoder { table: data })
23	}
24
25	pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
26	Some(byte_length)
27	}
28
29	pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
30	byte_length.checked_mul(`3`)
31	}
32
33	pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
34	byte_length.checked_mul(`3`)
35	}
36
37	pub fn decode_to_utf8_raw(
38	&mut self,
39	src: &[u8],
40	dst: &mut [u8],
41	_last: bool,
42	) -> (DecoderResult, usize, usize) {
43	let mut source = ByteSource::new(src);
44	let mut dest = Utf8Destination::new(dst);
45	'outermost: loop {
46	match dest.copy_ascii_from_check_space_bmp(&mut source) {
47	CopyAsciiResult::Stop(ret) => return ret,
48	CopyAsciiResult::GoOn((mut non_ascii, mut handle)) => 'middle: loop {
49	// Start non-boilerplate
50	//
51	// Since the non-ASCIIness of `non_ascii` is hidden from
52	// the optimizer, it can't figure out that it's OK to
53	// statically omit the bound check when accessing
54	// `[u16; 128]` with an index
55	// `non_ascii as usize - 0x80usize`.
56	let mapped =
57	unsafe { (self.table.get_unchecked(non_ascii as usize* - `0x80usize`)) };
58	// let mapped = self.table[non_ascii as usize - 0x80usize];
59	if mapped == `0u16` {
60	return (
61	DecoderResult::Malformed(`1`, `0`),
62	source.consumed(),
63	handle.written(),
64	);
65	}
66	let dest_again = handle.write_bmp_excl_ascii(mapped);
67	// End non-boilerplate
68	match source.check_available() {
69	Space::Full(src_consumed) => {
70	return (
71	DecoderResult::InputEmpty,
72	src_consumed,
73	dest_again.written(),
74	);
75	}
76	Space::Available(source_handle) => {
77	match dest_again.check_space_bmp() {
78	Space::Full(dst_written) => {
79	return (
80	DecoderResult::OutputFull,
81	source_handle.consumed(),
82	dst_written,
83	);
84	}
85	Space::Available(mut destination_handle) => {
86	let (mut b, unread_handle) = source_handle.read();
87	let source_again = unread_handle.commit();
88	'innermost: loop {
89	if b > `127` {
90	non_ascii = b;
91	handle = destination_handle;
92	continue 'middle;
93	}
94	// Testing on Haswell says that we should write the
95	// byte unconditionally instead of trying to unread it
96	// to make it part of the next SIMD stride.
97	let dest_again_again = destination_handle.write_ascii(b);
98	if b < `60` {
99	// We've got punctuation
100	match source_again.check_available() {
101	Space::Full(src_consumed_again) => {
102	return (
103	DecoderResult::InputEmpty,
104	src_consumed_again,
105	dest_again_again.written(),
106	);
107	}
108	Space::Available(source_handle_again) => {
109	match dest_again_again.check_space_bmp() {
110	Space::Full(dst_written_again) => {
111	return (
112	DecoderResult::OutputFull,
113	source_handle_again.consumed(),
114	dst_written_again,
115	);
116	}
117	Space::Available(
118	destination_handle_again,
119	) => {
120	let (b_again, _unread_handle_again) =
121	source_handle_again.read();
122	b = b_again;
123	destination_handle =
124	destination_handle_again;
125	continue 'innermost;
126	}
127	}
128	}
129	}
130	}
131	// We've got markup or ASCII text
132	continue 'outermost;
133	}
134	}
135	}
136	}
137	}
138	},
139	}
140	}
141	}
142
143	pub fn decode_to_utf16_raw(
144	&mut self,
145	src: &[u8],
146	dst: &mut [u16],
147	_last: bool,
148	) -> (DecoderResult, usize, usize) {
149	let (pending, length) = if dst.len() < src.len() {
150	(DecoderResult::OutputFull, dst.len())
151	} else {
152	(DecoderResult::InputEmpty, src.len())
153	};
154	let mut converted = `0usize`;
155	'outermost: loop {
156	match unsafe {
157	ascii_to_basic_latin(
158	src.as_ptr().add(converted),
159	dst.as_mut_ptr().add(converted),
160	length - converted,
161	)
162	} {
163	None => {
164	return (pending, length, length);
165	}
166	Some((mut non_ascii, consumed)) => {
167	converted += consumed;
168	'middle: loop {
169	// `converted` doesn't count the reading of `non_ascii` yet.
170	// Since the non-ASCIIness of `non_ascii` is hidden from
171	// the optimizer, it can't figure out that it's OK to
172	// statically omit the bound check when accessing
173	// `[u16; 128]` with an index
174	// `non_ascii as usize - 0x80usize`.
175	let mapped =
176	unsafe { (self.table.get_unchecked(non_ascii as usize* - `0x80usize`)) };
177	// let mapped = self.table[non_ascii as usize - 0x80usize];
178	if mapped == `0u16` {
179	return (
180	DecoderResult::Malformed(`1`, `0`),
181	converted + `1`, // +1 `for non_ascii`
182	converted,
183	);
184	}
185	unsafe {
186	// The bound check has already been performed
187	*(dst.get_unchecked_mut(converted)) = mapped;
188	}
189	converted += `1`;
190	// Next, handle ASCII punctuation and non-ASCII without
191	// going back to ASCII acceleration. Non-ASCII scripts
192	// use ASCII punctuation, so this avoid going to
193	// acceleration just for punctuation/space and then
194	// failing. This is a significant boost to non-ASCII
195	// scripts.
196	// TODO: Split out Latin converters without this part
197	// this stuff makes Latin script-conversion slower.
198	if converted == length {
199	return (pending, length, length);
200	}
201	let mut b = unsafe { *(src.get_unchecked(converted)) };
202	'innermost: loop {
203	if b > `127` {
204	non_ascii = b;
205	continue 'middle;
206	}
207	// Testing on Haswell says that we should write the
208	// byte unconditionally instead of trying to unread it
209	// to make it part of the next SIMD stride.
210	unsafe {
211	(dst.get_unchecked_mut(converted)) = u16*::from(b);
212	}
213	converted += `1`;
214	if b < `60` {
215	// We've got punctuation
216	if converted == length {
217	return (pending, length, length);
218	}
219	b = unsafe { *(src.get_unchecked(converted)) };
220	continue 'innermost;
221	}
222	// We've got markup or ASCII text
223	continue 'outermost;
224	}
225	}
226	}
227	}
228	}
229	}
230
231	pub fn latin1_byte_compatible_up_to(&self, buffer: &[u8]) -> usize {
232	let mut bytes = buffer;
233	let mut total = `0`;
234	loop {
235	if let Some((non_ascii, offset)) = validate_ascii(bytes) {
236	total += offset;
237	let mapped = unsafe { (self.table.get_unchecked(non_ascii as usize* - `0x80usize`)) };
238	if mapped != u16::from(non_ascii) {
239	return total;
240	}
241	total += `1`;
242	bytes = &bytes[offset + `1`..];
243	} else {
244	return total;
245	}
246	}
247	}
248	}
249
/// Encoder for a single-byte encoding, driven by the same 128-entry table
/// as the decoder plus a description of one run of consecutive code units.
pub struct SingleByteEncoder {
    /// Mapping for the bytes `0x80..=0xFF`, indexed by `byte - 0x80`; each
    /// entry is the UTF-16 code unit that byte represents.
    table: &'static [u16; 128],
    /// First code unit of the run that can be encoded by offset arithmetic.
    run_bmp_offset: usize,
    /// Index into `table` (that is, `byte - 0x80`) where the run starts.
    run_byte_offset: usize,
    /// Number of consecutive code units in the run.
    run_length: usize,
}
256
257	impl SingleByteEncoder {
258	pub fn new(
259	encoding: &'static Encoding,
260	data: &'static [u16; `128`],
261	run_bmp_offset: u16,
262	run_byte_offset: u8,
263	run_length: u8,
264	) -> Encoder {
265	Encoder::new(
266	encoding,
267	VariantEncoder::SingleByte(SingleByteEncoder {
268	table: data,
269	run_bmp_offset: run_bmp_offset as usize,
270	run_byte_offset: run_byte_offset as usize,
271	run_length: run_length as usize,
272	}),
273	)
274	}
275
276	pub fn max_buffer_length_from_utf16_without_replacement(
277	&self,
278	u16_length: usize,
279	) -> Option<usize> {
280	Some(u16_length)
281	}
282
283	pub fn max_buffer_length_from_utf8_without_replacement(
284	&self,
285	byte_length: usize,
286	) -> Option<usize> {
287	Some(byte_length)
288	}
289
290	#[inline(always)]
291	fn encode_u16(&self, code_unit: u16) -> Option<u8> {
292	// First, we see if the code unit falls into a run of consecutive
293	// code units that can be mapped by offset. This is very efficient
294	// for most non-Latin encodings as well as Latin1-ish encodings.
295	//
296	// For encodings that don't fit this pattern, the run (which may
297	// have the length of just one) just establishes the starting point
298	// for the next rule.
299	//
300	// Next, we do a forward linear search in the part of the index
301	// after the run. Even in non-Latin1-ish Latin encodings (except
302	// macintosh), the lower case letters are here.
303	//
304	// Next, we search the third quadrant up to the start of the run
305	// (upper case letters in Latin encodings except macintosh, in
306	// Greek and in KOI encodings) and then the second quadrant,
307	// except if the run stared before the third quadrant, we search
308	// the second quadrant up to the run.
309	//
310	// Last, we search the first quadrant, which has unused controls
311	// or punctuation in most encodings. This is bad for macintosh
312	// and IBM866, but those are rare.
313
314	// Run of consecutive units
315	let unit_as_usize = code_unit as usize;
316	let offset = unit_as_usize.wrapping_sub(self.run_bmp_offset);
317	if offset < self.run_length {
318	return Some((`128` + self.run_byte_offset + offset) as u8);
319	}
320
321	// Search after the run
322	let tail_start = self.run_byte_offset + self.run_length;
323	if let Some(pos) = position(&self.table[tail_start..], code_unit) {
324	return Some((`128` + tail_start + pos) as u8);
325	}
326
327	if self.run_byte_offset >= `64` {
328	// Search third quadrant before the run
329	if let Some(pos) = position(&self.table[`64`..self.run_byte_offset], code_unit) {
330	return Some(((`128` + `64`) + pos) as u8);
331	}
332
333	// Search second quadrant
334	if let Some(pos) = position(&self.table[`32`..`64`], code_unit) {
335	return Some(((`128` + `32`) + pos) as u8);
336	}
337	} else if let Some(pos) = position(&self.table[`32`..self.run_byte_offset], code_unit) {
338	// windows-1252, windows-874, ISO-8859-15 and ISO-8859-5
339	// Search second quadrant before the run
340	return Some(((`128` + `32`) + pos) as u8);
341	}
342
343	// Search first quadrant
344	if let Some(pos) = position(&self.table[..`32`], code_unit) {
345	return Some((`128` + pos) as u8);
346	}
347
348	None
349	}
350
351	ascii_compatible_bmp_encoder_function!(
352	{
353	match self.encode_u16(bmp) {
354	Some(byte) => handle.write_one(byte),
355	None => {
356	return (
357	EncoderResult::unmappable_from_bmp(bmp),
358	source.consumed(),
359	handle.written(),
360	);
361	}
362	}
363	},
364	bmp,
365	self,
366	source,
367	handle,
368	copy_ascii_to_check_space_one,
369	check_space_one,
370	encode_from_utf8_raw,
371	str,
372	Utf8Source,
373	`true`
374	);
375
376	pub fn encode_from_utf16_raw(
377	&mut self,
378	src: &[u16],
379	dst: &mut [u8],
380	_last: bool,
381	) -> (EncoderResult, usize, usize) {
382	let (pending, length) = if dst.len() < src.len() {
383	(EncoderResult::OutputFull, dst.len())
384	} else {
385	(EncoderResult::InputEmpty, src.len())
386	};
387	let mut converted = `0usize`;
388	'outermost: loop {
389	match unsafe {
390	basic_latin_to_ascii(
391	src.as_ptr().add(converted),
392	dst.as_mut_ptr().add(converted),
393	length - converted,
394	)
395	} {
396	None => {
397	return (pending, length, length);
398	}
399	Some((mut non_ascii, consumed)) => {
400	converted += consumed;
401	'middle: loop {
402	// `converted` doesn't count the reading of `non_ascii` yet.
403	match self.encode_u16(non_ascii) {
404	Some(byte) => {
405	unsafe {
406	*(dst.get_unchecked_mut(converted)) = byte;
407	}
408	converted += `1`;
409	}
410	None => {
411	// At this point, we need to know if we
412	// have a surrogate.
413	let high_bits = non_ascii & `0xFC00u16`;
414	if high_bits == `0xD800u16` {
415	// high surrogate
416	if converted + `1` == length {
417	// End of buffer. This surrogate is unpaired.
418	return (
419	EncoderResult::Unmappable('`\u{FFFD}`'),
420	converted + `1`, // +1 `for non_ascii`
421	converted,
422	);
423	}
424	let second =
425	u32::from(unsafe { *src.get_unchecked(converted + `1`) });
426	if second & `0xFC00u32` != `0xDC00u32` {
427	return (
428	EncoderResult::Unmappable('`\u{FFFD}`'),
429	converted + `1`, // +1 `for non_ascii`
430	converted,
431	);
432	}
433	// The next code unit is a low surrogate.
434	let astral: char = unsafe {
435	::core::char::from_u32_unchecked(
436	(u32::from(non_ascii) << `10`) + second
437	- (((`0xD800u32` << `10`) - `0x1_0000u32`) + `0xDC00u32`),
438	)
439	};
440	return (
441	EncoderResult::Unmappable(astral),
442	converted + `2`, // +2 `for non_ascii` and `second`
443	converted,
444	);
445	}
446	if high_bits == `0xDC00u16` {
447	// Unpaired low surrogate
448	return (
449	EncoderResult::Unmappable('`\u{FFFD}`'),
450	converted + `1`, // +1 `for non_ascii`
451	converted,
452	);
453	}
454	return (
455	EncoderResult::unmappable_from_bmp(non_ascii),
456	converted + `1`, // +1 `for non_ascii`
457	converted,
458	);
459	}
460	}
461	// Next, handle ASCII punctuation and non-ASCII without
462	// going back to ASCII acceleration. Non-ASCII scripts
463	// use ASCII punctuation, so this avoid going to
464	// acceleration just for punctuation/space and then
465	// failing. This is a significant boost to non-ASCII
466	// scripts.
467	// TODO: Split out Latin converters without this part
468	// this stuff makes Latin script-conversion slower.
469	if converted == length {
470	return (pending, length, length);
471	}
472	let mut unit = unsafe { *(src.get_unchecked(converted)) };
473	'innermost: loop {
474	if unit > `127` {
475	non_ascii = unit;
476	continue 'middle;
477	}
478	// Testing on Haswell says that we should write the
479	// byte unconditionally instead of trying to unread it
480	// to make it part of the next SIMD stride.
481	unsafe {
482	*(dst.get_unchecked_mut(converted)) = unit as u8;
483	}
484	converted += `1`;
485	if unit < `60` {
486	// We've got punctuation
487	if converted == length {
488	return (pending, length, length);
489	}
490	unit = unsafe { *(src.get_unchecked(converted)) };
491	continue 'innermost;
492	}
493	// We've got markup or ASCII text
494	continue 'outermost;
495	}
496	}
497	}
498	}
499	}
500	}
501	}
502
// Any copyright to the test code below this comment is dedicated to the
// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
505
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Round-trips byte 0xCA / U+05BA through windows-1255.
    #[test]
    fn test_windows_1255_ca() {
        decode(WINDOWS_1255, b"\xCA", "\u{05BA}");
        encode(WINDOWS_1255, "\u{05BA}", b"\xCA");
    }

    /// Exercises the punctuation fast path: Greek words separated by ASCII
    /// spaces and full stops.
    #[test]
    fn test_ascii_punctuation() {
        let bytes = b"\xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4. \xC1\xF5\xF4\xFC \xE5\xDF\xED\xE1\xE9 \xDD\xED\xE1 \xF4\xE5\xF3\xF4.";
        let characters = "\u{0391}\u{03C5}\u{03C4}\u{03CC} \
                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}. \u{0391}\u{03C5}\u{03C4}\u{03CC} \
                          \u{03B5}\u{03AF}\u{03BD}\u{03B1}\u{03B9} \u{03AD}\u{03BD}\u{03B1} \
                          \u{03C4}\u{03B5}\u{03C3}\u{03C4}.";
        decode(WINDOWS_1253, bytes, characters);
        encode(WINDOWS_1253, characters, bytes);
    }

    /// An unmapped byte (0xD2 in windows-1253) decodes to U+FFFD.
    #[test]
    fn test_decode_malformed() {
        decode(
            WINDOWS_1253,
            b"\xC1\xF5\xD2\xF4\xFC",
            "\u{0391}\u{03C5}\u{FFFD}\u{03C4}\u{03CC}",
        );
    }

    /// Unmappable characters are expected as decimal numeric character
    /// references in the output bytes.
    #[test]
    fn test_encode_unmappables() {
        encode(
            WINDOWS_1253,
            "\u{0391}\u{03C5}\u{2603}\u{03C4}\u{03CC}",
            b"\xC1\xF5&#9731;\xF4\xFC",
        );
        encode(
            WINDOWS_1253,
            "\u{0391}\u{03C5}\u{1F4A9}\u{03C4}\u{03CC}",
            b"\xC1\xF5&#128169;\xF4\xFC",
        );
    }

    /// Unpaired surrogates (low, high mid-buffer, high at end of buffer) are
    /// expected as the numeric character reference for U+FFFD.
    #[test]
    fn test_encode_unpaired_surrogates() {
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0xDCA9u16, 0x03C4u16, 0x03CCu16],
            b"\xC1\xF5&#65533;\xF4\xFC",
        );
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0xD83Du16, 0x03C4u16, 0x03CCu16],
            b"\xC1\xF5&#65533;\xF4\xFC",
        );
        encode_from_utf16(
            WINDOWS_1253,
            &[0x0391u16, 0x03C5u16, 0x03C4u16, 0x03CCu16, 0xD83Du16],
            b"\xC1\xF5\xF4\xFC&#65533;",
        );
    }

    /// Every non-ASCII byte value, `0x80..=0xFF`, in ascending order.
    pub const HIGH_BYTES: &[u8; 128] = &[
        0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
        0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
        0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
        0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
        0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
        0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
        0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
        0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    ];

    /// Decodes all of `HIGH_BYTES` with `encoding` and expects the entries of
    /// `data`, with unmapped (zero) entries replaced by U+FFFD.
    fn decode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
        let mut with_replacement = [0u16; 128];
        for (slot, &code_point) in with_replacement.iter_mut().zip(data.iter()) {
            *slot = if code_point == 0 { 0xFFFD } else { code_point };
        }

        decode_to_utf16(encoding, HIGH_BYTES, &with_replacement[..]);
    }

    /// Encodes every entry of `data` with `encoding` and expects the matching
    /// high byte, or a zero byte where the entry is zero (unmapped).
    fn encode_single_byte(encoding: &'static Encoding, data: &'static [u16; 128]) {
        let mut with_zeros = [0u8; 128];
        for ((slot, &code_point), &byte) in with_zeros
            .iter_mut()
            .zip(data.iter())
            .zip(HIGH_BYTES.iter())
        {
            *slot = if code_point == 0 { 0 } else { byte };
        }

        encode_from_utf16(encoding, data, &with_zeros[..]);
    }

    /// Two consecutive unpaired low surrogates each yield a replacement
    /// numeric character reference.
    #[test]
    fn test_single_byte_from_two_low_surrogates() {
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = WINDOWS_1253.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }

    // These tests are so self-referential that they are pretty useless.

    // BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
    // Instead, please regenerate using generate-encoding-data.py

    #[test]
    fn test_single_byte_decode() {
        decode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        decode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        if cfg!(miri) {
            // Miri is too slow
            return;
        }
        decode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        decode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        decode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        decode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        decode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        decode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        decode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        decode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        decode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        decode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        decode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        decode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        decode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        decode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        decode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        decode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        decode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        decode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        decode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        decode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        decode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        decode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        decode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        decode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        decode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }

    #[test]
    fn test_single_byte_encode() {
        encode_single_byte(IBM866, &data::SINGLE_BYTE_DATA.ibm866);
        encode_single_byte(ISO_8859_10, &data::SINGLE_BYTE_DATA.iso_8859_10);
        if cfg!(miri) {
            // Miri is too slow
            return;
        }
        encode_single_byte(ISO_8859_13, &data::SINGLE_BYTE_DATA.iso_8859_13);
        encode_single_byte(ISO_8859_14, &data::SINGLE_BYTE_DATA.iso_8859_14);
        encode_single_byte(ISO_8859_15, &data::SINGLE_BYTE_DATA.iso_8859_15);
        encode_single_byte(ISO_8859_16, &data::SINGLE_BYTE_DATA.iso_8859_16);
        encode_single_byte(ISO_8859_2, &data::SINGLE_BYTE_DATA.iso_8859_2);
        encode_single_byte(ISO_8859_3, &data::SINGLE_BYTE_DATA.iso_8859_3);
        encode_single_byte(ISO_8859_4, &data::SINGLE_BYTE_DATA.iso_8859_4);
        encode_single_byte(ISO_8859_5, &data::SINGLE_BYTE_DATA.iso_8859_5);
        encode_single_byte(ISO_8859_6, &data::SINGLE_BYTE_DATA.iso_8859_6);
        encode_single_byte(ISO_8859_7, &data::SINGLE_BYTE_DATA.iso_8859_7);
        encode_single_byte(ISO_8859_8, &data::SINGLE_BYTE_DATA.iso_8859_8);
        encode_single_byte(KOI8_R, &data::SINGLE_BYTE_DATA.koi8_r);
        encode_single_byte(KOI8_U, &data::SINGLE_BYTE_DATA.koi8_u);
        encode_single_byte(MACINTOSH, &data::SINGLE_BYTE_DATA.macintosh);
        encode_single_byte(WINDOWS_1250, &data::SINGLE_BYTE_DATA.windows_1250);
        encode_single_byte(WINDOWS_1251, &data::SINGLE_BYTE_DATA.windows_1251);
        encode_single_byte(WINDOWS_1252, &data::SINGLE_BYTE_DATA.windows_1252);
        encode_single_byte(WINDOWS_1253, &data::SINGLE_BYTE_DATA.windows_1253);
        encode_single_byte(WINDOWS_1254, &data::SINGLE_BYTE_DATA.windows_1254);
        encode_single_byte(WINDOWS_1255, &data::SINGLE_BYTE_DATA.windows_1255);
        encode_single_byte(WINDOWS_1256, &data::SINGLE_BYTE_DATA.windows_1256);
        encode_single_byte(WINDOWS_1257, &data::SINGLE_BYTE_DATA.windows_1257);
        encode_single_byte(WINDOWS_1258, &data::SINGLE_BYTE_DATA.windows_1258);
        encode_single_byte(WINDOWS_874, &data::SINGLE_BYTE_DATA.windows_874);
        encode_single_byte(X_MAC_CYRILLIC, &data::SINGLE_BYTE_DATA.x_mac_cyrillic);
    }
    // END GENERATED CODE
}
715