x_user_defined.rs source code [crates/encoding_rs/src/x_user_defined.rs]

1	// Copyright Mozilla Foundation. See the COPYRIGHT
2	// file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	use super::*;
11	use crate::handles::*;
12	use crate::variant::*;
13
14	cfg_if! {
15	if #[cfg(feature = "simd-accel")] {
16	use simd_funcs::*;
17	use core::simd::u16x8;
18	use core::simd::cmp::SimdPartialOrd;
19
20	#[inline(always)]
21	fn shift_upper(unpacked: u16x8) -> u16x8 {
22	let highest_ascii = u16x8::splat(`0x7F`);
23	unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(`0xF700`), u16x8::splat(`0`)) }
24	} else {
25	}
26	}
27
28	pub struct UserDefinedDecoder;
29
30	impl UserDefinedDecoder {
31	pub fn new() -> VariantDecoder {
32	VariantDecoder::UserDefined(UserDefinedDecoder)
33	}
34
35	pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
36	Some(byte_length)
37	}
38
39	pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
40	byte_length.checked_mul(`3`)
41	}
42
43	pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
44	byte_length.checked_mul(`3`)
45	}
46
47	decoder_function!(
48	{},
49	{},
50	{},
51	{
52	if b < `0x80` {
53	// ASCII run not optimized, because binary data expected
54	destination_handle.write_ascii(b);
55	continue;
56	}
57	destination_handle.write_upper_bmp(u16::from(b) + `0xF700`);
58	continue;
59	},
60	self,
61	src_consumed,
62	dest,
63	source,
64	b,
65	destination_handle,
66	_unread_handle,
67	check_space_bmp,
68	decode_to_utf8_raw,
69	u8,
70	Utf8Destination
71	);
72
73	#[cfg(not(feature = "simd-accel"))]
74	pub fn decode_to_utf16_raw(
75	&mut self,
76	src: &[u8],
77	dst: &mut [u16],
78	_last: bool,
79	) -> (DecoderResult, usize, usize) {
80	let (pending, length) = if dst.len() < src.len() {
81	(DecoderResult::OutputFull, dst.len())
82	} else {
83	(DecoderResult::InputEmpty, src.len())
84	};
85	let src_trim = &src[..length];
86	let dst_trim = &mut dst[..length];
87	src_trim
88	.iter()
89	.zip(dst_trim.iter_mut())
90	.for_each(\|(from, to)\| {
91	*to = {
92	let unit = *from;
93	if unit < `0x80` {
94	u16::from(unit)
95	} else {
96	u16::from(unit) + `0xF700`
97	}
98	}
99	});
100	(pending, length, length)
101	}
102
103	#[cfg(feature = "simd-accel")]
104	pub fn decode_to_utf16_raw(
105	&mut self,
106	src: &[u8],
107	dst: &mut [u16],
108	_last: bool,
109	) -> (DecoderResult, usize, usize) {
110	let (pending, length) = if dst.len() < src.len() {
111	(DecoderResult::OutputFull, dst.len())
112	} else {
113	(DecoderResult::InputEmpty, src.len())
114	};
115	// Not bothering with alignment
116	let tail_start = length & !`0xF`;
117	let simd_iterations = length >> `4`;
118	let src_ptr = src.as_ptr();
119	let dst_ptr = dst.as_mut_ptr();
120	// Safety: This is `for i in 0..length / 16`
121	for i in `0`..simd_iterations {
122	// Safety: This is in bounds: length is the minumum valid length for both src/dst
123	// and i ranges to length/16, so multiplying by 16 will always be `< length` and can do
124	// a 16 byte read
125	let input = unsafe { load16_unaligned(src_ptr.add(i * `16`)) };
126	let (first, second) = simd_unpack(input);
127	unsafe {
128	// Safety: same as above, but this is two consecutive 8-byte reads
129	store8_unaligned(dst_ptr.add(i * `16`), shift_upper(first));
130	store8_unaligned(dst_ptr.add((i * `16`) + `8`), shift_upper(second));
131	}
132	}
133	let src_tail = &src[tail_start..length];
134	let dst_tail = &mut dst[tail_start..length];
135	src_tail
136	.iter()
137	.zip(dst_tail.iter_mut())
138	.for_each(\|(from, to)\| {
139	*to = {
140	let unit = *from;
141	if unit < `0x80` {
142	u16::from(unit)
143	} else {
144	u16::from(unit) + `0xF700`
145	}
146	}
147	});
148	(pending, length, length)
149	}
150	}
151
152	pub struct UserDefinedEncoder;
153
154	impl UserDefinedEncoder {
155	pub fn new(encoding: &'static Encoding) -> Encoder {
156	Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder))
157	}
158
159	pub fn max_buffer_length_from_utf16_without_replacement(
160	&self,
161	u16_length: usize,
162	) -> Option<usize> {
163	Some(u16_length)
164	}
165
166	pub fn max_buffer_length_from_utf8_without_replacement(
167	&self,
168	byte_length: usize,
169	) -> Option<usize> {
170	Some(byte_length)
171	}
172
173	encoder_functions!(
174	{},
175	{
176	if c <= '`\u{7F}`' {
177	// TODO optimize ASCII run
178	destination_handle.write_one(c as u8);
179	continue;
180	}
181	if c < '`\u{F780}`' \|\| c > '`\u{F7FF}`' {
182	return (
183	EncoderResult::Unmappable(c),
184	unread_handle.consumed(),
185	destination_handle.written(),
186	);
187	}
188	destination_handle.write_one((u32::from(c) - `0xF700`) as u8);
189	continue;
190	},
191	self,
192	src_consumed,
193	source,
194	dest,
195	c,
196	destination_handle,
197	unread_handle,
198	check_space_one
199	);
200	}
201
202	// Any copyright to the test code below this comment is dedicated to the
203	// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
204
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Decodes `bytes` as x-user-defined and checks the result against `expect`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Encodes `string` as x-user-defined and checks the result against `expect`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        // Empty
        decode_x_user_defined(b"", "");

        // ASCII
        decode_x_user_defined(b"\x61\x62", "\u{0061}\u{0062}");

        // Upper half
        decode_x_user_defined(b"\x80\xFF", "\u{F780}\u{F7FF}");

        // Mixed input longer than 16 bytes, so that the SIMD path (when
        // enabled) runs both its vector loop and its scalar tail.
        decode_x_user_defined(
            b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
            "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
        );
    }

    #[test]
    fn test_x_user_defined_encode() {
        // Empty
        encode_x_user_defined("", b"");

        // ASCII
        encode_x_user_defined("\u{0061}\u{0062}", b"\x61\x62");

        // Boundaries of the mappable range
        encode_x_user_defined("\u{F780}\u{F7FF}", b"\x80\xFF");

        // The code points just outside the mappable range are unmappable and
        // come out as decimal numeric character references
        // (0xF77F = 63359, 0xF800 = 63488).
        encode_x_user_defined("\u{F77F}\u{F800}", b"&#63359;&#63488;");
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Each unpaired surrogate is treated as U+FFFD (65533), which is
        // unmappable and therefore written as a numeric character reference.
        // The expectation is spelled in ASCII because byte string literals
        // cannot contain non-ASCII characters.
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
256