1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | use super::*; |
11 | use crate::handles::*; |
12 | use crate::variant::*; |
13 | |
14 | cfg_if! { |
15 | if #[cfg(feature = "simd-accel" )] { |
16 | use simd_funcs::*; |
17 | use packed_simd::u16x8; |
18 | |
19 | #[inline (always)] |
20 | fn shift_upper(unpacked: u16x8) -> u16x8 { |
21 | let highest_ascii = u16x8::splat(0x7F); |
22 | unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } |
23 | } else { |
24 | } |
25 | } |
26 | |
27 | pub struct UserDefinedDecoder; |
28 | |
29 | impl UserDefinedDecoder { |
30 | pub fn new() -> VariantDecoder { |
31 | VariantDecoder::UserDefined(UserDefinedDecoder) |
32 | } |
33 | |
34 | pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> { |
35 | Some(byte_length) |
36 | } |
37 | |
38 | pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> { |
39 | byte_length.checked_mul(3) |
40 | } |
41 | |
42 | pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> { |
43 | byte_length.checked_mul(3) |
44 | } |
45 | |
46 | decoder_function!( |
47 | {}, |
48 | {}, |
49 | {}, |
50 | { |
51 | if b < 0x80 { |
52 | // ASCII run not optimized, because binary data expected |
53 | destination_handle.write_ascii(b); |
54 | continue; |
55 | } |
56 | destination_handle.write_upper_bmp(u16::from(b) + 0xF700); |
57 | continue; |
58 | }, |
59 | self, |
60 | src_consumed, |
61 | dest, |
62 | source, |
63 | b, |
64 | destination_handle, |
65 | _unread_handle, |
66 | check_space_bmp, |
67 | decode_to_utf8_raw, |
68 | u8, |
69 | Utf8Destination |
70 | ); |
71 | |
72 | #[cfg (not(feature = "simd-accel" ))] |
73 | pub fn decode_to_utf16_raw( |
74 | &mut self, |
75 | src: &[u8], |
76 | dst: &mut [u16], |
77 | _last: bool, |
78 | ) -> (DecoderResult, usize, usize) { |
79 | let (pending, length) = if dst.len() < src.len() { |
80 | (DecoderResult::OutputFull, dst.len()) |
81 | } else { |
82 | (DecoderResult::InputEmpty, src.len()) |
83 | }; |
84 | let src_trim = &src[..length]; |
85 | let dst_trim = &mut dst[..length]; |
86 | src_trim |
87 | .iter() |
88 | .zip(dst_trim.iter_mut()) |
89 | .for_each(|(from, to)| { |
90 | *to = { |
91 | let unit = *from; |
92 | if unit < 0x80 { |
93 | u16::from(unit) |
94 | } else { |
95 | u16::from(unit) + 0xF700 |
96 | } |
97 | } |
98 | }); |
99 | (pending, length, length) |
100 | } |
101 | |
102 | #[cfg (feature = "simd-accel" )] |
103 | pub fn decode_to_utf16_raw( |
104 | &mut self, |
105 | src: &[u8], |
106 | dst: &mut [u16], |
107 | _last: bool, |
108 | ) -> (DecoderResult, usize, usize) { |
109 | let (pending, length) = if dst.len() < src.len() { |
110 | (DecoderResult::OutputFull, dst.len()) |
111 | } else { |
112 | (DecoderResult::InputEmpty, src.len()) |
113 | }; |
114 | // Not bothering with alignment |
115 | let tail_start = length & !0xF; |
116 | let simd_iterations = length >> 4; |
117 | let src_ptr = src.as_ptr(); |
118 | let dst_ptr = dst.as_mut_ptr(); |
119 | for i in 0..simd_iterations { |
120 | let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) }; |
121 | let (first, second) = simd_unpack(input); |
122 | unsafe { |
123 | store8_unaligned(dst_ptr.add(i * 16), shift_upper(first)); |
124 | store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second)); |
125 | } |
126 | } |
127 | let src_tail = &src[tail_start..length]; |
128 | let dst_tail = &mut dst[tail_start..length]; |
129 | src_tail |
130 | .iter() |
131 | .zip(dst_tail.iter_mut()) |
132 | .for_each(|(from, to)| { |
133 | *to = { |
134 | let unit = *from; |
135 | if unit < 0x80 { |
136 | u16::from(unit) |
137 | } else { |
138 | u16::from(unit) + 0xF700 |
139 | } |
140 | } |
141 | }); |
142 | (pending, length, length) |
143 | } |
144 | } |
145 | |
146 | pub struct UserDefinedEncoder; |
147 | |
148 | impl UserDefinedEncoder { |
149 | pub fn new(encoding: &'static Encoding) -> Encoder { |
150 | Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder)) |
151 | } |
152 | |
153 | pub fn max_buffer_length_from_utf16_without_replacement( |
154 | &self, |
155 | u16_length: usize, |
156 | ) -> Option<usize> { |
157 | Some(u16_length) |
158 | } |
159 | |
160 | pub fn max_buffer_length_from_utf8_without_replacement( |
161 | &self, |
162 | byte_length: usize, |
163 | ) -> Option<usize> { |
164 | Some(byte_length) |
165 | } |
166 | |
167 | encoder_functions!( |
168 | {}, |
169 | { |
170 | if c <= ' \u{7F}' { |
171 | // TODO optimize ASCII run |
172 | destination_handle.write_one(c as u8); |
173 | continue; |
174 | } |
175 | if c < ' \u{F780}' || c > ' \u{F7FF}' { |
176 | return ( |
177 | EncoderResult::Unmappable(c), |
178 | unread_handle.consumed(), |
179 | destination_handle.written(), |
180 | ); |
181 | } |
182 | destination_handle.write_one((u32::from(c) - 0xF700) as u8); |
183 | continue; |
184 | }, |
185 | self, |
186 | src_consumed, |
187 | source, |
188 | dest, |
189 | c, |
190 | destination_handle, |
191 | unread_handle, |
192 | check_space_one |
193 | ); |
194 | } |
195 | |
196 | // Any copyright to the test code below this comment is dedicated to the |
197 | // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ |
198 | |
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Asserts that decoding `bytes` as x-user-defined yields `expect`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Asserts that encoding `string` as x-user-defined yields `expect`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        // Empty
        decode_x_user_defined(b"", "");

        // ASCII
        decode_x_user_defined(b" \x61\x62", " \u{0061}\u{0062}");

        // Upper half: 0x80..=0xFF decode to U+F780..=U+F7FF.
        decode_x_user_defined(b" \x80\xFF", " \u{F780}\u{F7FF}");
        // More than 16 bytes, so the SIMD build runs both its vector loop and its scalar tail.
        decode_x_user_defined(b" \x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62", " \u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}");
    }

    #[test]
    fn test_x_user_defined_encode() {
        // Empty
        encode_x_user_defined("", b"");

        // ASCII
        encode_x_user_defined(" \u{0061}\u{0062}", b" \x61\x62");

        // U+F780..=U+F7FF encode to 0x80..=0xFF.
        encode_x_user_defined(" \u{F780}\u{F7FF}", b" \x80\xFF");
        // U+F77F and U+F800 sit just outside the mappable range, so each is
        // expected as a decimal numeric character reference (0xF77F = 63359,
        // 0xF800 = 63488). The leading ASCII space passes through unchanged.
        encode_x_user_defined(" \u{F77F}\u{F800}", b" &#63359;&#63488;");
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Each unpaired low surrogate is expected to come out as the numeric
        // character reference for U+FFFD (65533). The references are spelled
        // in ASCII because byte string literals cannot hold non-ASCII text.
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
250 | |