1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | use super::*; |
11 | use crate::handles::*; |
12 | use crate::variant::*; |
13 | |
14 | cfg_if! { |
15 | if #[cfg(feature = "simd-accel" )] { |
16 | use simd_funcs::*; |
17 | use core::simd::u16x8; |
18 | use core::simd::cmp::SimdPartialOrd; |
19 | |
20 | #[inline(always)] |
21 | fn shift_upper(unpacked: u16x8) -> u16x8 { |
22 | let highest_ascii = u16x8::splat(0x7F); |
23 | unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } |
24 | } else { |
25 | } |
26 | } |
27 | |
28 | pub struct UserDefinedDecoder; |
29 | |
30 | impl UserDefinedDecoder { |
31 | pub fn new() -> VariantDecoder { |
32 | VariantDecoder::UserDefined(UserDefinedDecoder) |
33 | } |
34 | |
35 | pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> { |
36 | Some(byte_length) |
37 | } |
38 | |
39 | pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> { |
40 | byte_length.checked_mul(3) |
41 | } |
42 | |
43 | pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> { |
44 | byte_length.checked_mul(3) |
45 | } |
46 | |
47 | decoder_function!( |
48 | {}, |
49 | {}, |
50 | {}, |
51 | { |
52 | if b < 0x80 { |
53 | // ASCII run not optimized, because binary data expected |
54 | destination_handle.write_ascii(b); |
55 | continue; |
56 | } |
57 | destination_handle.write_upper_bmp(u16::from(b) + 0xF700); |
58 | continue; |
59 | }, |
60 | self, |
61 | src_consumed, |
62 | dest, |
63 | source, |
64 | b, |
65 | destination_handle, |
66 | _unread_handle, |
67 | check_space_bmp, |
68 | decode_to_utf8_raw, |
69 | u8, |
70 | Utf8Destination |
71 | ); |
72 | |
73 | #[cfg (not(feature = "simd-accel" ))] |
74 | pub fn decode_to_utf16_raw( |
75 | &mut self, |
76 | src: &[u8], |
77 | dst: &mut [u16], |
78 | _last: bool, |
79 | ) -> (DecoderResult, usize, usize) { |
80 | let (pending, length) = if dst.len() < src.len() { |
81 | (DecoderResult::OutputFull, dst.len()) |
82 | } else { |
83 | (DecoderResult::InputEmpty, src.len()) |
84 | }; |
85 | let src_trim = &src[..length]; |
86 | let dst_trim = &mut dst[..length]; |
87 | src_trim |
88 | .iter() |
89 | .zip(dst_trim.iter_mut()) |
90 | .for_each(|(from, to)| { |
91 | *to = { |
92 | let unit = *from; |
93 | if unit < 0x80 { |
94 | u16::from(unit) |
95 | } else { |
96 | u16::from(unit) + 0xF700 |
97 | } |
98 | } |
99 | }); |
100 | (pending, length, length) |
101 | } |
102 | |
103 | #[cfg (feature = "simd-accel" )] |
104 | pub fn decode_to_utf16_raw( |
105 | &mut self, |
106 | src: &[u8], |
107 | dst: &mut [u16], |
108 | _last: bool, |
109 | ) -> (DecoderResult, usize, usize) { |
110 | let (pending, length) = if dst.len() < src.len() { |
111 | (DecoderResult::OutputFull, dst.len()) |
112 | } else { |
113 | (DecoderResult::InputEmpty, src.len()) |
114 | }; |
115 | // Not bothering with alignment |
116 | let tail_start = length & !0xF; |
117 | let simd_iterations = length >> 4; |
118 | let src_ptr = src.as_ptr(); |
119 | let dst_ptr = dst.as_mut_ptr(); |
120 | // Safety: This is `for i in 0..length / 16` |
121 | for i in 0..simd_iterations { |
122 | // Safety: This is in bounds: length is the minumum valid length for both src/dst |
123 | // and i ranges to length/16, so multiplying by 16 will always be `< length` and can do |
124 | // a 16 byte read |
125 | let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) }; |
126 | let (first, second) = simd_unpack(input); |
127 | unsafe { |
128 | // Safety: same as above, but this is two consecutive 8-byte reads |
129 | store8_unaligned(dst_ptr.add(i * 16), shift_upper(first)); |
130 | store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second)); |
131 | } |
132 | } |
133 | let src_tail = &src[tail_start..length]; |
134 | let dst_tail = &mut dst[tail_start..length]; |
135 | src_tail |
136 | .iter() |
137 | .zip(dst_tail.iter_mut()) |
138 | .for_each(|(from, to)| { |
139 | *to = { |
140 | let unit = *from; |
141 | if unit < 0x80 { |
142 | u16::from(unit) |
143 | } else { |
144 | u16::from(unit) + 0xF700 |
145 | } |
146 | } |
147 | }); |
148 | (pending, length, length) |
149 | } |
150 | } |
151 | |
152 | pub struct UserDefinedEncoder; |
153 | |
154 | impl UserDefinedEncoder { |
155 | pub fn new(encoding: &'static Encoding) -> Encoder { |
156 | Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder)) |
157 | } |
158 | |
159 | pub fn max_buffer_length_from_utf16_without_replacement( |
160 | &self, |
161 | u16_length: usize, |
162 | ) -> Option<usize> { |
163 | Some(u16_length) |
164 | } |
165 | |
166 | pub fn max_buffer_length_from_utf8_without_replacement( |
167 | &self, |
168 | byte_length: usize, |
169 | ) -> Option<usize> { |
170 | Some(byte_length) |
171 | } |
172 | |
173 | encoder_functions!( |
174 | {}, |
175 | { |
176 | if c <= ' \u{7F}' { |
177 | // TODO optimize ASCII run |
178 | destination_handle.write_one(c as u8); |
179 | continue; |
180 | } |
181 | if c < ' \u{F780}' || c > ' \u{F7FF}' { |
182 | return ( |
183 | EncoderResult::Unmappable(c), |
184 | unread_handle.consumed(), |
185 | destination_handle.written(), |
186 | ); |
187 | } |
188 | destination_handle.write_one((u32::from(c) - 0xF700) as u8); |
189 | continue; |
190 | }, |
191 | self, |
192 | src_consumed, |
193 | source, |
194 | dest, |
195 | c, |
196 | destination_handle, |
197 | unread_handle, |
198 | check_space_one |
199 | ); |
200 | } |
201 | |
202 | // Any copyright to the test code below this comment is dedicated to the |
203 | // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ |
204 | |
205 | #[cfg (all(test, feature = "alloc" ))] |
206 | mod tests { |
207 | use super::super::testing::*; |
208 | use super::super::*; |
209 | |
210 | fn decode_x_user_defined(bytes: &[u8], expect: &str) { |
211 | decode(X_USER_DEFINED, bytes, expect); |
212 | } |
213 | |
214 | fn encode_x_user_defined(string: &str, expect: &[u8]) { |
215 | encode(X_USER_DEFINED, string, expect); |
216 | } |
217 | |
218 | #[test ] |
219 | fn test_x_user_defined_decode() { |
220 | // Empty |
221 | decode_x_user_defined(b"" , "" ); |
222 | |
223 | // ASCII |
224 | decode_x_user_defined(b" \x61\x62" , " \u{0061}\u{0062}" ); |
225 | |
226 | decode_x_user_defined(b" \x80\xFF" , " \u{F780}\u{F7FF}" ); |
227 | decode_x_user_defined(b" \x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62" , " \u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}" ); |
228 | } |
229 | |
230 | #[test ] |
231 | fn test_x_user_defined_encode() { |
232 | // Empty |
233 | encode_x_user_defined("" , b"" ); |
234 | |
235 | // ASCII |
236 | encode_x_user_defined(" \u{0061}\u{0062}" , b" \x61\x62" ); |
237 | |
238 | encode_x_user_defined(" \u{F780}\u{F7FF}" , b" \x80\xFF" ); |
239 | encode_x_user_defined(" \u{F77F}\u{F800}" , b"" ); |
240 | } |
241 | |
242 | #[test ] |
243 | fn test_x_user_defined_from_two_low_surrogates() { |
244 | let expectation = b"��" ; |
245 | let mut output = [0u8; 40]; |
246 | let mut encoder = X_USER_DEFINED.new_encoder(); |
247 | let (result, read, written, had_errors) = |
248 | encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true); |
249 | assert_eq!(result, CoderResult::InputEmpty); |
250 | assert_eq!(read, 2); |
251 | assert_eq!(written, expectation.len()); |
252 | assert!(had_errors); |
253 | assert_eq!(&output[..written], expectation); |
254 | } |
255 | } |
256 | |