1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use super::*;
11use crate::handles::*;
12use crate::variant::*;
13
14cfg_if! {
15 if #[cfg(feature = "simd-accel")] {
16 use simd_funcs::*;
17 use packed_simd::u16x8;
18
19 #[inline(always)]
20 fn shift_upper(unpacked: u16x8) -> u16x8 {
21 let highest_ascii = u16x8::splat(0x7F);
22 unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) }
23 } else {
24 }
25}
26
27pub struct UserDefinedDecoder;
28
29impl UserDefinedDecoder {
30 pub fn new() -> VariantDecoder {
31 VariantDecoder::UserDefined(UserDefinedDecoder)
32 }
33
34 pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
35 Some(byte_length)
36 }
37
38 pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
39 byte_length.checked_mul(3)
40 }
41
42 pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
43 byte_length.checked_mul(3)
44 }
45
46 decoder_function!(
47 {},
48 {},
49 {},
50 {
51 if b < 0x80 {
52 // ASCII run not optimized, because binary data expected
53 destination_handle.write_ascii(b);
54 continue;
55 }
56 destination_handle.write_upper_bmp(u16::from(b) + 0xF700);
57 continue;
58 },
59 self,
60 src_consumed,
61 dest,
62 source,
63 b,
64 destination_handle,
65 _unread_handle,
66 check_space_bmp,
67 decode_to_utf8_raw,
68 u8,
69 Utf8Destination
70 );
71
72 #[cfg(not(feature = "simd-accel"))]
73 pub fn decode_to_utf16_raw(
74 &mut self,
75 src: &[u8],
76 dst: &mut [u16],
77 _last: bool,
78 ) -> (DecoderResult, usize, usize) {
79 let (pending, length) = if dst.len() < src.len() {
80 (DecoderResult::OutputFull, dst.len())
81 } else {
82 (DecoderResult::InputEmpty, src.len())
83 };
84 let src_trim = &src[..length];
85 let dst_trim = &mut dst[..length];
86 src_trim
87 .iter()
88 .zip(dst_trim.iter_mut())
89 .for_each(|(from, to)| {
90 *to = {
91 let unit = *from;
92 if unit < 0x80 {
93 u16::from(unit)
94 } else {
95 u16::from(unit) + 0xF700
96 }
97 }
98 });
99 (pending, length, length)
100 }
101
102 #[cfg(feature = "simd-accel")]
103 pub fn decode_to_utf16_raw(
104 &mut self,
105 src: &[u8],
106 dst: &mut [u16],
107 _last: bool,
108 ) -> (DecoderResult, usize, usize) {
109 let (pending, length) = if dst.len() < src.len() {
110 (DecoderResult::OutputFull, dst.len())
111 } else {
112 (DecoderResult::InputEmpty, src.len())
113 };
114 // Not bothering with alignment
115 let tail_start = length & !0xF;
116 let simd_iterations = length >> 4;
117 let src_ptr = src.as_ptr();
118 let dst_ptr = dst.as_mut_ptr();
119 for i in 0..simd_iterations {
120 let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) };
121 let (first, second) = simd_unpack(input);
122 unsafe {
123 store8_unaligned(dst_ptr.add(i * 16), shift_upper(first));
124 store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second));
125 }
126 }
127 let src_tail = &src[tail_start..length];
128 let dst_tail = &mut dst[tail_start..length];
129 src_tail
130 .iter()
131 .zip(dst_tail.iter_mut())
132 .for_each(|(from, to)| {
133 *to = {
134 let unit = *from;
135 if unit < 0x80 {
136 u16::from(unit)
137 } else {
138 u16::from(unit) + 0xF700
139 }
140 }
141 });
142 (pending, length, length)
143 }
144}
145
146pub struct UserDefinedEncoder;
147
148impl UserDefinedEncoder {
149 pub fn new(encoding: &'static Encoding) -> Encoder {
150 Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder))
151 }
152
153 pub fn max_buffer_length_from_utf16_without_replacement(
154 &self,
155 u16_length: usize,
156 ) -> Option<usize> {
157 Some(u16_length)
158 }
159
160 pub fn max_buffer_length_from_utf8_without_replacement(
161 &self,
162 byte_length: usize,
163 ) -> Option<usize> {
164 Some(byte_length)
165 }
166
167 encoder_functions!(
168 {},
169 {
170 if c <= '\u{7F}' {
171 // TODO optimize ASCII run
172 destination_handle.write_one(c as u8);
173 continue;
174 }
175 if c < '\u{F780}' || c > '\u{F7FF}' {
176 return (
177 EncoderResult::Unmappable(c),
178 unread_handle.consumed(),
179 destination_handle.written(),
180 );
181 }
182 destination_handle.write_one((u32::from(c) - 0xF700) as u8);
183 continue;
184 },
185 self,
186 src_consumed,
187 source,
188 dest,
189 c,
190 destination_handle,
191 unread_handle,
192 check_space_one
193 );
194}
195
196// Any copyright to the test code below this comment is dedicated to the
197// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
198
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Runs the shared `decode` test helper against `X_USER_DEFINED`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Runs the shared `encode` test helper against `X_USER_DEFINED`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        let cases: [(&[u8], &str); 4] = [
            // Empty
            (b"", ""),
            // ASCII
            (b"\x61\x62", "\u{0061}\u{0062}"),
            // Upper half only
            (b"\x80\xFF", "\u{F780}\u{F7FF}"),
            // Mixed input longer than 16 bytes
            (
                b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
                "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
            ),
        ];
        for &(bytes, expect) in cases.iter() {
            decode_x_user_defined(bytes, expect);
        }
    }

    #[test]
    fn test_x_user_defined_encode() {
        let cases: [(&str, &[u8]); 4] = [
            // Empty
            ("", b""),
            // ASCII
            ("\u{0061}\u{0062}", b"\x61\x62"),
            // Both ends of the mappable private-use range
            ("\u{F780}\u{F7FF}", b"\x80\xFF"),
            // Just outside the range on either side: numeric character references
            ("\u{F77F}\u{F800}", b"&#63359;&#63488;"),
        ];
        for &(string, expect) in cases.iter() {
            encode_x_user_defined(string, expect);
        }
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Two unpaired low surrogates in a row.
        let input = [0xDC00u16, 0xDEDEu16];
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();

        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&input, &mut output[..], true);

        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
250