| 1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 7 | // option. This file may not be copied, modified, or distributed |
| 8 | // except according to those terms. |
| 9 | |
| 10 | use super::*; |
| 11 | use crate::handles::*; |
| 12 | use crate::variant::*; |
| 13 | |
| 14 | cfg_if! { |
| 15 | if #[cfg(feature = "simd-accel" )] { |
| 16 | use simd_funcs::*; |
| 17 | use core::simd::u16x8; |
| 18 | use core::simd::cmp::SimdPartialOrd; |
| 19 | |
| 20 | #[inline(always)] |
| 21 | fn shift_upper(unpacked: u16x8) -> u16x8 { |
| 22 | let highest_ascii = u16x8::splat(0x7F); |
| 23 | unpacked + unpacked.simd_gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } |
| 24 | } else { |
| 25 | } |
| 26 | } |
| 27 | |
| 28 | pub struct UserDefinedDecoder; |
| 29 | |
| 30 | impl UserDefinedDecoder { |
| 31 | pub fn new() -> VariantDecoder { |
| 32 | VariantDecoder::UserDefined(UserDefinedDecoder) |
| 33 | } |
| 34 | |
| 35 | pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> { |
| 36 | Some(byte_length) |
| 37 | } |
| 38 | |
| 39 | pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> { |
| 40 | byte_length.checked_mul(3) |
| 41 | } |
| 42 | |
| 43 | pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> { |
| 44 | byte_length.checked_mul(3) |
| 45 | } |
| 46 | |
| 47 | decoder_function!( |
| 48 | {}, |
| 49 | {}, |
| 50 | {}, |
| 51 | { |
| 52 | if b < 0x80 { |
| 53 | // ASCII run not optimized, because binary data expected |
| 54 | destination_handle.write_ascii(b); |
| 55 | continue; |
| 56 | } |
| 57 | destination_handle.write_upper_bmp(u16::from(b) + 0xF700); |
| 58 | continue; |
| 59 | }, |
| 60 | self, |
| 61 | src_consumed, |
| 62 | dest, |
| 63 | source, |
| 64 | b, |
| 65 | destination_handle, |
| 66 | _unread_handle, |
| 67 | check_space_bmp, |
| 68 | decode_to_utf8_raw, |
| 69 | u8, |
| 70 | Utf8Destination |
| 71 | ); |
| 72 | |
| 73 | #[cfg (not(feature = "simd-accel" ))] |
| 74 | pub fn decode_to_utf16_raw( |
| 75 | &mut self, |
| 76 | src: &[u8], |
| 77 | dst: &mut [u16], |
| 78 | _last: bool, |
| 79 | ) -> (DecoderResult, usize, usize) { |
| 80 | let (pending, length) = if dst.len() < src.len() { |
| 81 | (DecoderResult::OutputFull, dst.len()) |
| 82 | } else { |
| 83 | (DecoderResult::InputEmpty, src.len()) |
| 84 | }; |
| 85 | let src_trim = &src[..length]; |
| 86 | let dst_trim = &mut dst[..length]; |
| 87 | src_trim |
| 88 | .iter() |
| 89 | .zip(dst_trim.iter_mut()) |
| 90 | .for_each(|(from, to)| { |
| 91 | *to = { |
| 92 | let unit = *from; |
| 93 | if unit < 0x80 { |
| 94 | u16::from(unit) |
| 95 | } else { |
| 96 | u16::from(unit) + 0xF700 |
| 97 | } |
| 98 | } |
| 99 | }); |
| 100 | (pending, length, length) |
| 101 | } |
| 102 | |
| 103 | #[cfg (feature = "simd-accel" )] |
| 104 | pub fn decode_to_utf16_raw( |
| 105 | &mut self, |
| 106 | src: &[u8], |
| 107 | dst: &mut [u16], |
| 108 | _last: bool, |
| 109 | ) -> (DecoderResult, usize, usize) { |
| 110 | let (pending, length) = if dst.len() < src.len() { |
| 111 | (DecoderResult::OutputFull, dst.len()) |
| 112 | } else { |
| 113 | (DecoderResult::InputEmpty, src.len()) |
| 114 | }; |
| 115 | // Not bothering with alignment |
| 116 | let tail_start = length & !0xF; |
| 117 | let simd_iterations = length >> 4; |
| 118 | let src_ptr = src.as_ptr(); |
| 119 | let dst_ptr = dst.as_mut_ptr(); |
| 120 | // Safety: This is `for i in 0..length / 16` |
| 121 | for i in 0..simd_iterations { |
| 122 | // Safety: This is in bounds: length is the minumum valid length for both src/dst |
| 123 | // and i ranges to length/16, so multiplying by 16 will always be `< length` and can do |
| 124 | // a 16 byte read |
| 125 | let input = unsafe { load16_unaligned(src_ptr.add(i * 16)) }; |
| 126 | let (first, second) = simd_unpack(input); |
| 127 | unsafe { |
| 128 | // Safety: same as above, but this is two consecutive 8-byte reads |
| 129 | store8_unaligned(dst_ptr.add(i * 16), shift_upper(first)); |
| 130 | store8_unaligned(dst_ptr.add((i * 16) + 8), shift_upper(second)); |
| 131 | } |
| 132 | } |
| 133 | let src_tail = &src[tail_start..length]; |
| 134 | let dst_tail = &mut dst[tail_start..length]; |
| 135 | src_tail |
| 136 | .iter() |
| 137 | .zip(dst_tail.iter_mut()) |
| 138 | .for_each(|(from, to)| { |
| 139 | *to = { |
| 140 | let unit = *from; |
| 141 | if unit < 0x80 { |
| 142 | u16::from(unit) |
| 143 | } else { |
| 144 | u16::from(unit) + 0xF700 |
| 145 | } |
| 146 | } |
| 147 | }); |
| 148 | (pending, length, length) |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | pub struct UserDefinedEncoder; |
| 153 | |
| 154 | impl UserDefinedEncoder { |
| 155 | pub fn new(encoding: &'static Encoding) -> Encoder { |
| 156 | Encoder::new(encoding, VariantEncoder::UserDefined(UserDefinedEncoder)) |
| 157 | } |
| 158 | |
| 159 | pub fn max_buffer_length_from_utf16_without_replacement( |
| 160 | &self, |
| 161 | u16_length: usize, |
| 162 | ) -> Option<usize> { |
| 163 | Some(u16_length) |
| 164 | } |
| 165 | |
| 166 | pub fn max_buffer_length_from_utf8_without_replacement( |
| 167 | &self, |
| 168 | byte_length: usize, |
| 169 | ) -> Option<usize> { |
| 170 | Some(byte_length) |
| 171 | } |
| 172 | |
| 173 | encoder_functions!( |
| 174 | {}, |
| 175 | { |
| 176 | if c <= ' \u{7F}' { |
| 177 | // TODO optimize ASCII run |
| 178 | destination_handle.write_one(c as u8); |
| 179 | continue; |
| 180 | } |
| 181 | if c < ' \u{F780}' || c > ' \u{F7FF}' { |
| 182 | return ( |
| 183 | EncoderResult::Unmappable(c), |
| 184 | unread_handle.consumed(), |
| 185 | destination_handle.written(), |
| 186 | ); |
| 187 | } |
| 188 | destination_handle.write_one((u32::from(c) - 0xF700) as u8); |
| 189 | continue; |
| 190 | }, |
| 191 | self, |
| 192 | src_consumed, |
| 193 | source, |
| 194 | dest, |
| 195 | c, |
| 196 | destination_handle, |
| 197 | unread_handle, |
| 198 | check_space_one |
| 199 | ); |
| 200 | } |
| 201 | |
| 202 | // Any copyright to the test code below this comment is dedicated to the |
| 203 | // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ |
| 204 | |
#[cfg(all(test, feature = "alloc"))]
mod tests {
    use super::super::testing::*;
    use super::super::*;

    /// Decodes `bytes` as x-user-defined and checks the result against `expect`.
    fn decode_x_user_defined(bytes: &[u8], expect: &str) {
        decode(X_USER_DEFINED, bytes, expect);
    }

    /// Encodes `string` as x-user-defined and checks the result against `expect`.
    fn encode_x_user_defined(string: &str, expect: &[u8]) {
        encode(X_USER_DEFINED, string, expect);
    }

    #[test]
    fn test_x_user_defined_decode() {
        // Empty
        decode_x_user_defined(b"", "");

        // ASCII
        decode_x_user_defined(b"\x61\x62", "\u{0061}\u{0062}");

        // Both ends of the upper half
        decode_x_user_defined(b"\x80\xFF", "\u{F780}\u{F7FF}");

        // Twenty bytes of mixed input: more than one 16-byte stride, so
        // a SIMD build of `decode_to_utf16_raw` would cover both the
        // vectorized prefix and the scalar tail.
        decode_x_user_defined(
            b"\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62\x80\xFF\x61\x62",
            "\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}\u{F780}\u{F7FF}\u{0061}\u{0062}",
        );
    }

    #[test]
    fn test_x_user_defined_encode() {
        // Empty
        encode_x_user_defined("", b"");

        // ASCII
        encode_x_user_defined("\u{0061}\u{0062}", b"\x61\x62");

        // Both ends of the mappable private-use range
        encode_x_user_defined("\u{F780}\u{F7FF}", b"\x80\xFF");

        // One below and one above the mappable range: both are
        // unmappable and are expected to come out as decimal numeric
        // character references (0xF77F = 63359, 0xF800 = 63488).
        encode_x_user_defined("\u{F77F}\u{F800}", b"&#63359;&#63488;");
    }

    #[test]
    fn test_x_user_defined_from_two_low_surrogates() {
        // Each unpaired low surrogate is treated as U+FFFD, which is
        // unmappable and therefore written as the numeric character
        // reference for 0xFFFD = 65533.
        let expectation = b"&#65533;&#65533;";
        let mut output = [0u8; 40];
        let mut encoder = X_USER_DEFINED.new_encoder();
        let (result, read, written, had_errors) =
            encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 2);
        assert_eq!(written, expectation.len());
        assert!(had_errors);
        assert_eq!(&output[..written], expectation);
    }
}
| 256 | |