1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
//! This module provides structs that use lifetimes to couple bounds checking
//! and space availability checking, and to detach those checks from the
//! actual slice reading/writing.
13 | //! |
14 | //! At present, the internals of the implementation are safe code, so the |
15 | //! bound checks currently also happen on read/write. Once this code works, |
16 | //! the plan is to replace the internals with unsafe code that omits the |
17 | //! bound check at the read/write time. |
18 | |
19 | #[cfg (all( |
20 | feature = "simd-accel" , |
21 | any( |
22 | target_feature = "sse2" , |
23 | all(target_endian = "little" , target_arch = "aarch64" ), |
24 | all(target_endian = "little" , target_feature = "neon" ) |
25 | ) |
26 | ))] |
27 | use crate::simd_funcs::*; |
28 | |
29 | #[cfg (all( |
30 | feature = "simd-accel" , |
31 | any( |
32 | target_feature = "sse2" , |
33 | all(target_endian = "little" , target_arch = "aarch64" ), |
34 | all(target_endian = "little" , target_feature = "neon" ) |
35 | ) |
36 | ))] |
37 | use packed_simd::u16x8; |
38 | |
39 | use super::DecoderResult; |
40 | use super::EncoderResult; |
41 | use crate::ascii::*; |
42 | use crate::utf_8::convert_utf8_to_utf16_up_to_invalid; |
43 | use crate::utf_8::utf8_valid_up_to; |
44 | |
/// Outcome of asking a source or destination whether another read or write
/// is possible.
pub enum Space<T> {
    /// There is room; `T` is the handle through which the read or write is
    /// then performed.
    Available(T),
    /// There is no room; the payload is the position reached so far (the
    /// `consumed()` / `written()` count of the object that was asked).
    Full(usize),
}
49 | |
/// Result of a bulk ASCII copy.
pub enum CopyAsciiResult<T, U> {
    /// The copy ran until input or output was exhausted; `T` describes the
    /// final state.
    Stop(T),
    /// A non-ASCII unit interrupted the copy; `U` carries that unit together
    /// with whatever the caller needs in order to continue.
    GoOn(U),
}
54 | |
/// A non-ASCII character, carried either as a single `u16` or as a `char`.
pub enum NonAscii {
    /// A Basic Multilingual Plane value outside the ASCII range.
    BmpExclAscii(u16),
    /// A character outside the Basic Multilingual Plane.
    Astral(char),
}
59 | |
/// A character classified as ASCII or non-ASCII.
pub enum Unicode {
    /// An ASCII character, carried as its byte value.
    Ascii(u8),
    /// Anything else; see [`NonAscii`].
    NonAscii(NonAscii),
}
64 | |
65 | // Start UTF-16LE/BE fast path |
66 | |
/// Compile-time byte-order selector used to parameterize the UTF-16 fast
/// paths.
pub trait Endian {
    /// `true` when code units in this byte order must be byte-swapped after
    /// being loaded in the target's native byte order.
    const OPPOSITE_ENDIAN: bool;
}
70 | |
71 | pub struct BigEndian; |
72 | |
73 | impl Endian for BigEndian { |
74 | #[cfg (target_endian = "little" )] |
75 | const OPPOSITE_ENDIAN: bool = true; |
76 | |
77 | #[cfg (target_endian = "big" )] |
78 | const OPPOSITE_ENDIAN: bool = false; |
79 | } |
80 | |
81 | pub struct LittleEndian; |
82 | |
83 | impl Endian for LittleEndian { |
84 | #[cfg (target_endian = "little" )] |
85 | const OPPOSITE_ENDIAN: bool = false; |
86 | |
87 | #[cfg (target_endian = "big" )] |
88 | const OPPOSITE_ENDIAN: bool = true; |
89 | } |
90 | |
/// A view over a run of 16-bit units stored at a byte address that need not
/// be 2-byte aligned.
///
/// The view holds a raw pointer and carries no lifetime, so nothing ties it
/// to the buffer it was created from.
#[derive(Debug, Copy, Clone)]
struct UnalignedU16Slice {
    /// Address of the first byte of the first unit.
    ptr: *const u8,
    /// Length in 16-bit units (not bytes).
    len: usize,
}
96 | |
97 | impl UnalignedU16Slice { |
98 | #[inline (always)] |
99 | pub unsafe fn new(ptr: *const u8, len: usize) -> UnalignedU16Slice { |
100 | UnalignedU16Slice { ptr, len } |
101 | } |
102 | |
103 | #[inline (always)] |
104 | pub fn trim_last(&mut self) { |
105 | assert!(self.len > 0); |
106 | self.len -= 1; |
107 | } |
108 | |
109 | #[inline (always)] |
110 | pub fn at(&self, i: usize) -> u16 { |
111 | use core::mem::MaybeUninit; |
112 | |
113 | assert!(i < self.len); |
114 | unsafe { |
115 | let mut u: MaybeUninit<u16> = MaybeUninit::uninit(); |
116 | ::core::ptr::copy_nonoverlapping(self.ptr.add(i * 2), u.as_mut_ptr() as *mut u8, 2); |
117 | u.assume_init() |
118 | } |
119 | } |
120 | |
121 | #[cfg (feature = "simd-accel" )] |
122 | #[inline (always)] |
123 | pub fn simd_at(&self, i: usize) -> u16x8 { |
124 | assert!(i + SIMD_STRIDE_SIZE / 2 <= self.len); |
125 | let byte_index = i * 2; |
126 | unsafe { to_u16_lanes(load16_unaligned(self.ptr.add(byte_index))) } |
127 | } |
128 | |
129 | #[inline (always)] |
130 | pub fn len(&self) -> usize { |
131 | self.len |
132 | } |
133 | |
134 | #[inline (always)] |
135 | pub fn tail(&self, from: usize) -> UnalignedU16Slice { |
136 | // XXX the return value should be restricted not to |
137 | // outlive self. |
138 | assert!(from <= self.len); |
139 | unsafe { UnalignedU16Slice::new(self.ptr.add(from * 2), self.len - from) } |
140 | } |
141 | |
142 | #[cfg (feature = "simd-accel" )] |
143 | #[inline (always)] |
144 | pub fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> { |
145 | assert!(self.len <= other.len()); |
146 | let mut offset = 0; |
147 | if SIMD_STRIDE_SIZE / 2 <= self.len { |
148 | let len_minus_stride = self.len - SIMD_STRIDE_SIZE / 2; |
149 | loop { |
150 | let mut simd = self.simd_at(offset); |
151 | if E::OPPOSITE_ENDIAN { |
152 | simd = simd_byte_swap(simd); |
153 | } |
154 | unsafe { |
155 | store8_unaligned(other.as_mut_ptr().add(offset), simd); |
156 | } |
157 | if contains_surrogates(simd) { |
158 | break; |
159 | } |
160 | offset += SIMD_STRIDE_SIZE / 2; |
161 | if offset > len_minus_stride { |
162 | break; |
163 | } |
164 | } |
165 | } |
166 | while offset < self.len { |
167 | let unit = swap_if_opposite_endian::<E>(self.at(offset)); |
168 | other[offset] = unit; |
169 | if super::in_range16(unit, 0xD800, 0xE000) { |
170 | return Some((unit, offset)); |
171 | } |
172 | offset += 1; |
173 | } |
174 | None |
175 | } |
176 | |
177 | #[cfg (not(feature = "simd-accel" ))] |
178 | #[inline (always)] |
179 | fn copy_bmp_to<E: Endian>(&self, other: &mut [u16]) -> Option<(u16, usize)> { |
180 | assert!(self.len <= other.len()); |
181 | for (i, target) in other.iter_mut().enumerate().take(self.len) { |
182 | let unit = swap_if_opposite_endian::<E>(self.at(i)); |
183 | *target = unit; |
184 | if super::in_range16(unit, 0xD800, 0xE000) { |
185 | return Some((unit, i)); |
186 | } |
187 | } |
188 | None |
189 | } |
190 | } |
191 | |
192 | #[inline (always)] |
193 | fn copy_unaligned_basic_latin_to_ascii_alu<E: Endian>( |
194 | src: UnalignedU16Slice, |
195 | dst: &mut [u8], |
196 | offset: usize, |
197 | ) -> CopyAsciiResult<usize, (u16, usize)> { |
198 | let len: usize = ::core::cmp::min(v1:src.len(), v2:dst.len()); |
199 | let mut i: usize = 0usize; |
200 | loop { |
201 | if i == len { |
202 | return CopyAsciiResult::Stop(i + offset); |
203 | } |
204 | let unit: u16 = swap_if_opposite_endian::<E>(unit:src.at(i)); |
205 | if unit > 0x7F { |
206 | return CopyAsciiResult::GoOn((unit, i + offset)); |
207 | } |
208 | dst[i] = unit as u8; |
209 | i += 1; |
210 | } |
211 | } |
212 | |
213 | #[inline (always)] |
214 | fn swap_if_opposite_endian<E: Endian>(unit: u16) -> u16 { |
215 | if E::OPPOSITE_ENDIAN { |
216 | unit.swap_bytes() |
217 | } else { |
218 | unit |
219 | } |
220 | } |
221 | |
222 | #[cfg (not(feature = "simd-accel" ))] |
223 | #[inline (always)] |
224 | fn copy_unaligned_basic_latin_to_ascii<E: Endian>( |
225 | src: UnalignedU16Slice, |
226 | dst: &mut [u8], |
227 | ) -> CopyAsciiResult<usize, (u16, usize)> { |
228 | copy_unaligned_basic_latin_to_ascii_alu::<E>(src, dst, offset:0) |
229 | } |
230 | |
/// Copies Basic Latin (ASCII-range) UTF-16 units from `src` into `dst` as
/// bytes (build with `simd-accel`).
///
/// Whole strides are handled with SIMD while they contain only Basic Latin;
/// whatever remains (including a stride that failed the check) is handed to
/// the scalar routine. Returns `Stop(n)` when input or output ran out after
/// `n` units, or `GoOn((unit, pos))` for the first non-ASCII unit and its
/// index relative to the start of `src`.
#[cfg(feature = "simd-accel")]
#[inline(always)]
fn copy_unaligned_basic_latin_to_ascii<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> CopyAsciiResult<usize, (u16, usize)> {
    let len = ::core::cmp::min(src.len(), dst.len());
    let mut offset = 0;
    // Each iteration consumes SIMD_STRIDE_SIZE code units (two u16 vectors)
    // and produces SIMD_STRIDE_SIZE bytes (one packed vector).
    if SIMD_STRIDE_SIZE <= len {
        // Largest offset at which a full stride still fits.
        let len_minus_stride = len - SIMD_STRIDE_SIZE;
        loop {
            let mut first = src.simd_at(offset);
            let mut second = src.simd_at(offset + (SIMD_STRIDE_SIZE / 2));
            if E::OPPOSITE_ENDIAN {
                first = simd_byte_swap(first);
                second = simd_byte_swap(second);
            }
            // Checking the OR of both vectors tests all lanes at once.
            if !simd_is_basic_latin(first | second) {
                // Nothing from this stride is written; the scalar tail below
                // finds the exact offending unit.
                break;
            }
            let packed = simd_pack(first, second);
            // `offset + SIMD_STRIDE_SIZE <= len <= dst.len()` here, so the
            // store stays inside `dst` (assuming `store16_unaligned` writes
            // SIMD_STRIDE_SIZE bytes — TODO confirm against `simd_funcs`).
            unsafe {
                store16_unaligned(dst.as_mut_ptr().add(offset), packed);
            }
            offset += SIMD_STRIDE_SIZE;
            if offset > len_minus_stride {
                break;
            }
        }
    }
    // Finish with the scalar loop, passing `offset` so reported positions are
    // relative to the original start.
    copy_unaligned_basic_latin_to_ascii_alu::<E>(src.tail(offset), &mut dst[offset..], offset)
}
263 | |
/// Converts UTF-16 code units from `src` (byte order `E`) into UTF-8 in
/// `dst`.
///
/// Returns `(read, written, had_unpaired_surrogate)`:
/// * `read` — number of code units consumed from `src`;
/// * `written` — number of bytes stored in `dst`;
/// * the flag is `true` when conversion stopped at an unpaired surrogate. In
///   that case `read` already counts the offending surrogate, and nothing
///   was written for it.
///
/// With the flag `false`, the function stopped because the input ran out or
/// because fewer than four bytes of output space remained ahead of a
/// non-ASCII unit. If `dst` is shorter than four bytes nothing is converted.
#[inline(always)]
fn convert_unaligned_utf16_to_utf8<E: Endian>(
    src: UnalignedU16Slice,
    dst: &mut [u8],
) -> (usize, usize, bool) {
    if dst.len() < 4 {
        return (0, 0, false);
    }
    let mut src_pos = 0usize;
    let mut dst_pos = 0usize;
    let src_len = src.len();
    // A non-ASCII unit is only started while `dst_pos < dst_len_minus_three`,
    // i.e. while at least four bytes (the longest UTF-8 sequence) are free.
    let dst_len_minus_three = dst.len() - 3;
    'outer: loop {
        // Bulk-copy the ASCII run; `non_ascii` is the unit that ended it.
        let mut non_ascii = match copy_unaligned_basic_latin_to_ascii::<E>(
            src.tail(src_pos),
            &mut dst[dst_pos..],
        ) {
            CopyAsciiResult::GoOn((unit, read_written)) => {
                src_pos += read_written;
                dst_pos += read_written;
                unit
            }
            CopyAsciiResult::Stop(read_written) => {
                return (src_pos + read_written, dst_pos + read_written, false);
            }
        };
        if dst_pos >= dst_len_minus_three {
            break 'outer;
        }
        // We have enough destination space to commit to
        // having read `non_ascii`.
        src_pos += 1;
        'inner: loop {
            // Wrapping subtraction maps the surrogate range 0xD800..=0xDFFF
            // onto 0..=0x7FF, so one unsigned comparison classifies the unit.
            let non_ascii_minus_surrogate_start = non_ascii.wrapping_sub(0xD800);
            if non_ascii_minus_surrogate_start > (0xDFFF - 0xD800) {
                // Not a surrogate: two- or three-byte sequence.
                if non_ascii < 0x800 {
                    dst[dst_pos] = ((non_ascii >> 6) | 0xC0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                } else {
                    dst[dst_pos] = ((non_ascii >> 12) | 0xE0) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = (((non_ascii & 0xFC0) >> 6) | 0x80) as u8;
                    dst_pos += 1;
                    dst[dst_pos] = ((non_ascii & 0x3F) | 0x80) as u8;
                    dst_pos += 1;
                }
            } else if non_ascii_minus_surrogate_start <= (0xDBFF - 0xD800) {
                // high surrogate
                if src_pos < src_len {
                    let second = swap_if_opposite_endian::<E>(src.at(src_pos));
                    let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00);
                    if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) {
                        // The next code unit is a low surrogate. Advance position.
                        src_pos += 1;
                        // Combine the pair into a scalar value; the constant
                        // folds the 0xD800/0xDC00 biases and the 0x10000
                        // offset into a single subtraction.
                        let point = (u32::from(non_ascii) << 10) + u32::from(second)
                            - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32);

                        dst[dst_pos] = ((point >> 18) | 0xF0u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0x3F000u32) >> 12) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = (((point & 0xFC0u32) >> 6) | 0x80u32) as u8;
                        dst_pos += 1;
                        dst[dst_pos] = ((point & 0x3Fu32) | 0x80u32) as u8;
                        dst_pos += 1;
                    } else {
                        // The next code unit is not a low surrogate. Don't advance
                        // position and treat the high surrogate as unpaired.
                        return (src_pos, dst_pos, true);
                    }
                } else {
                    // Unpaired surrogate at the end of buffer
                    return (src_pos, dst_pos, true);
                }
            } else {
                // Unpaired low surrogate
                return (src_pos, dst_pos, true);
            }
            if dst_pos >= dst_len_minus_three || src_pos == src_len {
                break 'outer;
            }
            // Peek at the next unit: stay in the inner loop for another
            // non-ASCII unit, otherwise write the ASCII byte and return to
            // the bulk ASCII copy.
            let unit = swap_if_opposite_endian::<E>(src.at(src_pos));
            src_pos += 1;
            if unit > 0x7F {
                non_ascii = unit;
                continue 'inner;
            }
            dst[dst_pos] = unit as u8;
            dst_pos += 1;
            continue 'outer;
        }
    }
    (src_pos, dst_pos, false)
}
360 | |
361 | // Byte source |
362 | |
/// A byte slice paired with a read cursor.
pub struct ByteSource<'a> {
    /// The input bytes.
    slice: &'a [u8],
    /// Index of the next byte to read; also the number of bytes consumed.
    pos: usize,
}
367 | |
368 | impl<'a> ByteSource<'a> { |
369 | #[inline (always)] |
370 | pub fn new(src: &[u8]) -> ByteSource { |
371 | ByteSource { slice: src, pos: 0 } |
372 | } |
373 | #[inline (always)] |
374 | pub fn check_available<'b>(&'b mut self) -> Space<ByteReadHandle<'b, 'a>> { |
375 | if self.pos < self.slice.len() { |
376 | Space::Available(ByteReadHandle::new(self)) |
377 | } else { |
378 | Space::Full(self.consumed()) |
379 | } |
380 | } |
381 | #[inline (always)] |
382 | fn read(&mut self) -> u8 { |
383 | let ret = self.slice[self.pos]; |
384 | self.pos += 1; |
385 | ret |
386 | } |
387 | #[inline (always)] |
388 | fn unread(&mut self) -> usize { |
389 | self.pos -= 1; |
390 | self.pos |
391 | } |
392 | #[inline (always)] |
393 | pub fn consumed(&self) -> usize { |
394 | self.pos |
395 | } |
396 | } |
397 | |
/// Permission to read one byte from a [`ByteSource`], handed out by
/// `ByteSource::check_available` once it has seen that a byte is left.
pub struct ByteReadHandle<'a, 'b>
where
    'b: 'a,
{
    /// The source the byte will be read from.
    source: &'a mut ByteSource<'b>,
}
404 | |
405 | impl<'a, 'b> ByteReadHandle<'a, 'b> |
406 | where |
407 | 'b: 'a, |
408 | { |
409 | #[inline (always)] |
410 | fn new(src: &'a mut ByteSource<'b>) -> ByteReadHandle<'a, 'b> { |
411 | ByteReadHandle { source: src } |
412 | } |
413 | #[inline (always)] |
414 | pub fn read(self) -> (u8, ByteUnreadHandle<'a, 'b>) { |
415 | let byte: u8 = self.source.read(); |
416 | let handle: ByteUnreadHandle<'_, '_> = ByteUnreadHandle::new(self.source); |
417 | (byte, handle) |
418 | } |
419 | #[inline (always)] |
420 | pub fn consumed(&self) -> usize { |
421 | self.source.consumed() |
422 | } |
423 | } |
424 | |
/// Permission to undo the single byte read that produced this handle.
pub struct ByteUnreadHandle<'a, 'b>
where
    'b: 'a,
{
    /// The source the byte was read from.
    source: &'a mut ByteSource<'b>,
}
431 | |
432 | impl<'a, 'b> ByteUnreadHandle<'a, 'b> |
433 | where |
434 | 'b: 'a, |
435 | { |
436 | #[inline (always)] |
437 | fn new(src: &'a mut ByteSource<'b>) -> ByteUnreadHandle<'a, 'b> { |
438 | ByteUnreadHandle { source: src } |
439 | } |
440 | #[inline (always)] |
441 | pub fn unread(self) -> usize { |
442 | self.source.unread() |
443 | } |
444 | #[inline (always)] |
445 | pub fn consumed(&self) -> usize { |
446 | self.source.consumed() |
447 | } |
448 | #[inline (always)] |
449 | pub fn commit(self) -> &'a mut ByteSource<'b> { |
450 | self.source |
451 | } |
452 | } |
453 | |
454 | // UTF-16 destination |
455 | |
/// Permission to write one UTF-16 code unit to a [`Utf16Destination`].
///
/// `Utf16Destination::check_space_bmp` hands this out only when at least one
/// code unit of space is left.
pub struct Utf16BmpHandle<'a, 'b>
where
    'b: 'a,
{
    /// The destination the code unit will be written to.
    dest: &'a mut Utf16Destination<'b>,
}
462 | |
463 | impl<'a, 'b> Utf16BmpHandle<'a, 'b> |
464 | where |
465 | 'b: 'a, |
466 | { |
467 | #[inline (always)] |
468 | fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16BmpHandle<'a, 'b> { |
469 | Utf16BmpHandle { dest: dst } |
470 | } |
471 | #[inline (always)] |
472 | pub fn written(&self) -> usize { |
473 | self.dest.written() |
474 | } |
475 | #[inline (always)] |
476 | pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> { |
477 | self.dest.write_ascii(ascii); |
478 | self.dest |
479 | } |
480 | #[inline (always)] |
481 | pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
482 | self.dest.write_bmp(bmp); |
483 | self.dest |
484 | } |
485 | #[inline (always)] |
486 | pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
487 | self.dest.write_bmp_excl_ascii(bmp); |
488 | self.dest |
489 | } |
490 | #[inline (always)] |
491 | pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
492 | self.dest.write_mid_bmp(bmp); |
493 | self.dest |
494 | } |
495 | #[inline (always)] |
496 | pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
497 | self.dest.write_upper_bmp(bmp); |
498 | self.dest |
499 | } |
500 | #[inline (always)] |
501 | pub fn commit(self) -> &'a mut Utf16Destination<'b> { |
502 | self.dest |
503 | } |
504 | } |
505 | |
/// Permission to write up to two UTF-16 code units to a
/// [`Utf16Destination`].
///
/// `Utf16Destination::check_space_astral` hands this out only when at least
/// two code units of space are left.
pub struct Utf16AstralHandle<'a, 'b>
where
    'b: 'a,
{
    /// The destination the code units will be written to.
    dest: &'a mut Utf16Destination<'b>,
}
512 | |
513 | impl<'a, 'b> Utf16AstralHandle<'a, 'b> |
514 | where |
515 | 'b: 'a, |
516 | { |
517 | #[inline (always)] |
518 | fn new(dst: &'a mut Utf16Destination<'b>) -> Utf16AstralHandle<'a, 'b> { |
519 | Utf16AstralHandle { dest: dst } |
520 | } |
521 | #[inline (always)] |
522 | pub fn written(&self) -> usize { |
523 | self.dest.written() |
524 | } |
525 | #[inline (always)] |
526 | pub fn write_ascii(self, ascii: u8) -> &'a mut Utf16Destination<'b> { |
527 | self.dest.write_ascii(ascii); |
528 | self.dest |
529 | } |
530 | #[inline (always)] |
531 | pub fn write_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
532 | self.dest.write_bmp(bmp); |
533 | self.dest |
534 | } |
535 | #[inline (always)] |
536 | pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
537 | self.dest.write_bmp_excl_ascii(bmp); |
538 | self.dest |
539 | } |
540 | #[inline (always)] |
541 | pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf16Destination<'b> { |
542 | self.dest.write_upper_bmp(bmp); |
543 | self.dest |
544 | } |
545 | #[inline (always)] |
546 | pub fn write_astral(self, astral: u32) -> &'a mut Utf16Destination<'b> { |
547 | self.dest.write_astral(astral); |
548 | self.dest |
549 | } |
550 | #[inline (always)] |
551 | pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf16Destination<'b> { |
552 | self.dest.write_surrogate_pair(high, low); |
553 | self.dest |
554 | } |
555 | #[inline (always)] |
556 | pub fn write_big5_combination( |
557 | self, |
558 | combined: u16, |
559 | combining: u16, |
560 | ) -> &'a mut Utf16Destination<'b> { |
561 | self.dest.write_big5_combination(combined, combining); |
562 | self.dest |
563 | } |
564 | #[inline (always)] |
565 | pub fn commit(self) -> &'a mut Utf16Destination<'b> { |
566 | self.dest |
567 | } |
568 | } |
569 | |
/// A `u16` output buffer paired with a write cursor.
pub struct Utf16Destination<'a> {
    /// The output buffer.
    slice: &'a mut [u16],
    /// Index of the next code unit to write; also the number written so far.
    pos: usize,
}
574 | |
575 | impl<'a> Utf16Destination<'a> { |
576 | #[inline (always)] |
577 | pub fn new(dst: &mut [u16]) -> Utf16Destination { |
578 | Utf16Destination { slice: dst, pos: 0 } |
579 | } |
580 | #[inline (always)] |
581 | pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf16BmpHandle<'b, 'a>> { |
582 | if self.pos < self.slice.len() { |
583 | Space::Available(Utf16BmpHandle::new(self)) |
584 | } else { |
585 | Space::Full(self.written()) |
586 | } |
587 | } |
588 | #[inline (always)] |
589 | pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf16AstralHandle<'b, 'a>> { |
590 | if self.pos + 1 < self.slice.len() { |
591 | Space::Available(Utf16AstralHandle::new(self)) |
592 | } else { |
593 | Space::Full(self.written()) |
594 | } |
595 | } |
596 | #[inline (always)] |
597 | pub fn written(&self) -> usize { |
598 | self.pos |
599 | } |
600 | #[inline (always)] |
601 | fn write_code_unit(&mut self, u: u16) { |
602 | unsafe { |
603 | // OK, because we checked before handing out a handle. |
604 | *(self.slice.get_unchecked_mut(self.pos)) = u; |
605 | } |
606 | self.pos += 1; |
607 | } |
608 | #[inline (always)] |
609 | fn write_ascii(&mut self, ascii: u8) { |
610 | debug_assert!(ascii < 0x80); |
611 | self.write_code_unit(u16::from(ascii)); |
612 | } |
613 | #[inline (always)] |
614 | fn write_bmp(&mut self, bmp: u16) { |
615 | self.write_code_unit(bmp); |
616 | } |
617 | #[inline (always)] |
618 | fn write_bmp_excl_ascii(&mut self, bmp: u16) { |
619 | debug_assert!(bmp >= 0x80); |
620 | self.write_code_unit(bmp); |
621 | } |
622 | #[inline (always)] |
623 | fn write_mid_bmp(&mut self, bmp: u16) { |
624 | debug_assert!(bmp >= 0x80); // XXX |
625 | self.write_code_unit(bmp); |
626 | } |
627 | #[inline (always)] |
628 | fn write_upper_bmp(&mut self, bmp: u16) { |
629 | debug_assert!(bmp >= 0x80); |
630 | self.write_code_unit(bmp); |
631 | } |
632 | #[inline (always)] |
633 | fn write_astral(&mut self, astral: u32) { |
634 | debug_assert!(astral > 0xFFFF); |
635 | debug_assert!(astral <= 0x10_FFFF); |
636 | self.write_code_unit((0xD7C0 + (astral >> 10)) as u16); |
637 | self.write_code_unit((0xDC00 + (astral & 0x3FF)) as u16); |
638 | } |
639 | #[inline (always)] |
640 | pub fn write_surrogate_pair(&mut self, high: u16, low: u16) { |
641 | self.write_code_unit(high); |
642 | self.write_code_unit(low); |
643 | } |
644 | #[inline (always)] |
645 | fn write_big5_combination(&mut self, combined: u16, combining: u16) { |
646 | self.write_bmp_excl_ascii(combined); |
647 | self.write_bmp_excl_ascii(combining); |
648 | } |
649 | #[inline (always)] |
650 | pub fn copy_ascii_from_check_space_bmp<'b>( |
651 | &'b mut self, |
652 | source: &mut ByteSource, |
653 | ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16BmpHandle<'b, 'a>)> { |
654 | let non_ascii_ret = { |
655 | let src_remaining = &source.slice[source.pos..]; |
656 | let dst_remaining = &mut self.slice[self.pos..]; |
657 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
658 | (DecoderResult::OutputFull, dst_remaining.len()) |
659 | } else { |
660 | (DecoderResult::InputEmpty, src_remaining.len()) |
661 | }; |
662 | match unsafe { |
663 | ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
664 | } { |
665 | None => { |
666 | source.pos += length; |
667 | self.pos += length; |
668 | return CopyAsciiResult::Stop((pending, source.pos, self.pos)); |
669 | } |
670 | Some((non_ascii, consumed)) => { |
671 | source.pos += consumed; |
672 | self.pos += consumed; |
673 | source.pos += 1; // +1 for non_ascii |
674 | non_ascii |
675 | } |
676 | } |
677 | }; |
678 | CopyAsciiResult::GoOn((non_ascii_ret, Utf16BmpHandle::new(self))) |
679 | } |
680 | #[inline (always)] |
681 | pub fn copy_ascii_from_check_space_astral<'b>( |
682 | &'b mut self, |
683 | source: &mut ByteSource, |
684 | ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf16AstralHandle<'b, 'a>)> { |
685 | let non_ascii_ret = { |
686 | let dst_len = self.slice.len(); |
687 | let src_remaining = &source.slice[source.pos..]; |
688 | let dst_remaining = &mut self.slice[self.pos..]; |
689 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
690 | (DecoderResult::OutputFull, dst_remaining.len()) |
691 | } else { |
692 | (DecoderResult::InputEmpty, src_remaining.len()) |
693 | }; |
694 | match unsafe { |
695 | ascii_to_basic_latin(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
696 | } { |
697 | None => { |
698 | source.pos += length; |
699 | self.pos += length; |
700 | return CopyAsciiResult::Stop((pending, source.pos, self.pos)); |
701 | } |
702 | Some((non_ascii, consumed)) => { |
703 | source.pos += consumed; |
704 | self.pos += consumed; |
705 | if self.pos + 1 < dst_len { |
706 | source.pos += 1; // +1 for non_ascii |
707 | non_ascii |
708 | } else { |
709 | return CopyAsciiResult::Stop(( |
710 | DecoderResult::OutputFull, |
711 | source.pos, |
712 | self.pos, |
713 | )); |
714 | } |
715 | } |
716 | } |
717 | }; |
718 | CopyAsciiResult::GoOn((non_ascii_ret, Utf16AstralHandle::new(self))) |
719 | } |
720 | #[inline (always)] |
721 | pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) { |
722 | let src_remaining = &source.slice[source.pos..]; |
723 | let dst_remaining = &mut self.slice[self.pos..]; |
724 | let (read, written) = convert_utf8_to_utf16_up_to_invalid(src_remaining, dst_remaining); |
725 | source.pos += read; |
726 | self.pos += written; |
727 | } |
728 | #[inline (always)] |
729 | pub fn copy_utf16_from<E: Endian>( |
730 | &mut self, |
731 | source: &mut ByteSource, |
732 | ) -> Option<(usize, usize)> { |
733 | let src_remaining = &source.slice[source.pos..]; |
734 | let dst_remaining = &mut self.slice[self.pos..]; |
735 | |
736 | let mut src_unaligned = unsafe { |
737 | UnalignedU16Slice::new( |
738 | src_remaining.as_ptr(), |
739 | ::core::cmp::min(src_remaining.len() / 2, dst_remaining.len()), |
740 | ) |
741 | }; |
742 | if src_unaligned.len() == 0 { |
743 | return None; |
744 | } |
745 | let last_unit = swap_if_opposite_endian::<E>(src_unaligned.at(src_unaligned.len() - 1)); |
746 | if super::in_range16(last_unit, 0xD800, 0xDC00) { |
747 | // Last code unit is a high surrogate. It might |
748 | // legitimately form a pair later, so let's not |
749 | // include it. |
750 | src_unaligned.trim_last(); |
751 | } |
752 | let mut offset = 0usize; |
753 | loop { |
754 | if let Some((surrogate, bmp_len)) = { |
755 | let src_left = src_unaligned.tail(offset); |
756 | let dst_left = &mut dst_remaining[offset..src_unaligned.len()]; |
757 | src_left.copy_bmp_to::<E>(dst_left) |
758 | } { |
759 | offset += bmp_len; // surrogate has not been consumed yet |
760 | let second_pos = offset + 1; |
761 | if surrogate > 0xDBFF || second_pos == src_unaligned.len() { |
762 | // Unpaired surrogate |
763 | source.pos += second_pos * 2; |
764 | self.pos += offset; |
765 | return Some((source.pos, self.pos)); |
766 | } |
767 | let second = swap_if_opposite_endian::<E>(src_unaligned.at(second_pos)); |
768 | if !super::in_range16(second, 0xDC00, 0xE000) { |
769 | // Unpaired surrogate |
770 | source.pos += second_pos * 2; |
771 | self.pos += offset; |
772 | return Some((source.pos, self.pos)); |
773 | } |
774 | // `surrogate` was already speculatively written |
775 | dst_remaining[second_pos] = second; |
776 | offset += 2; |
777 | continue; |
778 | } else { |
779 | source.pos += src_unaligned.len() * 2; |
780 | self.pos += src_unaligned.len(); |
781 | return None; |
782 | } |
783 | } |
784 | } |
785 | } |
786 | |
787 | // UTF-8 destination |
788 | |
/// Permission to write one BMP character (up to three UTF-8 bytes) to a
/// [`Utf8Destination`].
///
/// `Utf8Destination::check_space_bmp` hands this out only when at least
/// three bytes of space are left.
pub struct Utf8BmpHandle<'a, 'b>
where
    'b: 'a,
{
    /// The destination the bytes will be written to.
    dest: &'a mut Utf8Destination<'b>,
}
795 | |
796 | impl<'a, 'b> Utf8BmpHandle<'a, 'b> |
797 | where |
798 | 'b: 'a, |
799 | { |
800 | #[inline (always)] |
801 | fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8BmpHandle<'a, 'b> { |
802 | Utf8BmpHandle { dest: dst } |
803 | } |
804 | #[inline (always)] |
805 | pub fn written(&self) -> usize { |
806 | self.dest.written() |
807 | } |
808 | #[inline (always)] |
809 | pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> { |
810 | self.dest.write_ascii(ascii); |
811 | self.dest |
812 | } |
813 | #[inline (always)] |
814 | pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
815 | self.dest.write_bmp(bmp); |
816 | self.dest |
817 | } |
818 | #[inline (always)] |
819 | pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
820 | self.dest.write_bmp_excl_ascii(bmp); |
821 | self.dest |
822 | } |
823 | #[inline (always)] |
824 | pub fn write_mid_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
825 | self.dest.write_mid_bmp(bmp); |
826 | self.dest |
827 | } |
828 | #[inline (always)] |
829 | pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
830 | self.dest.write_upper_bmp(bmp); |
831 | self.dest |
832 | } |
833 | #[inline (always)] |
834 | pub fn commit(self) -> &'a mut Utf8Destination<'b> { |
835 | self.dest |
836 | } |
837 | } |
838 | |
/// Permission to write one character of any size (up to four UTF-8 bytes)
/// to a [`Utf8Destination`].
///
/// `Utf8Destination::check_space_astral` hands this out only when at least
/// four bytes of space are left.
pub struct Utf8AstralHandle<'a, 'b>
where
    'b: 'a,
{
    /// The destination the bytes will be written to.
    dest: &'a mut Utf8Destination<'b>,
}
845 | |
846 | impl<'a, 'b> Utf8AstralHandle<'a, 'b> |
847 | where |
848 | 'b: 'a, |
849 | { |
850 | #[inline (always)] |
851 | fn new(dst: &'a mut Utf8Destination<'b>) -> Utf8AstralHandle<'a, 'b> { |
852 | Utf8AstralHandle { dest: dst } |
853 | } |
854 | #[inline (always)] |
855 | pub fn written(&self) -> usize { |
856 | self.dest.written() |
857 | } |
858 | #[inline (always)] |
859 | pub fn write_ascii(self, ascii: u8) -> &'a mut Utf8Destination<'b> { |
860 | self.dest.write_ascii(ascii); |
861 | self.dest |
862 | } |
863 | #[inline (always)] |
864 | pub fn write_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
865 | self.dest.write_bmp(bmp); |
866 | self.dest |
867 | } |
868 | #[inline (always)] |
869 | pub fn write_bmp_excl_ascii(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
870 | self.dest.write_bmp_excl_ascii(bmp); |
871 | self.dest |
872 | } |
873 | #[inline (always)] |
874 | pub fn write_upper_bmp(self, bmp: u16) -> &'a mut Utf8Destination<'b> { |
875 | self.dest.write_upper_bmp(bmp); |
876 | self.dest |
877 | } |
878 | #[inline (always)] |
879 | pub fn write_astral(self, astral: u32) -> &'a mut Utf8Destination<'b> { |
880 | self.dest.write_astral(astral); |
881 | self.dest |
882 | } |
883 | #[inline (always)] |
884 | pub fn write_surrogate_pair(self, high: u16, low: u16) -> &'a mut Utf8Destination<'b> { |
885 | self.dest.write_surrogate_pair(high, low); |
886 | self.dest |
887 | } |
888 | #[inline (always)] |
889 | pub fn write_big5_combination( |
890 | self, |
891 | combined: u16, |
892 | combining: u16, |
893 | ) -> &'a mut Utf8Destination<'b> { |
894 | self.dest.write_big5_combination(combined, combining); |
895 | self.dest |
896 | } |
897 | #[inline (always)] |
898 | pub fn commit(self) -> &'a mut Utf8Destination<'b> { |
899 | self.dest |
900 | } |
901 | } |
902 | |
/// A `u8` output buffer paired with a write cursor.
pub struct Utf8Destination<'a> {
    /// The output buffer.
    slice: &'a mut [u8],
    /// Index of the next byte to write; also the number of bytes written.
    pos: usize,
}
907 | |
908 | impl<'a> Utf8Destination<'a> { |
909 | #[inline (always)] |
910 | pub fn new(dst: &mut [u8]) -> Utf8Destination { |
911 | Utf8Destination { slice: dst, pos: 0 } |
912 | } |
913 | #[inline (always)] |
914 | pub fn check_space_bmp<'b>(&'b mut self) -> Space<Utf8BmpHandle<'b, 'a>> { |
915 | if self.pos + 2 < self.slice.len() { |
916 | Space::Available(Utf8BmpHandle::new(self)) |
917 | } else { |
918 | Space::Full(self.written()) |
919 | } |
920 | } |
921 | #[inline (always)] |
922 | pub fn check_space_astral<'b>(&'b mut self) -> Space<Utf8AstralHandle<'b, 'a>> { |
923 | if self.pos + 3 < self.slice.len() { |
924 | Space::Available(Utf8AstralHandle::new(self)) |
925 | } else { |
926 | Space::Full(self.written()) |
927 | } |
928 | } |
/// Number of bytes written so far (the current write position).
#[inline(always)]
pub fn written(&self) -> usize {
    self.pos
}
933 | #[inline (always)] |
934 | fn write_code_unit(&mut self, u: u8) { |
935 | unsafe { |
936 | // OK, because we checked before handing out a handle. |
937 | *(self.slice.get_unchecked_mut(self.pos)) = u; |
938 | } |
939 | self.pos += 1; |
940 | } |
941 | #[inline (always)] |
942 | fn write_ascii(&mut self, ascii: u8) { |
943 | debug_assert!(ascii < 0x80); |
944 | self.write_code_unit(ascii); |
945 | } |
946 | #[inline (always)] |
947 | fn write_bmp(&mut self, bmp: u16) { |
948 | if bmp < 0x80u16 { |
949 | self.write_ascii(bmp as u8); |
950 | } else if bmp < 0x800u16 { |
951 | self.write_mid_bmp(bmp); |
952 | } else { |
953 | self.write_upper_bmp(bmp); |
954 | } |
955 | } |
956 | #[inline (always)] |
957 | fn write_mid_bmp(&mut self, mid_bmp: u16) { |
958 | debug_assert!(mid_bmp >= 0x80); |
959 | debug_assert!(mid_bmp < 0x800); |
960 | self.write_code_unit(((mid_bmp >> 6) | 0xC0) as u8); |
961 | self.write_code_unit(((mid_bmp & 0x3F) | 0x80) as u8); |
962 | } |
963 | #[inline (always)] |
964 | fn write_upper_bmp(&mut self, upper_bmp: u16) { |
965 | debug_assert!(upper_bmp >= 0x800); |
966 | self.write_code_unit(((upper_bmp >> 12) | 0xE0) as u8); |
967 | self.write_code_unit((((upper_bmp & 0xFC0) >> 6) | 0x80) as u8); |
968 | self.write_code_unit(((upper_bmp & 0x3F) | 0x80) as u8); |
969 | } |
970 | #[inline (always)] |
971 | fn write_bmp_excl_ascii(&mut self, bmp: u16) { |
972 | if bmp < 0x800u16 { |
973 | self.write_mid_bmp(bmp); |
974 | } else { |
975 | self.write_upper_bmp(bmp); |
976 | } |
977 | } |
978 | #[inline (always)] |
979 | fn write_astral(&mut self, astral: u32) { |
980 | debug_assert!(astral > 0xFFFF); |
981 | debug_assert!(astral <= 0x10_FFFF); |
982 | self.write_code_unit(((astral >> 18) | 0xF0) as u8); |
983 | self.write_code_unit((((astral & 0x3F000) >> 12) | 0x80) as u8); |
984 | self.write_code_unit((((astral & 0xFC0) >> 6) | 0x80) as u8); |
985 | self.write_code_unit(((astral & 0x3F) | 0x80) as u8); |
986 | } |
987 | #[inline (always)] |
988 | pub fn write_surrogate_pair(&mut self, high: u16, low: u16) { |
989 | self.write_astral( |
990 | (u32::from(high) << 10) + u32::from(low) |
991 | - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), |
992 | ); |
993 | } |
994 | #[inline (always)] |
995 | fn write_big5_combination(&mut self, combined: u16, combining: u16) { |
996 | self.write_mid_bmp(combined); |
997 | self.write_mid_bmp(combining); |
998 | } |
999 | #[inline (always)] |
1000 | pub fn copy_ascii_from_check_space_bmp<'b>( |
1001 | &'b mut self, |
1002 | source: &mut ByteSource, |
1003 | ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8BmpHandle<'b, 'a>)> { |
1004 | let non_ascii_ret = { |
1005 | let dst_len = self.slice.len(); |
1006 | let src_remaining = &source.slice[source.pos..]; |
1007 | let dst_remaining = &mut self.slice[self.pos..]; |
1008 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1009 | (DecoderResult::OutputFull, dst_remaining.len()) |
1010 | } else { |
1011 | (DecoderResult::InputEmpty, src_remaining.len()) |
1012 | }; |
1013 | match unsafe { |
1014 | ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1015 | } { |
1016 | None => { |
1017 | source.pos += length; |
1018 | self.pos += length; |
1019 | return CopyAsciiResult::Stop((pending, source.pos, self.pos)); |
1020 | } |
1021 | Some((non_ascii, consumed)) => { |
1022 | source.pos += consumed; |
1023 | self.pos += consumed; |
1024 | if self.pos + 2 < dst_len { |
1025 | source.pos += 1; // +1 for non_ascii |
1026 | non_ascii |
1027 | } else { |
1028 | return CopyAsciiResult::Stop(( |
1029 | DecoderResult::OutputFull, |
1030 | source.pos, |
1031 | self.pos, |
1032 | )); |
1033 | } |
1034 | } |
1035 | } |
1036 | }; |
1037 | CopyAsciiResult::GoOn((non_ascii_ret, Utf8BmpHandle::new(self))) |
1038 | } |
1039 | #[inline (always)] |
1040 | pub fn copy_ascii_from_check_space_astral<'b>( |
1041 | &'b mut self, |
1042 | source: &mut ByteSource, |
1043 | ) -> CopyAsciiResult<(DecoderResult, usize, usize), (u8, Utf8AstralHandle<'b, 'a>)> { |
1044 | let non_ascii_ret = { |
1045 | let dst_len = self.slice.len(); |
1046 | let src_remaining = &source.slice[source.pos..]; |
1047 | let dst_remaining = &mut self.slice[self.pos..]; |
1048 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1049 | (DecoderResult::OutputFull, dst_remaining.len()) |
1050 | } else { |
1051 | (DecoderResult::InputEmpty, src_remaining.len()) |
1052 | }; |
1053 | match unsafe { |
1054 | ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1055 | } { |
1056 | None => { |
1057 | source.pos += length; |
1058 | self.pos += length; |
1059 | return CopyAsciiResult::Stop((pending, source.pos, self.pos)); |
1060 | } |
1061 | Some((non_ascii, consumed)) => { |
1062 | source.pos += consumed; |
1063 | self.pos += consumed; |
1064 | if self.pos + 3 < dst_len { |
1065 | source.pos += 1; // +1 for non_ascii |
1066 | non_ascii |
1067 | } else { |
1068 | return CopyAsciiResult::Stop(( |
1069 | DecoderResult::OutputFull, |
1070 | source.pos, |
1071 | self.pos, |
1072 | )); |
1073 | } |
1074 | } |
1075 | } |
1076 | }; |
1077 | CopyAsciiResult::GoOn((non_ascii_ret, Utf8AstralHandle::new(self))) |
1078 | } |
1079 | #[inline (always)] |
1080 | pub fn copy_utf8_up_to_invalid_from(&mut self, source: &mut ByteSource) { |
1081 | let src_remaining = &source.slice[source.pos..]; |
1082 | let dst_remaining = &mut self.slice[self.pos..]; |
1083 | let min_len = ::core::cmp::min(src_remaining.len(), dst_remaining.len()); |
1084 | // Validate first, then memcpy to let memcpy do its thing even for |
1085 | // non-ASCII. (And potentially do something better than SSE2 for ASCII.) |
1086 | let valid_len = utf8_valid_up_to(&src_remaining[..min_len]); |
1087 | (&mut dst_remaining[..valid_len]).copy_from_slice(&src_remaining[..valid_len]); |
1088 | source.pos += valid_len; |
1089 | self.pos += valid_len; |
1090 | } |
1091 | #[inline (always)] |
1092 | pub fn copy_utf16_from<E: Endian>( |
1093 | &mut self, |
1094 | source: &mut ByteSource, |
1095 | ) -> Option<(usize, usize)> { |
1096 | let src_remaining = &source.slice[source.pos..]; |
1097 | let dst_remaining = &mut self.slice[self.pos..]; |
1098 | |
1099 | let mut src_unaligned = |
1100 | unsafe { UnalignedU16Slice::new(src_remaining.as_ptr(), src_remaining.len() / 2) }; |
1101 | if src_unaligned.len() == 0 { |
1102 | return None; |
1103 | } |
1104 | let mut last_unit = src_unaligned.at(src_unaligned.len() - 1); |
1105 | if E::OPPOSITE_ENDIAN { |
1106 | last_unit = last_unit.swap_bytes(); |
1107 | } |
1108 | if super::in_range16(last_unit, 0xD800, 0xDC00) { |
1109 | // Last code unit is a high surrogate. It might |
1110 | // legitimately form a pair later, so let's not |
1111 | // include it. |
1112 | src_unaligned.trim_last(); |
1113 | } |
1114 | let (read, written, had_error) = |
1115 | convert_unaligned_utf16_to_utf8::<E>(src_unaligned, dst_remaining); |
1116 | source.pos += read * 2; |
1117 | self.pos += written; |
1118 | if had_error { |
1119 | Some((source.pos, self.pos)) |
1120 | } else { |
1121 | None |
1122 | } |
1123 | } |
1124 | } |
1125 | |
1126 | // UTF-16 source |
1127 | |
/// Read cursor over a borrowed slice of UTF-16 code units.
pub struct Utf16Source<'a> {
    // The code units being read.
    slice: &'a [u16],
    // Index of the next code unit to read.
    pos: usize,
    // Value of `pos` before the most recent `read`/`read_enum`; `unread`
    // restores `pos` from it.
    old_pos: usize,
}
1133 | |
1134 | impl<'a> Utf16Source<'a> { |
1135 | #[inline (always)] |
1136 | pub fn new(src: &[u16]) -> Utf16Source { |
1137 | Utf16Source { |
1138 | slice: src, |
1139 | pos: 0, |
1140 | old_pos: 0, |
1141 | } |
1142 | } |
1143 | #[inline (always)] |
1144 | pub fn check_available<'b>(&'b mut self) -> Space<Utf16ReadHandle<'b, 'a>> { |
1145 | if self.pos < self.slice.len() { |
1146 | Space::Available(Utf16ReadHandle::new(self)) |
1147 | } else { |
1148 | Space::Full(self.consumed()) |
1149 | } |
1150 | } |
1151 | #[cfg_attr (feature = "cargo-clippy" , allow(collapsible_if))] |
1152 | #[inline (always)] |
1153 | fn read(&mut self) -> char { |
1154 | self.old_pos = self.pos; |
1155 | let unit = self.slice[self.pos]; |
1156 | self.pos += 1; |
1157 | let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); |
1158 | if unit_minus_surrogate_start > (0xDFFF - 0xD800) { |
1159 | return unsafe { ::core::char::from_u32_unchecked(u32::from(unit)) }; |
1160 | } |
1161 | if unit_minus_surrogate_start <= (0xDBFF - 0xD800) { |
1162 | // high surrogate |
1163 | if self.pos < self.slice.len() { |
1164 | let second = self.slice[self.pos]; |
1165 | let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00); |
1166 | if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) { |
1167 | // The next code unit is a low surrogate. Advance position. |
1168 | self.pos += 1; |
1169 | return unsafe { |
1170 | ::core::char::from_u32_unchecked( |
1171 | (u32::from(unit) << 10) + u32::from(second) |
1172 | - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), |
1173 | ) |
1174 | }; |
1175 | } |
1176 | // The next code unit is not a low surrogate. Don't advance |
1177 | // position and treat the high surrogate as unpaired. |
1178 | // fall through |
1179 | } |
1180 | // Unpaired surrogate at the end of buffer, fall through |
1181 | } |
1182 | // Unpaired low surrogate |
1183 | ' \u{FFFD}' |
1184 | } |
1185 | #[cfg_attr (feature = "cargo-clippy" , allow(collapsible_if))] |
1186 | #[inline (always)] |
1187 | fn read_enum(&mut self) -> Unicode { |
1188 | self.old_pos = self.pos; |
1189 | let unit = self.slice[self.pos]; |
1190 | self.pos += 1; |
1191 | if unit < 0x80 { |
1192 | return Unicode::Ascii(unit as u8); |
1193 | } |
1194 | let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); |
1195 | if unit_minus_surrogate_start > (0xDFFF - 0xD800) { |
1196 | return Unicode::NonAscii(NonAscii::BmpExclAscii(unit)); |
1197 | } |
1198 | if unit_minus_surrogate_start <= (0xDBFF - 0xD800) { |
1199 | // high surrogate |
1200 | if self.pos < self.slice.len() { |
1201 | let second = self.slice[self.pos]; |
1202 | let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00); |
1203 | if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) { |
1204 | // The next code unit is a low surrogate. Advance position. |
1205 | self.pos += 1; |
1206 | return Unicode::NonAscii(NonAscii::Astral(unsafe { |
1207 | ::core::char::from_u32_unchecked( |
1208 | (u32::from(unit) << 10) + u32::from(second) |
1209 | - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), |
1210 | ) |
1211 | })); |
1212 | } |
1213 | // The next code unit is not a low surrogate. Don't advance |
1214 | // position and treat the high surrogate as unpaired. |
1215 | // fall through |
1216 | } |
1217 | // Unpaired surrogate at the end of buffer, fall through |
1218 | } |
1219 | // Unpaired low surrogate |
1220 | Unicode::NonAscii(NonAscii::BmpExclAscii(0xFFFDu16)) |
1221 | } |
1222 | #[inline (always)] |
1223 | fn unread(&mut self) -> usize { |
1224 | self.pos = self.old_pos; |
1225 | self.pos |
1226 | } |
/// Returns the current read position, i.e. the number of code units
/// consumed so far.
#[inline (always)]
pub fn consumed(&self) -> usize {
self.pos
}
1231 | #[inline (always)] |
1232 | pub fn copy_ascii_to_check_space_two<'b>( |
1233 | &mut self, |
1234 | dest: &'b mut ByteDestination<'a>, |
1235 | ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> { |
1236 | let non_ascii_ret = { |
1237 | let dst_len = dest.slice.len(); |
1238 | let src_remaining = &self.slice[self.pos..]; |
1239 | let dst_remaining = &mut dest.slice[dest.pos..]; |
1240 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1241 | (EncoderResult::OutputFull, dst_remaining.len()) |
1242 | } else { |
1243 | (EncoderResult::InputEmpty, src_remaining.len()) |
1244 | }; |
1245 | match unsafe { |
1246 | basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1247 | } { |
1248 | None => { |
1249 | self.pos += length; |
1250 | dest.pos += length; |
1251 | return CopyAsciiResult::Stop((pending, self.pos, dest.pos)); |
1252 | } |
1253 | Some((non_ascii, consumed)) => { |
1254 | self.pos += consumed; |
1255 | dest.pos += consumed; |
1256 | if dest.pos + 1 < dst_len { |
1257 | self.pos += 1; // commit to reading `non_ascii` |
1258 | let unit = non_ascii; |
1259 | let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); |
1260 | if unit_minus_surrogate_start > (0xDFFF - 0xD800) { |
1261 | NonAscii::BmpExclAscii(unit) |
1262 | } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) { |
1263 | // high surrogate |
1264 | if self.pos < self.slice.len() { |
1265 | let second = self.slice[self.pos]; |
1266 | let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00); |
1267 | if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) { |
1268 | // The next code unit is a low surrogate. Advance position. |
1269 | self.pos += 1; |
1270 | NonAscii::Astral(unsafe { |
1271 | ::core::char::from_u32_unchecked( |
1272 | (u32::from(unit) << 10) + u32::from(second) |
1273 | - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32), |
1274 | ) |
1275 | }) |
1276 | } else { |
1277 | // The next code unit is not a low surrogate. Don't advance |
1278 | // position and treat the high surrogate as unpaired. |
1279 | NonAscii::BmpExclAscii(0xFFFDu16) |
1280 | } |
1281 | } else { |
1282 | // Unpaired surrogate at the end of the buffer. |
1283 | NonAscii::BmpExclAscii(0xFFFDu16) |
1284 | } |
1285 | } else { |
1286 | // Unpaired low surrogate |
1287 | NonAscii::BmpExclAscii(0xFFFDu16) |
1288 | } |
1289 | } else { |
1290 | return CopyAsciiResult::Stop(( |
1291 | EncoderResult::OutputFull, |
1292 | self.pos, |
1293 | dest.pos, |
1294 | )); |
1295 | } |
1296 | } |
1297 | } |
1298 | }; |
1299 | CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest))) |
1300 | } |
1301 | #[inline (always)] |
1302 | pub fn copy_ascii_to_check_space_four<'b>( |
1303 | &mut self, |
1304 | dest: &'b mut ByteDestination<'a>, |
1305 | ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> { |
1306 | let non_ascii_ret = { |
1307 | let dst_len = dest.slice.len(); |
1308 | let src_remaining = &self.slice[self.pos..]; |
1309 | let dst_remaining = &mut dest.slice[dest.pos..]; |
1310 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1311 | (EncoderResult::OutputFull, dst_remaining.len()) |
1312 | } else { |
1313 | (EncoderResult::InputEmpty, src_remaining.len()) |
1314 | }; |
1315 | match unsafe { |
1316 | basic_latin_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1317 | } { |
1318 | None => { |
1319 | self.pos += length; |
1320 | dest.pos += length; |
1321 | return CopyAsciiResult::Stop((pending, self.pos, dest.pos)); |
1322 | } |
1323 | Some((non_ascii, consumed)) => { |
1324 | self.pos += consumed; |
1325 | dest.pos += consumed; |
1326 | if dest.pos + 3 < dst_len { |
1327 | self.pos += 1; // commit to reading `non_ascii` |
1328 | let unit = non_ascii; |
1329 | let unit_minus_surrogate_start = unit.wrapping_sub(0xD800); |
1330 | if unit_minus_surrogate_start > (0xDFFF - 0xD800) { |
1331 | NonAscii::BmpExclAscii(unit) |
1332 | } else if unit_minus_surrogate_start <= (0xDBFF - 0xD800) { |
1333 | // high surrogate |
1334 | if self.pos == self.slice.len() { |
1335 | // Unpaired surrogate at the end of the buffer. |
1336 | NonAscii::BmpExclAscii(0xFFFDu16) |
1337 | } else { |
1338 | let second = self.slice[self.pos]; |
1339 | let second_minus_low_surrogate_start = second.wrapping_sub(0xDC00); |
1340 | if second_minus_low_surrogate_start <= (0xDFFF - 0xDC00) { |
1341 | // The next code unit is a low surrogate. Advance position. |
1342 | self.pos += 1; |
1343 | NonAscii::Astral(unsafe { |
1344 | ::core::char::from_u32_unchecked( |
1345 | (u32::from(unit) << 10) + u32::from(second) |
1346 | - (((0xD800u32 << 10) - 0x1_0000u32) + 0xDC00u32), |
1347 | ) |
1348 | }) |
1349 | } else { |
1350 | // The next code unit is not a low surrogate. Don't advance |
1351 | // position and treat the high surrogate as unpaired. |
1352 | NonAscii::BmpExclAscii(0xFFFDu16) |
1353 | } |
1354 | } |
1355 | } else { |
1356 | // Unpaired low surrogate |
1357 | NonAscii::BmpExclAscii(0xFFFDu16) |
1358 | } |
1359 | } else { |
1360 | return CopyAsciiResult::Stop(( |
1361 | EncoderResult::OutputFull, |
1362 | self.pos, |
1363 | dest.pos, |
1364 | )); |
1365 | } |
1366 | } |
1367 | } |
1368 | }; |
1369 | CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest))) |
1370 | } |
1371 | } |
1372 | |
1373 | pub struct Utf16ReadHandle<'a, 'b> |
1374 | where |
1375 | 'b: 'a, |
1376 | { |
1377 | source: &'a mut Utf16Source<'b>, |
1378 | } |
1379 | |
1380 | impl<'a, 'b> Utf16ReadHandle<'a, 'b> |
1381 | where |
1382 | 'b: 'a, |
1383 | { |
1384 | #[inline (always)] |
1385 | fn new(src: &'a mut Utf16Source<'b>) -> Utf16ReadHandle<'a, 'b> { |
1386 | Utf16ReadHandle { source: src } |
1387 | } |
1388 | #[inline (always)] |
1389 | pub fn read(self) -> (char, Utf16UnreadHandle<'a, 'b>) { |
1390 | let character: char = self.source.read(); |
1391 | let handle: Utf16UnreadHandle<'_, '_> = Utf16UnreadHandle::new(self.source); |
1392 | (character, handle) |
1393 | } |
1394 | #[inline (always)] |
1395 | pub fn read_enum(self) -> (Unicode, Utf16UnreadHandle<'a, 'b>) { |
1396 | let character: Unicode = self.source.read_enum(); |
1397 | let handle: Utf16UnreadHandle<'_, '_> = Utf16UnreadHandle::new(self.source); |
1398 | (character, handle) |
1399 | } |
1400 | #[inline (always)] |
1401 | pub fn consumed(&self) -> usize { |
1402 | self.source.consumed() |
1403 | } |
1404 | } |
1405 | |
1406 | pub struct Utf16UnreadHandle<'a, 'b> |
1407 | where |
1408 | 'b: 'a, |
1409 | { |
1410 | source: &'a mut Utf16Source<'b>, |
1411 | } |
1412 | |
1413 | impl<'a, 'b> Utf16UnreadHandle<'a, 'b> |
1414 | where |
1415 | 'b: 'a, |
1416 | { |
1417 | #[inline (always)] |
1418 | fn new(src: &'a mut Utf16Source<'b>) -> Utf16UnreadHandle<'a, 'b> { |
1419 | Utf16UnreadHandle { source: src } |
1420 | } |
1421 | #[inline (always)] |
1422 | pub fn unread(self) -> usize { |
1423 | self.source.unread() |
1424 | } |
1425 | #[inline (always)] |
1426 | pub fn consumed(&self) -> usize { |
1427 | self.source.consumed() |
1428 | } |
1429 | #[inline (always)] |
1430 | pub fn commit(self) -> &'a mut Utf16Source<'b> { |
1431 | self.source |
1432 | } |
1433 | } |
1434 | |
1435 | // UTF-8 source |
1436 | |
/// Read cursor over the bytes of a borrowed UTF-8 string.
pub struct Utf8Source<'a> {
    // The bytes being read; `new` obtains them from a `&str`.
    slice: &'a [u8],
    // Index of the next byte to read.
    pos: usize,
    // Value of `pos` before the most recent `read`/`read_enum`; `unread`
    // restores `pos` from it.
    old_pos: usize,
}
1442 | |
1443 | impl<'a> Utf8Source<'a> { |
1444 | #[inline (always)] |
1445 | pub fn new(src: &str) -> Utf8Source { |
1446 | Utf8Source { |
1447 | slice: src.as_bytes(), |
1448 | pos: 0, |
1449 | old_pos: 0, |
1450 | } |
1451 | } |
1452 | #[inline (always)] |
1453 | pub fn check_available<'b>(&'b mut self) -> Space<Utf8ReadHandle<'b, 'a>> { |
1454 | if self.pos < self.slice.len() { |
1455 | Space::Available(Utf8ReadHandle::new(self)) |
1456 | } else { |
1457 | Space::Full(self.consumed()) |
1458 | } |
1459 | } |
1460 | #[inline (always)] |
1461 | fn read(&mut self) -> char { |
1462 | self.old_pos = self.pos; |
1463 | let unit = self.slice[self.pos]; |
1464 | if unit < 0x80 { |
1465 | self.pos += 1; |
1466 | return char::from(unit); |
1467 | } |
1468 | if unit < 0xE0 { |
1469 | let point = |
1470 | ((u32::from(unit) & 0x1F) << 6) | (u32::from(self.slice[self.pos + 1]) & 0x3F); |
1471 | self.pos += 2; |
1472 | return unsafe { ::core::char::from_u32_unchecked(point) }; |
1473 | } |
1474 | if unit < 0xF0 { |
1475 | let point = ((u32::from(unit) & 0xF) << 12) |
1476 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 6) |
1477 | | (u32::from(self.slice[self.pos + 2]) & 0x3F); |
1478 | self.pos += 3; |
1479 | return unsafe { ::core::char::from_u32_unchecked(point) }; |
1480 | } |
1481 | let point = ((u32::from(unit) & 0x7) << 18) |
1482 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) |
1483 | | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) |
1484 | | (u32::from(self.slice[self.pos + 3]) & 0x3F); |
1485 | self.pos += 4; |
1486 | unsafe { ::core::char::from_u32_unchecked(point) } |
1487 | } |
1488 | #[inline (always)] |
1489 | fn read_enum(&mut self) -> Unicode { |
1490 | self.old_pos = self.pos; |
1491 | let unit = self.slice[self.pos]; |
1492 | if unit < 0x80 { |
1493 | self.pos += 1; |
1494 | return Unicode::Ascii(unit); |
1495 | } |
1496 | if unit < 0xE0 { |
1497 | let point = |
1498 | ((u16::from(unit) & 0x1F) << 6) | (u16::from(self.slice[self.pos + 1]) & 0x3F); |
1499 | self.pos += 2; |
1500 | return Unicode::NonAscii(NonAscii::BmpExclAscii(point)); |
1501 | } |
1502 | if unit < 0xF0 { |
1503 | let point = ((u16::from(unit) & 0xF) << 12) |
1504 | | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6) |
1505 | | (u16::from(self.slice[self.pos + 2]) & 0x3F); |
1506 | self.pos += 3; |
1507 | return Unicode::NonAscii(NonAscii::BmpExclAscii(point)); |
1508 | } |
1509 | let point = ((u32::from(unit) & 0x7) << 18) |
1510 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) |
1511 | | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) |
1512 | | (u32::from(self.slice[self.pos + 3]) & 0x3F); |
1513 | self.pos += 4; |
1514 | Unicode::NonAscii(NonAscii::Astral(unsafe { |
1515 | ::core::char::from_u32_unchecked(point) |
1516 | })) |
1517 | } |
1518 | #[inline (always)] |
1519 | fn unread(&mut self) -> usize { |
1520 | self.pos = self.old_pos; |
1521 | self.pos |
1522 | } |
/// Returns the current read position, i.e. the number of bytes consumed
/// so far.
#[inline (always)]
pub fn consumed(&self) -> usize {
self.pos
}
1527 | #[inline (always)] |
1528 | pub fn copy_ascii_to_check_space_one<'b>( |
1529 | &mut self, |
1530 | dest: &'b mut ByteDestination<'a>, |
1531 | ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteOneHandle<'b, 'a>)> { |
1532 | let non_ascii_ret = { |
1533 | let src_remaining = &self.slice[self.pos..]; |
1534 | let dst_remaining = &mut dest.slice[dest.pos..]; |
1535 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1536 | (EncoderResult::OutputFull, dst_remaining.len()) |
1537 | } else { |
1538 | (EncoderResult::InputEmpty, src_remaining.len()) |
1539 | }; |
1540 | match unsafe { |
1541 | ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1542 | } { |
1543 | None => { |
1544 | self.pos += length; |
1545 | dest.pos += length; |
1546 | return CopyAsciiResult::Stop((pending, self.pos, dest.pos)); |
1547 | } |
1548 | Some((non_ascii, consumed)) => { |
1549 | self.pos += consumed; |
1550 | dest.pos += consumed; |
1551 | // We don't need to check space in destination, because |
1552 | // `ascii_to_ascii()` already did. |
1553 | if non_ascii < 0xE0 { |
1554 | let point = ((u16::from(non_ascii) & 0x1F) << 6) |
1555 | | (u16::from(self.slice[self.pos + 1]) & 0x3F); |
1556 | self.pos += 2; |
1557 | NonAscii::BmpExclAscii(point) |
1558 | } else if non_ascii < 0xF0 { |
1559 | let point = ((u16::from(non_ascii) & 0xF) << 12) |
1560 | | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6) |
1561 | | (u16::from(self.slice[self.pos + 2]) & 0x3F); |
1562 | self.pos += 3; |
1563 | NonAscii::BmpExclAscii(point) |
1564 | } else { |
1565 | let point = ((u32::from(non_ascii) & 0x7) << 18) |
1566 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) |
1567 | | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) |
1568 | | (u32::from(self.slice[self.pos + 3]) & 0x3F); |
1569 | self.pos += 4; |
1570 | NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) |
1571 | } |
1572 | } |
1573 | } |
1574 | }; |
1575 | CopyAsciiResult::GoOn((non_ascii_ret, ByteOneHandle::new(dest))) |
1576 | } |
1577 | #[inline (always)] |
1578 | pub fn copy_ascii_to_check_space_two<'b>( |
1579 | &mut self, |
1580 | dest: &'b mut ByteDestination<'a>, |
1581 | ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteTwoHandle<'b, 'a>)> { |
1582 | let non_ascii_ret = { |
1583 | let dst_len = dest.slice.len(); |
1584 | let src_remaining = &self.slice[self.pos..]; |
1585 | let dst_remaining = &mut dest.slice[dest.pos..]; |
1586 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1587 | (EncoderResult::OutputFull, dst_remaining.len()) |
1588 | } else { |
1589 | (EncoderResult::InputEmpty, src_remaining.len()) |
1590 | }; |
1591 | match unsafe { |
1592 | ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1593 | } { |
1594 | None => { |
1595 | self.pos += length; |
1596 | dest.pos += length; |
1597 | return CopyAsciiResult::Stop((pending, self.pos, dest.pos)); |
1598 | } |
1599 | Some((non_ascii, consumed)) => { |
1600 | self.pos += consumed; |
1601 | dest.pos += consumed; |
1602 | if dest.pos + 1 < dst_len { |
1603 | if non_ascii < 0xE0 { |
1604 | let point = ((u16::from(non_ascii) & 0x1F) << 6) |
1605 | | (u16::from(self.slice[self.pos + 1]) & 0x3F); |
1606 | self.pos += 2; |
1607 | NonAscii::BmpExclAscii(point) |
1608 | } else if non_ascii < 0xF0 { |
1609 | let point = ((u16::from(non_ascii) & 0xF) << 12) |
1610 | | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6) |
1611 | | (u16::from(self.slice[self.pos + 2]) & 0x3F); |
1612 | self.pos += 3; |
1613 | NonAscii::BmpExclAscii(point) |
1614 | } else { |
1615 | let point = ((u32::from(non_ascii) & 0x7) << 18) |
1616 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) |
1617 | | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) |
1618 | | (u32::from(self.slice[self.pos + 3]) & 0x3F); |
1619 | self.pos += 4; |
1620 | NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) |
1621 | } |
1622 | } else { |
1623 | return CopyAsciiResult::Stop(( |
1624 | EncoderResult::OutputFull, |
1625 | self.pos, |
1626 | dest.pos, |
1627 | )); |
1628 | } |
1629 | } |
1630 | } |
1631 | }; |
1632 | CopyAsciiResult::GoOn((non_ascii_ret, ByteTwoHandle::new(dest))) |
1633 | } |
1634 | #[inline (always)] |
1635 | pub fn copy_ascii_to_check_space_four<'b>( |
1636 | &mut self, |
1637 | dest: &'b mut ByteDestination<'a>, |
1638 | ) -> CopyAsciiResult<(EncoderResult, usize, usize), (NonAscii, ByteFourHandle<'b, 'a>)> { |
1639 | let non_ascii_ret = { |
1640 | let dst_len = dest.slice.len(); |
1641 | let src_remaining = &self.slice[self.pos..]; |
1642 | let dst_remaining = &mut dest.slice[dest.pos..]; |
1643 | let (pending, length) = if dst_remaining.len() < src_remaining.len() { |
1644 | (EncoderResult::OutputFull, dst_remaining.len()) |
1645 | } else { |
1646 | (EncoderResult::InputEmpty, src_remaining.len()) |
1647 | }; |
1648 | match unsafe { |
1649 | ascii_to_ascii(src_remaining.as_ptr(), dst_remaining.as_mut_ptr(), length) |
1650 | } { |
1651 | None => { |
1652 | self.pos += length; |
1653 | dest.pos += length; |
1654 | return CopyAsciiResult::Stop((pending, self.pos, dest.pos)); |
1655 | } |
1656 | Some((non_ascii, consumed)) => { |
1657 | self.pos += consumed; |
1658 | dest.pos += consumed; |
1659 | if dest.pos + 3 < dst_len { |
1660 | if non_ascii < 0xE0 { |
1661 | let point = ((u16::from(non_ascii) & 0x1F) << 6) |
1662 | | (u16::from(self.slice[self.pos + 1]) & 0x3F); |
1663 | self.pos += 2; |
1664 | NonAscii::BmpExclAscii(point) |
1665 | } else if non_ascii < 0xF0 { |
1666 | let point = ((u16::from(non_ascii) & 0xF) << 12) |
1667 | | ((u16::from(self.slice[self.pos + 1]) & 0x3F) << 6) |
1668 | | (u16::from(self.slice[self.pos + 2]) & 0x3F); |
1669 | self.pos += 3; |
1670 | NonAscii::BmpExclAscii(point) |
1671 | } else { |
1672 | let point = ((u32::from(non_ascii) & 0x7) << 18) |
1673 | | ((u32::from(self.slice[self.pos + 1]) & 0x3F) << 12) |
1674 | | ((u32::from(self.slice[self.pos + 2]) & 0x3F) << 6) |
1675 | | (u32::from(self.slice[self.pos + 3]) & 0x3F); |
1676 | self.pos += 4; |
1677 | NonAscii::Astral(unsafe { ::core::char::from_u32_unchecked(point) }) |
1678 | } |
1679 | } else { |
1680 | return CopyAsciiResult::Stop(( |
1681 | EncoderResult::OutputFull, |
1682 | self.pos, |
1683 | dest.pos, |
1684 | )); |
1685 | } |
1686 | } |
1687 | } |
1688 | }; |
1689 | CopyAsciiResult::GoOn((non_ascii_ret, ByteFourHandle::new(dest))) |
1690 | } |
1691 | } |
1692 | |
1693 | pub struct Utf8ReadHandle<'a, 'b> |
1694 | where |
1695 | 'b: 'a, |
1696 | { |
1697 | source: &'a mut Utf8Source<'b>, |
1698 | } |
1699 | |
1700 | impl<'a, 'b> Utf8ReadHandle<'a, 'b> |
1701 | where |
1702 | 'b: 'a, |
1703 | { |
1704 | #[inline (always)] |
1705 | fn new(src: &'a mut Utf8Source<'b>) -> Utf8ReadHandle<'a, 'b> { |
1706 | Utf8ReadHandle { source: src } |
1707 | } |
1708 | #[inline (always)] |
1709 | pub fn read(self) -> (char, Utf8UnreadHandle<'a, 'b>) { |
1710 | let character: char = self.source.read(); |
1711 | let handle: Utf8UnreadHandle<'_, '_> = Utf8UnreadHandle::new(self.source); |
1712 | (character, handle) |
1713 | } |
1714 | #[inline (always)] |
1715 | pub fn read_enum(self) -> (Unicode, Utf8UnreadHandle<'a, 'b>) { |
1716 | let character: Unicode = self.source.read_enum(); |
1717 | let handle: Utf8UnreadHandle<'_, '_> = Utf8UnreadHandle::new(self.source); |
1718 | (character, handle) |
1719 | } |
1720 | #[inline (always)] |
1721 | pub fn consumed(&self) -> usize { |
1722 | self.source.consumed() |
1723 | } |
1724 | } |
1725 | |
1726 | pub struct Utf8UnreadHandle<'a, 'b> |
1727 | where |
1728 | 'b: 'a, |
1729 | { |
1730 | source: &'a mut Utf8Source<'b>, |
1731 | } |
1732 | |
1733 | impl<'a, 'b> Utf8UnreadHandle<'a, 'b> |
1734 | where |
1735 | 'b: 'a, |
1736 | { |
1737 | #[inline (always)] |
1738 | fn new(src: &'a mut Utf8Source<'b>) -> Utf8UnreadHandle<'a, 'b> { |
1739 | Utf8UnreadHandle { source: src } |
1740 | } |
1741 | #[inline (always)] |
1742 | pub fn unread(self) -> usize { |
1743 | self.source.unread() |
1744 | } |
1745 | #[inline (always)] |
1746 | pub fn consumed(&self) -> usize { |
1747 | self.source.consumed() |
1748 | } |
1749 | #[inline (always)] |
1750 | pub fn commit(self) -> &'a mut Utf8Source<'b> { |
1751 | self.source |
1752 | } |
1753 | } |
1754 | |
1755 | // Byte destination |
1756 | |
1757 | pub struct ByteOneHandle<'a, 'b> |
1758 | where |
1759 | 'b: 'a, |
1760 | { |
1761 | dest: &'a mut ByteDestination<'b>, |
1762 | } |
1763 | |
1764 | impl<'a, 'b> ByteOneHandle<'a, 'b> |
1765 | where |
1766 | 'b: 'a, |
1767 | { |
1768 | #[inline (always)] |
1769 | fn new(dst: &'a mut ByteDestination<'b>) -> ByteOneHandle<'a, 'b> { |
1770 | ByteOneHandle { dest: dst } |
1771 | } |
1772 | #[inline (always)] |
1773 | pub fn written(&self) -> usize { |
1774 | self.dest.written() |
1775 | } |
1776 | #[inline (always)] |
1777 | pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> { |
1778 | self.dest.write_one(first); |
1779 | self.dest |
1780 | } |
1781 | } |
1782 | |
1783 | pub struct ByteTwoHandle<'a, 'b> |
1784 | where |
1785 | 'b: 'a, |
1786 | { |
1787 | dest: &'a mut ByteDestination<'b>, |
1788 | } |
1789 | |
1790 | impl<'a, 'b> ByteTwoHandle<'a, 'b> |
1791 | where |
1792 | 'b: 'a, |
1793 | { |
1794 | #[inline (always)] |
1795 | fn new(dst: &'a mut ByteDestination<'b>) -> ByteTwoHandle<'a, 'b> { |
1796 | ByteTwoHandle { dest: dst } |
1797 | } |
1798 | #[inline (always)] |
1799 | pub fn written(&self) -> usize { |
1800 | self.dest.written() |
1801 | } |
1802 | #[inline (always)] |
1803 | pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> { |
1804 | self.dest.write_one(first); |
1805 | self.dest |
1806 | } |
1807 | #[inline (always)] |
1808 | pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> { |
1809 | self.dest.write_two(first, second); |
1810 | self.dest |
1811 | } |
1812 | } |
1813 | |
1814 | pub struct ByteThreeHandle<'a, 'b> |
1815 | where |
1816 | 'b: 'a, |
1817 | { |
1818 | dest: &'a mut ByteDestination<'b>, |
1819 | } |
1820 | |
1821 | impl<'a, 'b> ByteThreeHandle<'a, 'b> |
1822 | where |
1823 | 'b: 'a, |
1824 | { |
1825 | #[inline (always)] |
1826 | fn new(dst: &'a mut ByteDestination<'b>) -> ByteThreeHandle<'a, 'b> { |
1827 | ByteThreeHandle { dest: dst } |
1828 | } |
1829 | #[inline (always)] |
1830 | pub fn written(&self) -> usize { |
1831 | self.dest.written() |
1832 | } |
1833 | #[inline (always)] |
1834 | pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> { |
1835 | self.dest.write_one(first); |
1836 | self.dest |
1837 | } |
1838 | #[inline (always)] |
1839 | pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> { |
1840 | self.dest.write_two(first, second); |
1841 | self.dest |
1842 | } |
1843 | #[inline (always)] |
1844 | pub fn write_three(self, first: u8, second: u8, third: u8) -> &'a mut ByteDestination<'b> { |
1845 | self.dest.write_three(first, second, third); |
1846 | self.dest |
1847 | } |
1848 | #[inline (always)] |
1849 | pub fn write_three_return_written(self, first: u8, second: u8, third: u8) -> usize { |
1850 | self.dest.write_three(first, second, third); |
1851 | self.dest.written() |
1852 | } |
1853 | } |
1854 | |
1855 | pub struct ByteFourHandle<'a, 'b> |
1856 | where |
1857 | 'b: 'a, |
1858 | { |
1859 | dest: &'a mut ByteDestination<'b>, |
1860 | } |
1861 | |
1862 | impl<'a, 'b> ByteFourHandle<'a, 'b> |
1863 | where |
1864 | 'b: 'a, |
1865 | { |
1866 | #[inline (always)] |
1867 | fn new(dst: &'a mut ByteDestination<'b>) -> ByteFourHandle<'a, 'b> { |
1868 | ByteFourHandle { dest: dst } |
1869 | } |
1870 | #[inline (always)] |
1871 | pub fn written(&self) -> usize { |
1872 | self.dest.written() |
1873 | } |
1874 | #[inline (always)] |
1875 | pub fn write_one(self, first: u8) -> &'a mut ByteDestination<'b> { |
1876 | self.dest.write_one(first); |
1877 | self.dest |
1878 | } |
1879 | #[inline (always)] |
1880 | pub fn write_two(self, first: u8, second: u8) -> &'a mut ByteDestination<'b> { |
1881 | self.dest.write_two(first, second); |
1882 | self.dest |
1883 | } |
1884 | #[inline (always)] |
1885 | pub fn write_four( |
1886 | self, |
1887 | first: u8, |
1888 | second: u8, |
1889 | third: u8, |
1890 | fourth: u8, |
1891 | ) -> &'a mut ByteDestination<'b> { |
1892 | self.dest.write_four(first, second, third, fourth); |
1893 | self.dest |
1894 | } |
1895 | } |
1896 | |
1897 | pub struct ByteDestination<'a> { |
1898 | slice: &'a mut [u8], |
1899 | pos: usize, |
1900 | } |
1901 | |
1902 | impl<'a> ByteDestination<'a> { |
1903 | #[inline (always)] |
1904 | pub fn new(dst: &mut [u8]) -> ByteDestination { |
1905 | ByteDestination { slice: dst, pos: 0 } |
1906 | } |
1907 | #[inline (always)] |
1908 | pub fn check_space_one<'b>(&'b mut self) -> Space<ByteOneHandle<'b, 'a>> { |
1909 | if self.pos < self.slice.len() { |
1910 | Space::Available(ByteOneHandle::new(self)) |
1911 | } else { |
1912 | Space::Full(self.written()) |
1913 | } |
1914 | } |
1915 | #[inline (always)] |
1916 | pub fn check_space_two<'b>(&'b mut self) -> Space<ByteTwoHandle<'b, 'a>> { |
1917 | if self.pos + 1 < self.slice.len() { |
1918 | Space::Available(ByteTwoHandle::new(self)) |
1919 | } else { |
1920 | Space::Full(self.written()) |
1921 | } |
1922 | } |
1923 | #[inline (always)] |
1924 | pub fn check_space_three<'b>(&'b mut self) -> Space<ByteThreeHandle<'b, 'a>> { |
1925 | if self.pos + 2 < self.slice.len() { |
1926 | Space::Available(ByteThreeHandle::new(self)) |
1927 | } else { |
1928 | Space::Full(self.written()) |
1929 | } |
1930 | } |
1931 | #[inline (always)] |
1932 | pub fn check_space_four<'b>(&'b mut self) -> Space<ByteFourHandle<'b, 'a>> { |
1933 | if self.pos + 3 < self.slice.len() { |
1934 | Space::Available(ByteFourHandle::new(self)) |
1935 | } else { |
1936 | Space::Full(self.written()) |
1937 | } |
1938 | } |
1939 | #[inline (always)] |
1940 | pub fn written(&self) -> usize { |
1941 | self.pos |
1942 | } |
1943 | #[inline (always)] |
1944 | fn write_one(&mut self, first: u8) { |
1945 | self.slice[self.pos] = first; |
1946 | self.pos += 1; |
1947 | } |
1948 | #[inline (always)] |
1949 | fn write_two(&mut self, first: u8, second: u8) { |
1950 | self.slice[self.pos] = first; |
1951 | self.slice[self.pos + 1] = second; |
1952 | self.pos += 2; |
1953 | } |
1954 | #[inline (always)] |
1955 | fn write_three(&mut self, first: u8, second: u8, third: u8) { |
1956 | self.slice[self.pos] = first; |
1957 | self.slice[self.pos + 1] = second; |
1958 | self.slice[self.pos + 2] = third; |
1959 | self.pos += 3; |
1960 | } |
1961 | #[inline (always)] |
1962 | fn write_four(&mut self, first: u8, second: u8, third: u8, fourth: u8) { |
1963 | self.slice[self.pos] = first; |
1964 | self.slice[self.pos + 1] = second; |
1965 | self.slice[self.pos + 2] = third; |
1966 | self.slice[self.pos + 3] = fourth; |
1967 | self.pos += 4; |
1968 | } |
1969 | } |
1970 | |