1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
// Generates one byte-at-a-time decoder method named `$name`.
//
// The generated method wraps `src` in a `ByteSource` (bound to `$source`) and
// `dst` in a `$dest_struct` (bound to `$dest`), then repeatedly:
//   1. runs `$loop_preamble`,
//   2. checks for input; when there is none it runs `$eof` (only if `last`)
//      and returns `DecoderResult::InputEmpty`,
//   3. checks for output room with `$destination_check`; when there is none it
//      returns `DecoderResult::OutputFull`,
//   4. reads one byte into `$b` and runs `$body`.
//
// The identifier arguments (`$slf`, `$src_consumed`, `$dest`, `$source`, `$b`,
// `$destination_handle`, `$unread_handle`) name the bindings that the
// caller-supplied blocks are allowed to refer to.
//
// The generated method returns `(result, bytes_read, code_units_written)`.
macro_rules! decoder_function {
    ($preamble:block,
     $loop_preamble:block,
     $eof:block,
     $body:block,
     $slf:ident,
     $src_consumed:ident,
     $dest:ident,
     $source:ident,
     $b:ident,
     $destination_handle:ident,
     $unread_handle:ident,
     $destination_check:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
        pub fn $name(
            &mut $slf,
            src: &[u8],
            dst: &mut [$code_unit],
            last: bool,
        ) -> (DecoderResult, usize, usize) {
            let mut $source = ByteSource::new(src);
            let mut $dest = $dest_struct::new(dst);
            // TODO: remove this loop. As written, it re-runs `$preamble`
            // whenever the inner loop is left with `break`.
            loop {
                {
                    // Caller-supplied code
                    $preamble
                }
                loop {
                    {
                        // Caller-supplied code
                        $loop_preamble
                    }
                    // Out of input: give the caller's end-of-stream code a
                    // chance to run, then report how far we got.
                    let read_handle = match $source.check_available() {
                        Space::Full($src_consumed) => {
                            if last {
                                // Caller-supplied code
                                $eof
                            }
                            return (DecoderResult::InputEmpty, $src_consumed, $dest.written());
                        }
                        Space::Available(available) => available,
                    };
                    // Out of output room: nothing has been read for this
                    // iteration yet, so the byte stays unconsumed.
                    let $destination_handle = match $dest.$destination_check() {
                        Space::Full(written) => {
                            return (DecoderResult::OutputFull, read_handle.consumed(), written);
                        }
                        Space::Available(available) => available,
                    };
                    let ($b, $unread_handle) = read_handle.read();
                    // Caller-supplied code
                    $body
                }
            }
        }
    );
}
72 | |
// Expands `decoder_function!` twice with the same caller-supplied blocks and
// binding names: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
macro_rules! decoder_functions {
    (
        $preamble:block,
        $loop_preamble:block,
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $dest:ident,
        $source:ident,
        $b:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        // UTF-8 output flavor
        decoder_function!(
            $preamble, $loop_preamble, $eof, $body,
            $slf, $src_consumed, $dest, $source, $b,
            $destination_handle, $unread_handle, $destination_check,
            decode_to_utf8_raw, u8, Utf8Destination
        );
        // UTF-16 output flavor
        decoder_function!(
            $preamble, $loop_preamble, $eof, $body,
            $slf, $src_consumed, $dest, $source, $b,
            $destination_handle, $unread_handle, $destination_check,
            decode_to_utf16_raw, u16, Utf16Destination
        );
    };
}
124 | |
// Generates one decoder method named `$name` for an ASCII-compatible encoding
// whose non-ASCII characters are made of a lead byte followed by a trail byte.
//
// Caller-supplied blocks:
// * `$lead` evaluates to the value bound to `$lead_minus_offset`; it may read
//   `$non_ascii` and `$handle`.
// * `$trail` processes the trail byte `$byte` (with `$unread_handle_trail`
//   available for unreading) and evaluates to the destination to continue
//   writing to.
//
// Other arguments:
// * `$copy_ascii` is the destination method used for the bulk ASCII fast path.
// * `$destination_check` is the destination method that checks output room.
// * `$outermost` is the label of the outermost loop, passed in so that the
//   caller-supplied blocks can name it.
// * `$ascii_punctuation` enables staying in the byte-wise loop after an ASCII
//   byte below 60.
//
// A lead byte whose trail has not arrived yet is carried between calls in
// `$slf.lead`.
//
// The generated method returns `(result, bytes_read, code_units_written)`.
macro_rules! ascii_compatible_two_byte_decoder_function {
    ($lead:block,
     $trail:block,
     $slf:ident,
     $non_ascii:ident,
     $byte:ident,
     $lead_minus_offset:ident,
     $unread_handle_trail:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest_prolog = $dest_struct::new(dst);
        let dest = match $slf.lead {
            Some(lead) => {
                let $lead_minus_offset = lead;
                // Since we don't have `goto` we could use to jump into the trail
                // handling part of the main loop, we need to repeat trail handling
                // here.
                //
                // The buffered lead is cleared only once it has actually been
                // resolved (end of stream, or trail byte about to be processed).
                // Clearing it up front would drop it on the `InputEmpty` and
                // `OutputFull` returns below, and the next call would then
                // decode the trail byte as if no lead had been seen.
                match $source.check_available() {
                    Space::Full(src_consumed_prolog) => {
                        if last {
                            // The stream ended in the middle of a character.
                            $slf.lead = None;
                            return (DecoderResult::Malformed(1, 0),
                                    src_consumed_prolog,
                                    dest_prolog.written());
                        }
                        // More input may follow: keep the lead pending.
                        return (DecoderResult::InputEmpty, src_consumed_prolog, dest_prolog.written());
                    }
                    Space::Available(source_handle_prolog) => {
                        match dest_prolog.$destination_check() {
                            Space::Full(dst_written_prolog) => {
                                // Nothing was consumed: keep the lead pending
                                // so the caller can retry with more room.
                                return (DecoderResult::OutputFull,
                                        source_handle_prolog.consumed(),
                                        dst_written_prolog);
                            }
                            Space::Available($handle) => {
                                $slf.lead = None;
                                let ($byte, $unread_handle_trail) = source_handle_prolog.read();
                                // Start non-boilerplate
                                $trail
                                // End non-boilerplate
                            }
                        }
                    }
                }
            },
            None => {
                &mut dest_prolog
            }
        };
        $outermost: loop {
            // Fast path: bulk-copy ASCII until a non-ASCII byte shows up.
            match dest.$copy_ascii(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    'middle: loop {
                        let dest_again = {
                            let $lead_minus_offset = {
                                // Start non-boilerplate
                                $lead
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    // Remember the lead for the next call.
                                    $slf.lead = Some($lead_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($byte, $unread_handle_trail) = source_handle_trail.read();
                                    // Start non-boilerplate
                                    $trail
                                    // End non-boilerplate
                                }
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut b, unread_handle) = source_handle.read();
                                        let source_again = unread_handle.commit();
                                        'innermost: loop {
                                            if b > 127 {
                                                // Another non-ASCII byte: treat
                                                // it as the next lead.
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_ascii(b);
                                            if $ascii_punctuation && b < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (DecoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                {
                                                                    let (b_again, _unread_handle_again) =
                                                                        source_handle_again.read();
                                                                    b = b_again;
                                                                    destination_handle = destination_handle_again;
                                                                    continue 'innermost;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
285 | |
// Expands `ascii_compatible_two_byte_decoder_function!` twice with the same
// caller-supplied arguments: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
macro_rules! ascii_compatible_two_byte_decoder_functions {
    (
        $lead_body:block,
        $trail_body:block,
        $slf:ident,
        $non_ascii:ident,
        $byte:ident,
        $lead_minus_offset:ident,
        $unread_handle_trail:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        // UTF-8 output flavor
        ascii_compatible_two_byte_decoder_function!(
            $lead_body, $trail_body,
            $slf, $non_ascii, $byte, $lead_minus_offset, $unread_handle_trail,
            $source, $handle, $outermost, $copy_ascii, $destination_check,
            decode_to_utf8_raw, u8, Utf8Destination,
            $ascii_punctuation
        );
        // UTF-16 output flavor
        ascii_compatible_two_byte_decoder_function!(
            $lead_body, $trail_body,
            $slf, $non_ascii, $byte, $lead_minus_offset, $unread_handle_trail,
            $source, $handle, $outermost, $copy_ascii, $destination_check,
            decode_to_utf16_raw, u16, Utf16Destination,
            $ascii_punctuation
        );
    };
}
340 | |
// Generates one gb18030 decoder method named `$name`.
//
// The generated method has three phases:
//   1. If `$slf.pending_ascii` holds a byte, write it out first.
//   2. While `$slf.pending` holds the beginning of a multi-byte sequence left
//      over from an earlier call, feed it one more byte at a time.
//   3. Run the main loop: bulk-copy ASCII, then handle two-byte and four-byte
//      sequences through the caller-supplied `$first_body`, `$second_body`,
//      `$third_body` and `$fourth_body` blocks.
//
// The identifier arguments name the bindings the caller-supplied blocks may
// refer to; `$outermost` is the label of the main loop. `$fourth_minus_offset`
// is accepted for the callers' benefit but not referenced by the expansion.
//
// The generated method returns `(result, bytes_read, code_units_written)`.
macro_rules! gb18030_decoder_function {
    ($first_body:block,
     $second_body:block,
     $third_body:block,
     $fourth_body:block,
     $slf:ident,
     $non_ascii:ident,
     $first_minus_offset:ident,
     $second:ident,
     $second_minus_offset:ident,
     $unread_handle_second:ident,
     $third:ident,
     $third_minus_offset:ident,
     $unread_handle_third:ident,
     $fourth:ident,
     $fourth_minus_offset:ident,
     $unread_handle_fourth:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
        #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
        pub fn $name(
            &mut $slf,
            src: &[u8],
            dst: &mut [$code_unit],
            last: bool,
        ) -> (DecoderResult, usize, usize) {
            let mut $source = ByteSource::new(src);
            let mut dest = $dest_struct::new(dst);

            // Phase 1: emit the ASCII byte that an earlier call left behind.
            if let Some(deferred_ascii) = $slf.pending_ascii {
                match dest.check_space_bmp() {
                    Space::Full(_) => {
                        return (DecoderResult::OutputFull, 0, 0);
                    }
                    Space::Available(deferred_handle) => {
                        $slf.pending_ascii = None;
                        deferred_handle.write_ascii(deferred_ascii);
                    }
                }
            }

            // Phase 2: resume a sequence that was split across calls.
            while !$slf.pending.is_none() {
                match $source.check_available() {
                    Space::Full(resume_consumed) => {
                        if last {
                            // The stream ended inside a sequence: report all
                            // of the buffered bytes as malformed.
                            let buffered = $slf.pending.count();
                            $slf.pending = Gb18030Pending::None;
                            return (DecoderResult::Malformed(buffered as u8, 0),
                                    resume_consumed,
                                    dest.written());
                        }
                        return (DecoderResult::InputEmpty, resume_consumed, dest.written());
                    }
                    Space::Available(resume_read_handle) => {
                        match dest.check_space_astral() {
                            Space::Full(resume_written) => {
                                return (DecoderResult::OutputFull,
                                        resume_read_handle.consumed(),
                                        resume_written);
                            }
                            Space::Available($handle) => {
                                let (resumed_byte, resumed_unread) = resume_read_handle.read();
                                match $slf.pending {
                                    Gb18030Pending::One($first_minus_offset) => {
                                        $slf.pending = Gb18030Pending::None;
                                        let $second = resumed_byte;
                                        let $unread_handle_second = resumed_unread;
                                        // If second is between 0x40 and 0x7E,
                                        // inclusive, subtract offset 0x40. Else if
                                        // second is between 0x80 and 0xFE, inclusive,
                                        // subtract offset 0x41. In both cases,
                                        // handle as a two-byte sequence.
                                        // Else if second is between 0x30 and 0x39,
                                        // inclusive, subtract offset 0x30 and
                                        // handle as a four-byte sequence.
                                        let $second_minus_offset = $second.wrapping_sub(0x30);
                                        // It's not optimal to do this check first,
                                        // but this results in more readable code.
                                        if $second_minus_offset > (0x39 - 0x30) {
                                            // Caller-supplied code
                                            $second_body
                                        } else {
                                            // Four-byte sequence: wait for the third byte.
                                            $slf.pending = Gb18030Pending::Two($first_minus_offset,
                                                                               $second_minus_offset);
                                            $handle.commit()
                                        }
                                    }
                                    Gb18030Pending::Two($first_minus_offset, $second_minus_offset) => {
                                        $slf.pending = Gb18030Pending::None;
                                        let $third = resumed_byte;
                                        let $unread_handle_third = resumed_unread;
                                        let $third_minus_offset = {
                                            // Caller-supplied code
                                            $third_body
                                        };
                                        // Wait for the fourth byte.
                                        $slf.pending = Gb18030Pending::Three($first_minus_offset,
                                                                             $second_minus_offset,
                                                                             $third_minus_offset);
                                        $handle.commit()
                                    }
                                    Gb18030Pending::Three($first_minus_offset,
                                                          $second_minus_offset,
                                                          $third_minus_offset) => {
                                        $slf.pending = Gb18030Pending::None;
                                        let $fourth = resumed_byte;
                                        let $unread_handle_fourth = resumed_unread;
                                        // Caller-supplied code
                                        $fourth_body
                                    }
                                    Gb18030Pending::None => unreachable!("Checked in loop condition"),
                                };
                            }
                        }
                    }
                }
            }

            // Phase 3: main loop.
            $outermost: loop {
                match dest.copy_ascii_from_check_space_astral(&mut $source) {
                    CopyAsciiResult::Stop(ret) => return ret,
                    CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                        'middle: loop {
                            let dest_after_char = {
                                let $first_minus_offset = {
                                    // Caller-supplied code
                                    $first_body
                                };
                                match $source.check_available() {
                                    Space::Full(consumed_after_first) => {
                                        if last {
                                            return (DecoderResult::Malformed(1, 0),
                                                    consumed_after_first,
                                                    $handle.written());
                                        }
                                        $slf.pending = Gb18030Pending::One($first_minus_offset);
                                        return (DecoderResult::InputEmpty,
                                                consumed_after_first,
                                                $handle.written());
                                    }
                                    Space::Available(second_read_handle) => {
                                        let ($second, $unread_handle_second) = second_read_handle.read();
                                        // If second is between 0x40 and 0x7E,
                                        // inclusive, subtract offset 0x40. Else if
                                        // second is between 0x80 and 0xFE, inclusive,
                                        // subtract offset 0x41. In both cases,
                                        // handle as a two-byte sequence.
                                        // Else if second is between 0x30 and 0x39,
                                        // inclusive, subtract offset 0x30 and
                                        // handle as a four-byte sequence.
                                        let $second_minus_offset = $second.wrapping_sub(0x30);
                                        // It's not optimal to do this check first,
                                        // but this results in more readable code.
                                        if $second_minus_offset > (0x39 - 0x30) {
                                            // Caller-supplied code
                                            $second_body
                                        } else {
                                            // Four-byte sequence
                                            match $unread_handle_second.commit().check_available() {
                                                Space::Full(consumed_after_second) => {
                                                    if last {
                                                        return (DecoderResult::Malformed(2, 0),
                                                                consumed_after_second,
                                                                $handle.written());
                                                    }
                                                    $slf.pending =
                                                        Gb18030Pending::Two($first_minus_offset,
                                                                            $second_minus_offset);
                                                    return (DecoderResult::InputEmpty,
                                                            consumed_after_second,
                                                            $handle.written());
                                                }
                                                Space::Available(third_read_handle) => {
                                                    let ($third, $unread_handle_third) =
                                                        third_read_handle.read();
                                                    let $third_minus_offset = {
                                                        // Caller-supplied code
                                                        $third_body
                                                    };
                                                    match $unread_handle_third.commit()
                                                                              .check_available() {
                                                        Space::Full(consumed_after_third) => {
                                                            if last {
                                                                return (DecoderResult::Malformed(3, 0),
                                                                        consumed_after_third,
                                                                        $handle.written());
                                                            }
                                                            $slf.pending =
                                                                Gb18030Pending::Three($first_minus_offset,
                                                                                      $second_minus_offset,
                                                                                      $third_minus_offset);
                                                            return (DecoderResult::InputEmpty,
                                                                    consumed_after_third,
                                                                    $handle.written());
                                                        }
                                                        Space::Available(fourth_read_handle) => {
                                                            let ($fourth, $unread_handle_fourth) =
                                                                fourth_read_handle.read();
                                                            // Caller-supplied code
                                                            $fourth_body
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            };
                            // One character is done; look at the byte after it.
                            match $source.check_available() {
                                Space::Full(consumed_total) => {
                                    return (DecoderResult::InputEmpty,
                                            consumed_total,
                                            dest_after_char.written());
                                }
                                Space::Available(next_read_handle) => {
                                    match dest_after_char.check_space_astral() {
                                        Space::Full(written_total) => {
                                            return (DecoderResult::OutputFull,
                                                    next_read_handle.consumed(),
                                                    written_total);
                                        }
                                        Space::Available(next_dest_handle) => {
                                            let (next_byte, _) = next_read_handle.read();
                                            if next_byte > 127 {
                                                // Another non-ASCII byte: it
                                                // starts the next sequence.
                                                $non_ascii = next_byte;
                                                $handle = next_dest_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            next_dest_handle.write_ascii(next_byte);
                                            // We've got markup or ASCII text
                                            continue $outermost
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    );
}
597 | |
// Expands `gb18030_decoder_function!` twice with the same caller-supplied
// arguments: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
macro_rules! gb18030_decoder_functions {
    (
        $first_body:block,
        $second_body:block,
        $third_body:block,
        $fourth_body:block,
        $slf:ident,
        $non_ascii:ident,
        $first_minus_offset:ident,
        $second:ident,
        $second_minus_offset:ident,
        $unread_handle_second:ident,
        $third:ident,
        $third_minus_offset:ident,
        $unread_handle_third:ident,
        $fourth:ident,
        $fourth_minus_offset:ident,
        $unread_handle_fourth:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt
    ) => {
        // UTF-8 output flavor
        gb18030_decoder_function!(
            $first_body, $second_body, $third_body, $fourth_body,
            $slf, $non_ascii, $first_minus_offset,
            $second, $second_minus_offset, $unread_handle_second,
            $third, $third_minus_offset, $unread_handle_third,
            $fourth, $fourth_minus_offset, $unread_handle_fourth,
            $source, $handle, $outermost,
            decode_to_utf8_raw, u8, Utf8Destination
        );
        // UTF-16 output flavor
        gb18030_decoder_function!(
            $first_body, $second_body, $third_body, $fourth_body,
            $slf, $non_ascii, $first_minus_offset,
            $second, $second_minus_offset, $unread_handle_second,
            $third, $third_minus_offset, $unread_handle_third,
            $fourth, $fourth_minus_offset, $unread_handle_fourth,
            $source, $handle, $outermost,
            decode_to_utf16_raw, u16, Utf16Destination
        );
    };
}
670 | |
// Generates one EUC-JP decoder method named `$name`.
//
// The generated method has two phases:
//   1. While `$slf.pending` holds the beginning of a multi-byte sequence left
//      over from an earlier call, feed it one more byte at a time.
//   2. Run the main loop: bulk-copy ASCII, then dispatch on the non-ASCII
//      lead byte (0xA1..=0xFE: JIS 0208, 0x8F: JIS 0212, 0x8E: half-width
//      Katakana, anything else: malformed).
//
// Caller-supplied blocks:
// * `$jis0208_trail_body` handles the trail byte of a JIS 0208 pair.
// * `$jis0212_lead_body` evaluates to the value bound to
//   `$jis0212_lead_minus_offset`.
// * `$jis0212_trail_body` handles the trail byte of a JIS 0212 sequence.
// * `$half_width_katakana_body` handles the byte following 0x8E.
//
// The identifier arguments name the bindings those blocks may refer to.
//
// The generated method returns `(result, bytes_read, code_units_written)`.
macro_rules! euc_jp_decoder_function {
    ($jis0208_trail_body:block,
     $jis0212_lead_body:block,
     $jis0212_trail_body:block,
     $half_width_katakana_body:block,
     $slf:ident,
     $non_ascii:ident,
     $jis0208_lead_minus_offset:ident,
     $byte:ident,
     $unread_handle_trail:ident,
     $jis0212_lead_minus_offset:ident,
     $lead:ident,
     $unread_handle_jis0212:ident,
     $source:ident,
     $handle:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
        #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
        pub fn $name(
            &mut $slf,
            src: &[u8],
            dst: &mut [$code_unit],
            last: bool,
        ) -> (DecoderResult, usize, usize) {
            let mut $source = ByteSource::new(src);
            let mut dest = $dest_struct::new(dst);

            // Phase 1: resume a sequence that was split across calls.
            while !$slf.pending.is_none() {
                match $source.check_available() {
                    Space::Full(resume_consumed) => {
                        if last {
                            // The stream ended inside a sequence: report all
                            // of the buffered bytes as malformed.
                            let buffered = $slf.pending.count();
                            $slf.pending = EucJpPending::None;
                            return (DecoderResult::Malformed(buffered as u8, 0),
                                    resume_consumed,
                                    dest.written());
                        }
                        return (DecoderResult::InputEmpty, resume_consumed, dest.written());
                    }
                    Space::Available(resume_read_handle) => {
                        match dest.check_space_bmp() {
                            Space::Full(resume_written) => {
                                return (DecoderResult::OutputFull,
                                        resume_read_handle.consumed(),
                                        resume_written);
                            }
                            Space::Available($handle) => {
                                let ($byte, $unread_handle_trail) = resume_read_handle.read();
                                match $slf.pending {
                                    EucJpPending::Jis0208Lead($jis0208_lead_minus_offset) => {
                                        $slf.pending = EucJpPending::None;
                                        // Caller-supplied code
                                        $jis0208_trail_body
                                    }
                                    EucJpPending::Jis0212Shift => {
                                        $slf.pending = EucJpPending::None;
                                        let $lead = $byte;
                                        let $unread_handle_jis0212 = $unread_handle_trail;
                                        let $jis0212_lead_minus_offset = {
                                            // Caller-supplied code
                                            $jis0212_lead_body
                                        };
                                        // Wait for the JIS 0212 trail byte.
                                        $slf.pending =
                                            EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);
                                        $handle.commit()
                                    }
                                    EucJpPending::Jis0212Lead($jis0212_lead_minus_offset) => {
                                        $slf.pending = EucJpPending::None;
                                        // Caller-supplied code
                                        $jis0212_trail_body
                                    }
                                    EucJpPending::HalfWidthKatakana => {
                                        $slf.pending = EucJpPending::None;
                                        // Caller-supplied code
                                        $half_width_katakana_body
                                    }
                                    EucJpPending::None => unreachable!("Checked in loop condition"),
                                };
                            }
                        }
                    }
                }
            }

            // Phase 2: main loop.
            'outermost: loop {
                match dest.copy_ascii_from_check_space_bmp(&mut $source) {
                    CopyAsciiResult::Stop(ret) => return ret,
                    CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                        'middle: loop {
                            let dest_after_char = {
                                // If lead is between 0xA1 and 0xFE, inclusive,
                                // subtract 0xA1. Else if lead is 0x8E, handle the
                                // next byte as half-width Katakana. Else if lead is
                                // 0x8F, expect JIS 0212.
                                let $jis0208_lead_minus_offset = $non_ascii.wrapping_sub(0xA1);
                                if $jis0208_lead_minus_offset <= (0xFE - 0xA1) {
                                    // JIS 0208
                                    match $source.check_available() {
                                        Space::Full(consumed_after_lead) => {
                                            if last {
                                                return (DecoderResult::Malformed(1, 0),
                                                        consumed_after_lead,
                                                        $handle.written());
                                            }
                                            $slf.pending =
                                                EucJpPending::Jis0208Lead($jis0208_lead_minus_offset);
                                            return (DecoderResult::InputEmpty,
                                                    consumed_after_lead,
                                                    $handle.written());
                                        }
                                        Space::Available(trail_read_handle) => {
                                            let ($byte, $unread_handle_trail) =
                                                trail_read_handle.read();
                                            // Caller-supplied code
                                            $jis0208_trail_body
                                        }
                                    }
                                } else if $non_ascii == 0x8F {
                                    // JIS 0212: shift byte, lead byte, trail byte
                                    match $source.check_available() {
                                        Space::Full(consumed_after_shift) => {
                                            if last {
                                                return (DecoderResult::Malformed(1, 0),
                                                        consumed_after_shift,
                                                        $handle.written());
                                            }
                                            $slf.pending = EucJpPending::Jis0212Shift;
                                            return (DecoderResult::InputEmpty,
                                                    consumed_after_shift,
                                                    $handle.written());
                                        }
                                        Space::Available(jis0212_read_handle) => {
                                            let ($lead, $unread_handle_jis0212) =
                                                jis0212_read_handle.read();
                                            let $jis0212_lead_minus_offset = {
                                                // Caller-supplied code
                                                $jis0212_lead_body
                                            };
                                            match $unread_handle_jis0212.commit().check_available() {
                                                Space::Full(consumed_after_lead) => {
                                                    if last {
                                                        return (DecoderResult::Malformed(2, 0),
                                                                consumed_after_lead,
                                                                $handle.written());
                                                    }
                                                    $slf.pending =
                                                        EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);
                                                    return (DecoderResult::InputEmpty,
                                                            consumed_after_lead,
                                                            $handle.written());
                                                }
                                                Space::Available(trail_read_handle) => {
                                                    let ($byte, $unread_handle_trail) =
                                                        trail_read_handle.read();
                                                    // Caller-supplied code
                                                    $jis0212_trail_body
                                                }
                                            }
                                        }
                                    }
                                } else if $non_ascii == 0x8E {
                                    // Half-width Katakana
                                    match $source.check_available() {
                                        Space::Full(consumed_after_lead) => {
                                            if last {
                                                return (DecoderResult::Malformed(1, 0),
                                                        consumed_after_lead,
                                                        $handle.written());
                                            }
                                            $slf.pending = EucJpPending::HalfWidthKatakana;
                                            return (DecoderResult::InputEmpty,
                                                    consumed_after_lead,
                                                    $handle.written());
                                        }
                                        Space::Available(trail_read_handle) => {
                                            let ($byte, $unread_handle_trail) =
                                                trail_read_handle.read();
                                            // Caller-supplied code
                                            $half_width_katakana_body
                                        }
                                    }
                                } else {
                                    // Not a valid lead byte.
                                    return (DecoderResult::Malformed(1, 0),
                                            $source.consumed(),
                                            $handle.written());
                                }
                            };
                            // One character is done; look at the byte after it.
                            match $source.check_available() {
                                Space::Full(consumed_total) => {
                                    return (DecoderResult::InputEmpty,
                                            consumed_total,
                                            dest_after_char.written());
                                }
                                Space::Available(next_read_handle) => {
                                    match dest_after_char.check_space_bmp() {
                                        Space::Full(written_total) => {
                                            return (DecoderResult::OutputFull,
                                                    next_read_handle.consumed(),
                                                    written_total);
                                        }
                                        Space::Available(next_dest_handle) => {
                                            let (next_byte, _) = next_read_handle.read();
                                            if next_byte > 127 {
                                                // Another non-ASCII byte: it
                                                // starts the next sequence.
                                                $non_ascii = next_byte;
                                                $handle = next_dest_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            next_dest_handle.write_ascii(next_byte);
                                            // We've got markup or ASCII text
                                            continue 'outermost
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    );
}
901 | |
// Expands `euc_jp_decoder_function!` twice with the same caller-supplied
// arguments: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
macro_rules! euc_jp_decoder_functions {
    (
        $jis0208_trail_body:block,
        $jis0212_lead_body:block,
        $jis0212_trail_body:block,
        $half_width_katakana_body:block,
        $slf:ident,
        $non_ascii:ident,
        $jis0208_lead_minus_offset:ident,
        $byte:ident,
        $unread_handle_trail:ident,
        $jis0212_lead_minus_offset:ident,
        $lead:ident,
        $unread_handle_jis0212:ident,
        $source:ident,
        $handle:ident
    ) => {
        // UTF-8 output flavor
        euc_jp_decoder_function!(
            $jis0208_trail_body, $jis0212_lead_body,
            $jis0212_trail_body, $half_width_katakana_body,
            $slf, $non_ascii, $jis0208_lead_minus_offset,
            $byte, $unread_handle_trail,
            $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212,
            $source, $handle,
            decode_to_utf8_raw, u8, Utf8Destination
        );
        // UTF-16 output flavor
        euc_jp_decoder_function!(
            $jis0208_trail_body, $jis0212_lead_body,
            $jis0212_trail_body, $half_width_katakana_body,
            $slf, $non_ascii, $jis0208_lead_minus_offset,
            $byte, $unread_handle_trail,
            $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212,
            $source, $handle,
            decode_to_utf16_raw, u16, Utf16Destination
        );
    };
}
959 | |
// Generates one unit-at-a-time encoder method named `$name`.
//
// The generated method wraps `src` in a `$source_struct` (bound to `$source`)
// and `dst` in a `ByteDestination` (bound to `$dest`), then repeatedly:
//   1. checks for input; when there is none it runs `$eof` (only if `last`)
//      and returns `EncoderResult::InputEmpty`,
//   2. checks for output room with `$destination_check`; when there is none it
//      returns `EncoderResult::OutputFull`,
//   3. reads one unit into `$c` and runs `$body`.
//
// The identifier arguments (`$slf`, `$src_consumed`, `$source`, `$dest`, `$c`,
// `$destination_handle`, `$unread_handle`) name the bindings that the
// caller-supplied blocks are allowed to refer to.
//
// The generated method returns `(result, units_read, bytes_written)`.
macro_rules! encoder_function {
    ($eof:block,
     $body:block,
     $slf:ident,
     $src_consumed:ident,
     $source:ident,
     $dest:ident,
     $c:ident,
     $destination_handle:ident,
     $unread_handle:ident,
     $destination_check:ident,
     $name:ident,
     $input:ty,
     $source_struct:ident) => (
        pub fn $name(
            &mut $slf,
            src: &$input,
            dst: &mut [u8],
            last: bool,
        ) -> (EncoderResult, usize, usize) {
            let mut $source = $source_struct::new(src);
            let mut $dest = ByteDestination::new(dst);
            loop {
                // Out of input: give the caller's end-of-stream code a chance
                // to run, then report how far we got.
                let read_handle = match $source.check_available() {
                    Space::Full($src_consumed) => {
                        if last {
                            // Caller-supplied code
                            $eof
                        }
                        return (EncoderResult::InputEmpty, $src_consumed, $dest.written());
                    }
                    Space::Available(available) => available,
                };
                // Out of output room: nothing has been read for this
                // iteration yet, so the unit stays unconsumed.
                let $destination_handle = match $dest.$destination_check() {
                    Space::Full(written) => {
                        return (EncoderResult::OutputFull, read_handle.consumed(), written);
                    }
                    Space::Available(available) => available,
                };
                let ($c, $unread_handle) = read_handle.read();
                // Caller-supplied code
                $body
            }
        }
    );
}
1010 | |
// Expands `encoder_function!` twice with the same caller-supplied blocks and
// binding names: once as `encode_from_utf8_raw` (`str` input through
// `Utf8Source`) and once as `encode_from_utf16_raw` (`[u16]` input through
// `Utf16Source`).
macro_rules! encoder_functions {
    (
        $eof_body:block,
        $unit_body:block,
        $slf:ident,
        $src_consumed:ident,
        $source:ident,
        $dest:ident,
        $c:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        // UTF-8 input flavor
        encoder_function!(
            $eof_body, $unit_body,
            $slf, $src_consumed, $source, $dest, $c,
            $destination_handle, $unread_handle, $destination_check,
            encode_from_utf8_raw, str, Utf8Source
        );
        // UTF-16 input flavor
        encoder_function!(
            $eof_body, $unit_body,
            $slf, $src_consumed, $source, $dest, $c,
            $destination_handle, $unread_handle, $destination_check,
            encode_from_utf16_raw, [u16], Utf16Source
        );
    };
}
1056 | |
// Expands to one raw encoder method for an ASCII-compatible encoding:
//
//     pub fn $name(&mut $slf, src: &$input, dst: &mut [u8], _last: bool)
//         -> (EncoderResult, usize, usize)
//
// The returned tuple is (result, input consumed, output written), as reported
// by the source and destination wrappers.
//
// Macro arguments:
// * `$bmp_body` / `$astral_body`: caller-supplied blocks run for
//   `NonAscii::BmpExclAscii($bmp)` and `NonAscii::Astral($astral)`
//   respectively. Each block must either `return` from the generated function
//   or evaluate to a value that supports `.written()` and
//   `.$destination_check()` (it is bound to `dest_again` below).
// * `$bmp`, `$astral`: identifiers the blocks use to name the matched value.
// * `$slf`: identifier used for the method receiver.
// * `$source`: identifier bound to `$source_struct::new(src)`.
// * `$handle`: identifier bound to the destination handle that accompanies
//   the current non-ASCII unit; available to the blocks.
// * `$copy_ascii`: name of the source method that copies ASCII into the
//   destination and yields a `CopyAsciiResult`.
// * `$destination_check`: name of the destination method that yields a
//   `Space` (full vs. available) value.
// * `$name`, `$input`, `$source_struct`: generated method name, input slice
//   type and source wrapper type.
// * `$ascii_punctuation`: boolean expression; when true, an ASCII byte below
//   60 (`<`) that follows a non-ASCII unit keeps the generated code in the
//   one-unit-at-a-time loop instead of going back to `$copy_ascii`.
//
// `_last` is accepted for signature compatibility but is not used by this
// boilerplate.
macro_rules! ascii_compatible_encoder_function {
    ($bmp_body:block,
     $astral_body:block,
     $bmp:ident,
     $astral:ident,
     $slf:ident,
     $source:ident,
     $handle:ident,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $input:ty,
     $source_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &$input,
                 dst: &mut [u8],
                 _last: bool)
                 -> (EncoderResult, usize, usize) {
        let mut $source = $source_struct::new(src);
        let mut dest = ByteDestination::new(dst);
        // 'outermost: each iteration starts with the bulk ASCII copy.
        'outermost: loop {
            match $source.$copy_ascii(&mut dest) {
                // The ASCII copier already produced the final result tuple.
                CopyAsciiResult::Stop(ret) => return ret,
                // The copier stopped at a non-ASCII unit and handed over a
                // destination handle to write its encoding with.
                CopyAsciiResult::GoOn((mut non_ascii, mut $handle)) => {
                    // 'middle: each iteration encodes the non-ASCII unit held
                    // in `non_ascii` using the handle held in `$handle`.
                    'middle: loop {
                        let dest_again = match non_ascii {
                            NonAscii::BmpExclAscii($bmp) => {
                                // Start non-boilerplate
                                $bmp_body
                                // End non-boilerplate
                            }
                            NonAscii::Astral($astral) => {
                                // Start non-boilerplate
                                $astral_body
                                // End non-boilerplate
                            }
                        };
                        // Look at the unit after the non-ASCII one before
                        // deciding which loop to resume.
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (EncoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        // Checked before reading, so the
                                        // unit is not counted as consumed.
                                        return (EncoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut c, unread_handle) = source_handle.read_enum();
                                        let source_again = unread_handle.commit();
                                        // 'innermost: each iteration handles
                                        // the unit in `c` with the handle in
                                        // `destination_handle`.
                                        'innermost: loop {
                                            let ascii = match c {
                                                Unicode::NonAscii(non_ascii_again) => {
                                                    // Another non-ASCII unit:
                                                    // encode it in the next
                                                    // 'middle iteration.
                                                    non_ascii = non_ascii_again;
                                                    $handle = destination_handle;
                                                    continue 'middle;
                                                }
                                                Unicode::Ascii(a) => a,
                                            };
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_one(ascii);
                                            // 60 is ASCII `<`; only bytes below
                                            // it take this branch.
                                            if $ascii_punctuation && ascii < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (EncoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (EncoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                {
                                                                    // Read the next unit and go
                                                                    // around 'innermost again
                                                                    // with the fresh handle.
                                                                    let (c_again, _unread_handle_again) =
                                                                        source_handle_again.read_enum();
                                                                    c = c_again;
                                                                    destination_handle = destination_handle_again;
                                                                    continue 'innermost;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue 'outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
1166 | |
// Expands `ascii_compatible_encoder_function!` twice so that one invocation
// yields both raw encoder entry points of an ASCII-compatible encoder:
//
// * `encode_from_utf8_raw`, taking `str` input read through `Utf8Source`
// * `encode_from_utf16_raw`, taking `[u16]` input read through `Utf16Source`
//
// All ten arguments are forwarded unchanged and in the same order to both
// expansions; only the method name, input type and source wrapper differ.
macro_rules! ascii_compatible_encoder_functions {
    ($on_bmp:block,
     $on_astral:block,
     $bmp_char:ident,
     $astral_char:ident,
     $this:ident,
     $src_var:ident,
     $dst_handle:ident,
     $ascii_copier:ident,
     $space_check:ident,
     $punctuation_flag:expr) => (
        // UTF-8 flavor.
        ascii_compatible_encoder_function!($on_bmp,
                                           $on_astral,
                                           $bmp_char,
                                           $astral_char,
                                           $this,
                                           $src_var,
                                           $dst_handle,
                                           $ascii_copier,
                                           $space_check,
                                           encode_from_utf8_raw,
                                           str,
                                           Utf8Source,
                                           $punctuation_flag);
        // UTF-16 flavor.
        ascii_compatible_encoder_function!($on_bmp,
                                           $on_astral,
                                           $bmp_char,
                                           $astral_char,
                                           $this,
                                           $src_var,
                                           $dst_handle,
                                           $ascii_copier,
                                           $space_check,
                                           encode_from_utf16_raw,
                                           [u16],
                                           Utf16Source,
                                           $punctuation_flag);
    );
}
1212 | |
// Variant of `ascii_compatible_encoder_function!` for callers that only
// supply a block for the `NonAscii::BmpExclAscii` case.
//
// The astral block is filled in here: it makes the generated method return
// `EncoderResult::Unmappable` carrying the astral value, together with
// `$src_var.consumed()` and `$dst_handle.written()`. Every other argument is
// forwarded unchanged.
macro_rules! ascii_compatible_bmp_encoder_function {
    ($on_bmp:block,
     $bmp_char:ident,
     $this:ident,
     $src_var:ident,
     $dst_handle:ident,
     $ascii_copier:ident,
     $space_check:ident,
     $fn_name:ident,
     $input_ty:ty,
     $source_ty:ident,
     $punctuation_flag:expr) => (
        ascii_compatible_encoder_function!($on_bmp,
                                           {
                                               // `astral_char` is the name passed
                                               // below as the astral binding.
                                               return (EncoderResult::Unmappable(astral_char),
                                                       $src_var.consumed(),
                                                       $dst_handle.written());
                                           },
                                           $bmp_char,
                                           astral_char,
                                           $this,
                                           $src_var,
                                           $dst_handle,
                                           $ascii_copier,
                                           $space_check,
                                           $fn_name,
                                           $input_ty,
                                           $source_ty,
                                           $punctuation_flag);
    );
}
1250 | |
// Variant of `ascii_compatible_encoder_functions!` for callers that only
// supply a block for the `NonAscii::BmpExclAscii` case.
//
// The astral block is filled in here: it makes each generated method return
// `EncoderResult::Unmappable` carrying the astral value, together with
// `$src_var.consumed()` and `$dst_handle.written()`. Every other argument is
// forwarded unchanged.
macro_rules! ascii_compatible_bmp_encoder_functions {
    ($on_bmp:block,
     $bmp_char:ident,
     $this:ident,
     $src_var:ident,
     $dst_handle:ident,
     $ascii_copier:ident,
     $space_check:ident,
     $punctuation_flag:expr) => (
        ascii_compatible_encoder_functions!($on_bmp,
                                            {
                                                // `astral_char` is the name passed
                                                // below as the astral binding.
                                                return (EncoderResult::Unmappable(astral_char),
                                                        $src_var.consumed(),
                                                        $dst_handle.written());
                                            },
                                            $bmp_char,
                                            astral_char,
                                            $this,
                                            $src_var,
                                            $dst_handle,
                                            $ascii_copier,
                                            $space_check,
                                            $punctuation_flag);
    );
}
1282 | |
// Expands to a decoder's public streaming entry point plus the four private
// helpers it relies on. The expansion must be placed in an `impl` whose
// `self` has the fields `life_cycle`, `encoding` and `variant`.
//
// Macro arguments:
// * `$(#[$meta])*`: attributes (typically doc comments) for the public
//   method, followed by a comma.
// * `$decode_to_utf`: name of the generated public method.
// * `$decode_to_utf_raw`: name of the method called on `self.variant` to do
//   the actual decoding.
// * the next four identifiers: names for the generated private helpers.
// * `$code_unit`: element type of the output slice.
//
// The public method drives a byte order mark sniffing state machine held in
// `self.life_cycle`. Bytes that may be part of a BOM are held back; once it is
// clear that they are not, they are pushed through the variant decoder. A
// full UTF-8, UTF-16BE or UTF-16LE BOM is swallowed and switches
// `self.encoding` / `self.variant` to the matching encoding if needed.
macro_rules! public_decode_function{
    ($(#[$meta:meta])*,
     $decode_to_utf:ident,
     $decode_to_utf_raw:ident,
     $decode_to_utf_checking_end:ident,
     $decode_to_utf_after_one_potential_bom_byte:ident,
     $decode_to_utf_after_two_potential_bom_bytes:ident,
     $decode_to_utf_checking_end_with_offset:ident,
     $code_unit:ty) => (
    $(#[$meta])*
    pub fn $decode_to_utf(&mut self,
                          src: &[u8],
                          dst: &mut [$code_unit],
                          last: bool)
                          -> (DecoderResult, usize, usize) {
        // Count of leading bytes of `src` that this call has already
        // classified as potential BOM bytes.
        let mut offset = 0usize;
        // Every arm either returns or updates the state and falls through to
        // the next iteration.
        loop {
            match self.life_cycle {
                // Sniffing is over; this is the steady state.
                DecoderLifeCycle::Converting => {
                    return self.$decode_to_utf_checking_end(src, dst, last);
                }
                // Nothing seen yet; any of the three BOMs may follow.
                DecoderLifeCycle::AtStart => {
                    debug_assert_eq!(offset, 0usize);
                    let lead = match src.first() {
                        Some(&byte) => byte,
                        None => return (DecoderResult::InputEmpty, 0, 0),
                    };
                    let (next_state, sniffed) = match lead {
                        0xEFu8 => (DecoderLifeCycle::SeenUtf8First, 1usize),
                        0xFEu8 => (DecoderLifeCycle::SeenUtf16BeFirst, 1usize),
                        0xFFu8 => (DecoderLifeCycle::SeenUtf16LeFirst, 1usize),
                        _ => (DecoderLifeCycle::Converting, 0usize),
                    };
                    self.life_cycle = next_state;
                    offset += sniffed;
                }
                // Nothing seen yet; only the UTF-8 BOM is recognized.
                DecoderLifeCycle::AtUtf8Start => {
                    debug_assert_eq!(offset, 0usize);
                    match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xEFu8) => {
                            self.life_cycle = DecoderLifeCycle::SeenUtf8First;
                            offset += 1;
                        }
                        Some(_) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                        }
                    }
                }
                // Nothing seen yet; only the UTF-16BE BOM is recognized.
                DecoderLifeCycle::AtUtf16BeStart => {
                    debug_assert_eq!(offset, 0usize);
                    match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xFEu8) => {
                            self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst;
                            offset += 1;
                        }
                        Some(_) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                        }
                    }
                }
                // Nothing seen yet; only the UTF-16LE BOM is recognized.
                DecoderLifeCycle::AtUtf16LeStart => {
                    debug_assert_eq!(offset, 0usize);
                    match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xFFu8) => {
                            self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst;
                            offset += 1;
                        }
                        Some(_) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                        }
                    }
                }
                // 0xEF seen; 0xBB would continue the UTF-8 BOM.
                DecoderLifeCycle::SeenUtf8First => {
                    match src.get(offset) {
                        Some(&0xBBu8) => {
                            self.life_cycle = DecoderLifeCycle::SeenUtf8Second;
                            offset += 1;
                        }
                        // Out of input, but more may come: keep waiting.
                        None if !last => {
                            return (DecoderResult::InputEmpty, offset, 0);
                        }
                        // Not a BOM (or the stream ended): decode the 0xEF.
                        _ => {
                            return self.$decode_to_utf_after_one_potential_bom_byte(src,
                                                                                    dst,
                                                                                    last,
                                                                                    offset,
                                                                                    0xEFu8);
                        }
                    }
                }
                // 0xEF 0xBB seen; 0xBF would complete the UTF-8 BOM.
                DecoderLifeCycle::SeenUtf8Second => {
                    match src.get(offset) {
                        Some(&0xBFu8) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                            offset += 1;
                            if self.encoding != UTF_8 {
                                self.encoding = UTF_8;
                                self.variant = UTF_8.new_variant_decoder();
                            }
                            return self.$decode_to_utf_checking_end_with_offset(src,
                                                                                dst,
                                                                                last,
                                                                                offset);
                        }
                        // Out of input, but more may come: keep waiting.
                        None if !last => {
                            return (DecoderResult::InputEmpty, offset, 0);
                        }
                        // Not a BOM (or the stream ended): decode 0xEF 0xBB.
                        _ => {
                            return self.$decode_to_utf_after_two_potential_bom_bytes(src,
                                                                                     dst,
                                                                                     last,
                                                                                     offset);
                        }
                    }
                }
                // 0xFE seen; 0xFF would complete the UTF-16BE BOM.
                DecoderLifeCycle::SeenUtf16BeFirst => {
                    match src.get(offset) {
                        Some(&0xFFu8) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                            offset += 1;
                            if self.encoding != UTF_16BE {
                                self.encoding = UTF_16BE;
                                self.variant = UTF_16BE.new_variant_decoder();
                            }
                            return self.$decode_to_utf_checking_end_with_offset(src,
                                                                                dst,
                                                                                last,
                                                                                offset);
                        }
                        // Out of input, but more may come: keep waiting.
                        None if !last => {
                            return (DecoderResult::InputEmpty, offset, 0);
                        }
                        // Not a BOM (or the stream ended): decode the 0xFE.
                        _ => {
                            return self.$decode_to_utf_after_one_potential_bom_byte(src,
                                                                                    dst,
                                                                                    last,
                                                                                    offset,
                                                                                    0xFEu8);
                        }
                    }
                }
                // 0xFF seen; 0xFE would complete the UTF-16LE BOM.
                DecoderLifeCycle::SeenUtf16LeFirst => {
                    match src.get(offset) {
                        Some(&0xFEu8) => {
                            self.life_cycle = DecoderLifeCycle::Converting;
                            offset += 1;
                            if self.encoding != UTF_16LE {
                                self.encoding = UTF_16LE;
                                self.variant = UTF_16LE.new_variant_decoder();
                            }
                            return self.$decode_to_utf_checking_end_with_offset(src,
                                                                                dst,
                                                                                last,
                                                                                offset);
                        }
                        // Out of input, but more may come: keep waiting.
                        None if !last => {
                            return (DecoderResult::InputEmpty, offset, 0);
                        }
                        // Not a BOM (or the stream ended): decode the 0xFF.
                        _ => {
                            return self.$decode_to_utf_after_one_potential_bom_byte(src,
                                                                                    dst,
                                                                                    last,
                                                                                    offset,
                                                                                    0xFFu8);
                        }
                    }
                }
                // A held-back 0xEF was reported malformed by an earlier call;
                // the held-back 0xBB still has to be decoded before `src`.
                DecoderLifeCycle::ConvertingWithPendingBB => {
                    debug_assert_eq!(offset, 0usize);
                    return self.$decode_to_utf_after_one_potential_bom_byte(src,
                                                                            dst,
                                                                            last,
                                                                            0usize,
                                                                            0xBBu8);
                }
                DecoderLifeCycle::Finished => {
                    panic!("Must not use a decoder that has finished.")
                }
            }
        }
    }

    /// Resumes normal decoding after one held-back byte (`first_byte`) turned
    /// out not to start a BOM. `offset` is 0 when that byte came from an
    /// earlier buffer and 1 when it is the first byte of `src`.
    fn $decode_to_utf_after_one_potential_bom_byte(&mut self,
                                                   src: &[u8],
                                                   dst: &mut [$code_unit],
                                                   last: bool,
                                                   offset: usize,
                                                   first_byte: u8)
                                                   -> (DecoderResult, usize, usize) {
        self.life_cycle = DecoderLifeCycle::Converting;
        if offset != 0usize {
            debug_assert_eq!(offset, 1usize);
            // The held-back byte is `src[0]`, so decoding `src` covers it.
            return self.$decode_to_utf_checking_end(src, dst, last);
        }
        // The held-back byte is not in `src`; feed it to the variant decoder
        // on its own first.
        let pending = [first_byte];
        let (pending_result, _, pending_written) =
            self.variant.$decode_to_utf_raw(&pending[..], dst, false);
        match pending_result {
            DecoderResult::InputEmpty => {
                let (result, read, written) =
                    self.$decode_to_utf_checking_end(src, &mut dst[pending_written..], last);
                // Only bytes of `src` count as read; the pending byte was
                // accounted for by an earlier call.
                (result, read, pending_written + written)
            }
            // The malformed byte was not read from `src`, so report 0 read.
            DecoderResult::Malformed(_, _) => (pending_result, 0usize, pending_written),
            DecoderResult::OutputFull => {
                panic!("Output buffer must have been too small.")
            }
        }
    }

    /// Resumes normal decoding after the held-back bytes 0xEF 0xBB turned out
    /// not to start a UTF-8 BOM. `offset` is the number of those two bytes
    /// that are at the start of `src` (0, 1 or 2).
    fn $decode_to_utf_after_two_potential_bom_bytes(&mut self,
                                                    src: &[u8],
                                                    dst: &mut [$code_unit],
                                                    last: bool,
                                                    offset: usize)
                                                    -> (DecoderResult, usize, usize) {
        self.life_cycle = DecoderLifeCycle::Converting;
        match offset {
            0usize => {
                // Neither byte is in `src`; feed both to the variant decoder
                // first.
                let ef_bb = [0xEFu8, 0xBBu8];
                let (pending_result, pending_read, pending_written) =
                    self.variant.$decode_to_utf_raw(&ef_bb[..], dst, false);
                match pending_result {
                    DecoderResult::InputEmpty => {
                        let (result, read, written) =
                            self.$decode_to_utf_checking_end(src,
                                                             &mut dst[pending_written..],
                                                             last);
                        // Only bytes of `src` count as read.
                        (result, read, pending_written + written)
                    }
                    DecoderResult::Malformed(_, _) => {
                        if pending_read == 1usize {
                            // Only 0xEF was consumed; remember that 0xBB, which
                            // isn't in `src`, still has to be decoded.
                            self.life_cycle = DecoderLifeCycle::ConvertingWithPendingBB;
                        }
                        // Nothing was read from `src`.
                        (pending_result, 0usize, pending_written)
                    }
                    DecoderResult::OutputFull => {
                        panic!("Output buffer must have been too small.")
                    }
                }
            }
            // 0xEF came from an earlier buffer and 0xBB is `src[0]`: treat
            // 0xEF as a single pending byte ahead of `src`.
            1usize => {
                self.$decode_to_utf_after_one_potential_bom_byte(src,
                                                                 dst,
                                                                 last,
                                                                 0usize,
                                                                 0xEFu8)
            }
            _ => {
                debug_assert_eq!(offset, 2usize);
                // Both bytes are in `src`, so decoding `src` covers them.
                self.$decode_to_utf_checking_end(src, dst, last)
            }
        }
    }

    /// Decodes `src` with its first `offset` bytes skipped, then adds
    /// `offset` to the reported read count so that the skipped bytes show up
    /// as consumed.
    fn $decode_to_utf_checking_end_with_offset(&mut self,
                                               src: &[u8],
                                               dst: &mut [$code_unit],
                                               last: bool,
                                               offset: usize)
                                               -> (DecoderResult, usize, usize) {
        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);
        let tail = &src[offset..];
        let (result, tail_read, written) = self.$decode_to_utf_checking_end(tail, dst, last);
        (result, offset + tail_read, written)
    }

    /// Forwards to the variant decoder and marks the decoder as finished
    /// when `last` is `true` and the result is `DecoderResult::InputEmpty`.
    fn $decode_to_utf_checking_end(&mut self,
                                   src: &[u8],
                                   dst: &mut [$code_unit],
                                   last: bool)
                                   -> (DecoderResult, usize, usize) {
        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);
        let outcome = self.variant.$decode_to_utf_raw(src, dst, last);
        match (last, &outcome.0) {
            (true, &DecoderResult::InputEmpty) => {
                self.life_cycle = DecoderLifeCycle::Finished;
            }
            _ => {}
        }
        outcome
    });
}
1623 | |