1// Copyright Mozilla Foundation. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
// Expands to a single `pub fn $name(&mut self, src, dst, last)` raw decode
// method returning `(DecoderResult, bytes_read, code_units_written)`.
//
// Caller-supplied blocks:
// * `$preamble` runs at the top of every iteration of the outer loop.
// * `$loop_preamble` runs at the top of every iteration of the inner loop.
// * `$eof` runs when the source has no more input and `last` is true, just
//   before `InputEmpty` is returned (unless the block returns by itself).
// * `$body` runs once per byte read; it sees the byte as `$b`, the handle
//   returned alongside it as `$unread_handle`, and the destination handle
//   as `$destination_handle`.
//
// The identifier arguments name the bindings those blocks may refer to
// (`$slf`, `$src_consumed`, `$dest`, `$source`), the destination method used
// to check for output space (`$destination_check`), and finally the name,
// output code unit type and destination wrapper type of the generated
// function.
macro_rules! decoder_function {
    (
        $preamble:block,
        $loop_preamble:block,
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $dest:ident,
        $source:ident,
        $b:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident,
        $name:ident,
        $code_unit:ty,
        $dest_struct:ident
    ) => {
        pub fn $name(
            &mut $slf,
            src: &[u8],
            dst: &mut [$code_unit],
            last: bool
        ) -> (DecoderResult, usize, usize) {
            let mut $source = ByteSource::new(src);
            let mut $dest = $dest_struct::new(dst);
            // TODO: remove this outer loop
            loop {
                {
                    // Caller-supplied code
                    $preamble
                }
                loop {
                    {
                        // Caller-supplied code
                        $loop_preamble
                    }
                    // Guard 1: there must be a byte to read.
                    let readable = match $source.check_available() {
                        Space::Full($src_consumed) => {
                            if last {
                                // Caller-supplied end-of-stream handling
                                $eof
                            }
                            return (DecoderResult::InputEmpty, $src_consumed, $dest.written());
                        }
                        Space::Available(handle) => handle,
                    };
                    // Guard 2: there must be room for the output.
                    let $destination_handle = match $dest.$destination_check() {
                        Space::Full(written) => {
                            return (DecoderResult::OutputFull, readable.consumed(), written);
                        }
                        Space::Available(handle) => handle,
                    };
                    let ($b, $unread_handle) = readable.read();
                    // Caller-supplied per-byte handling
                    $body
                }
            }
        }
    };
}
72
// Expands to the pair of raw decode methods for one decoder by invoking
// `decoder_function!` twice with the same caller-supplied blocks and
// identifiers: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
macro_rules! decoder_functions {
    (
        $preamble:block,
        $loop_preamble:block,
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $dest:ident,
        $source:ident,
        $b:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        // `u8` output variant
        decoder_function!(
            $preamble, $loop_preamble, $eof, $body,
            $slf, $src_consumed, $dest, $source, $b,
            $destination_handle, $unread_handle, $destination_check,
            decode_to_utf8_raw, u8, Utf8Destination
        );
        // `u16` output variant
        decoder_function!(
            $preamble, $loop_preamble, $eof, $body,
            $slf, $src_consumed, $dest, $source, $b,
            $destination_handle, $unread_handle, $destination_check,
            decode_to_utf16_raw, u16, Utf16Destination
        );
    };
}
124
// Expands to a single `pub fn $name(&mut self, src, dst, last)` raw decode
// method, returning `(DecoderResult, bytes_read, code_units_written)`, for a
// decoder whose non-ASCII input comes as a lead byte followed by a trail
// byte.
//
// Caller-supplied pieces:
// * `$lead` is a block evaluated with the non-ASCII byte bound to
//   `$non_ascii`; its value is bound to `$lead_minus_offset`.
// * `$trail` is a block evaluated with the following byte bound to `$byte`,
//   the handle returned alongside it bound to `$unread_handle_trail`, and the
//   destination handle bound to `$handle`. Its value is the destination used
//   for the next step.
// * `$outermost` is the label token for the outermost loop.
// * `$copy_ascii` / `$destination_check` are the destination methods used to
//   bulk-copy ASCII and to check for output space.
// * `$ascii_punctuation` is a boolean expression; when it is true and the
//   ASCII byte just written is below 60, the next byte is handled in the
//   inner loop instead of going back to `$copy_ascii`.
//
// State carried between calls: `$slf.lead` holds the value computed from a
// lead byte whose trail byte was not yet available. A call that cannot make
// progress on that pending lead (no input yet and `last` is false, or no
// output space) leaves it in `$slf.lead` so the next call can resume.
macro_rules! ascii_compatible_two_byte_decoder_function {
    ($lead:block,
     $trail:block,
     $slf:ident,
     $non_ascii:ident,
     $byte:ident,
     $lead_minus_offset:ident,
     $unread_handle_trail:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest_prolog = $dest_struct::new(dst);
        let dest = match $slf.lead {
            Some(lead) => {
                let $lead_minus_offset = lead;
                $slf.lead = None;
                // Since we don't have `goto` we could use to jump into the trail
                // handling part of the main loop, we need to repeat trail handling
                // here.
                match $source.check_available() {
                    Space::Full(src_consumed_prolog) => {
                        if last {
                            // The stream ended after the lead; the lead
                            // stays cleared.
                            return (DecoderResult::Malformed(1, 0),
                                    src_consumed_prolog,
                                    dest_prolog.written());
                        }
                        // No trail byte yet: keep the lead pending, exactly
                        // as the main loop does before returning `InputEmpty`.
                        $slf.lead = Some($lead_minus_offset);
                        return (DecoderResult::InputEmpty, src_consumed_prolog, dest_prolog.written());
                    }
                    Space::Available(source_handle_prolog) => {
                        match dest_prolog.$destination_check() {
                            Space::Full(dst_written_prolog) => {
                                // Nothing was read, so the lead must survive
                                // until the caller retries with more output
                                // space.
                                $slf.lead = Some($lead_minus_offset);
                                return (DecoderResult::OutputFull,
                                        source_handle_prolog.consumed(),
                                        dst_written_prolog);
                            }
                            Space::Available($handle) => {
                                let ($byte, $unread_handle_trail) = source_handle_prolog.read();
                                // Start non-boilerplate
                                $trail
                                // End non-boilerplate
                            }
                        }
                    }
                }
            },
            None => {
                &mut dest_prolog
            }
        };
        $outermost: loop {
            match dest.$copy_ascii(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    // Each iteration of 'middle handles one non-ASCII lead
                    // (plus its trail) and then looks at the next byte.
                    'middle: loop {
                        let dest_again = {
                            let $lead_minus_offset = {
                                // Start non-boilerplate
                                $lead
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    // Remember the lead for the next call.
                                    $slf.lead = Some($lead_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($byte, $unread_handle_trail) = source_handle_trail.read();
                                    // Start non-boilerplate
                                    $trail
                                    // End non-boilerplate
                                }
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut b, unread_handle) = source_handle.read();
                                        let source_again = unread_handle.commit();
                                        'innermost: loop {
                                            if b > 127 {
                                                // Another non-ASCII byte:
                                                // treat it as the next lead.
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_ascii(b);
                                            if $ascii_punctuation && b < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (DecoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                {
                                                                    let (b_again, _unread_handle_again) =
                                                                        source_handle_again.read();
                                                                    b = b_again;
                                                                    destination_handle = destination_handle_again;
                                                                    continue 'innermost;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
285
// Expands to the pair of raw decode methods for a two-byte ASCII-compatible
// decoder by invoking `ascii_compatible_two_byte_decoder_function!` twice
// with the same arguments: once as `decode_to_utf8_raw` (`u8` output through
// `Utf8Destination`) and once as `decode_to_utf16_raw` (`u16` output through
// `Utf16Destination`).
//
// All arguments are forwarded positionally. Note that `$ascii_punctuation`
// is the last argument here but is passed after the three per-variant
// arguments to the inner macro.
macro_rules! ascii_compatible_two_byte_decoder_functions {
    (
        $lead:block,
        $trail:block,
        $slf:ident,
        $non_ascii:ident,
        $byte:ident,
        $lead_minus_offset:ident,
        $unread_handle_trail:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        // `u8` output variant
        ascii_compatible_two_byte_decoder_function!(
            $lead,
            $trail,
            $slf,
            $non_ascii,
            $byte,
            $lead_minus_offset,
            $unread_handle_trail,
            $source,
            $handle,
            $outermost,
            $copy_ascii,
            $destination_check,
            decode_to_utf8_raw,
            u8,
            Utf8Destination,
            $ascii_punctuation
        );
        // `u16` output variant
        ascii_compatible_two_byte_decoder_function!(
            $lead,
            $trail,
            $slf,
            $non_ascii,
            $byte,
            $lead_minus_offset,
            $unread_handle_trail,
            $source,
            $handle,
            $outermost,
            $copy_ascii,
            $destination_check,
            decode_to_utf16_raw,
            u16,
            Utf16Destination,
            $ascii_punctuation
        );
    };
}
340
// Expands to a single `pub fn $name(&mut self, src, dst, last)` raw decode
// method, returning `(DecoderResult, bytes_read, code_units_written)`, for a
// decoder whose sequences are one byte (ASCII), two bytes or four bytes long.
//
// The generated function has three phases:
// 1. If `$slf.pending_ascii` is set, write that byte to the output first (or
//    return `OutputFull` with nothing read or written if there is no room).
// 2. While `$slf.pending` is not `None`, resume the sequence that was cut off
//    at the end of the previous call, one byte per iteration.
// 3. The main loop: bulk-copy ASCII, then handle one multi-byte sequence at a
//    time, storing the partial sequence in `$slf.pending` whenever the input
//    runs out before the sequence is complete and `last` is false.
//
// Caller-supplied blocks:
// * `$first_body` computes `$first_minus_offset` from `$non_ascii`.
// * `$second_body` handles a second byte that is not in 0x30..=0x39, i.e. the
//   two-byte case; it sees `$second`, `$second_minus_offset`,
//   `$unread_handle_second` and `$handle`.
// * `$third_body` computes `$third_minus_offset` from `$third`.
// * `$fourth_body` handles the fourth byte (`$fourth`,
//   `$unread_handle_fourth`).
// `$second_body` and `$fourth_body` are used as the value of their branch,
// which is the destination for the next step in phase 3.
//
// NOTE(review): `$fourth_minus_offset` is accepted but never bound by this
// macro; presumably the caller's `$fourth_body` binds it itself -- confirm.
macro_rules! gb18030_decoder_function {
    ($first_body:block,
     $second_body:block,
     $third_body:block,
     $fourth_body:block,
     $slf:ident,
     $non_ascii:ident,
     $first_minus_offset:ident,
     $second:ident,
     $second_minus_offset:ident,
     $unread_handle_second:ident,
     $third:ident,
     $third_minus_offset:ident,
     $unread_handle_third:ident,
     $fourth:ident,
     $fourth_minus_offset:ident,
     $unread_handle_fourth:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest = $dest_struct::new(dst);
        // Phase 1: flush an ASCII byte left over from the previous call.
        {
            if let Some(ascii) = $slf.pending_ascii {
                match dest.check_space_bmp() {
                    Space::Full(_) => {
                        // Nothing has been read or written yet.
                        return (DecoderResult::OutputFull, 0, 0);
                    }
                    Space::Available(pending_ascii_handle) => {
                        $slf.pending_ascii = None;
                        pending_ascii_handle.write_ascii(ascii);
                    }
                }
            }
        }
        // Phase 2: resume a sequence that was split across calls.
        while !$slf.pending.is_none() {
            match $source.check_available() {
                Space::Full(src_consumed) => {
                    if last {
                        // Start non-boilerplate
                        // The stream ended inside a sequence: report the
                        // pending bytes as malformed and reset the state.
                        let count = $slf.pending.count();
                        $slf.pending = Gb18030Pending::None;
                        return (DecoderResult::Malformed(count as u8, 0),
                                src_consumed,
                                dest.written());
                        // End non-boilerplate
                    }
                    // `$slf.pending` is left untouched for the next call.
                    return (DecoderResult::InputEmpty, src_consumed, dest.written());
                }
                Space::Available(source_handle) => {
                    match dest.check_space_astral() {
                        Space::Full(dst_written) => {
                            return (DecoderResult::OutputFull,
                                    source_handle.consumed(),
                                    dst_written);
                        }
                        Space::Available($handle) => {
                            let (byte, unread_handle) = source_handle.read();
                            // The pending state is cleared only after a byte
                            // has actually been read.
                            match $slf.pending {
                                Gb18030Pending::One($first_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $second = byte;
                                    let $unread_handle_second = unread_handle;
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        $slf.pending = Gb18030Pending::Two($first_minus_offset,
                                                                           $second_minus_offset);
                                        $handle.commit()
                                    }
                                }
                                Gb18030Pending::Two($first_minus_offset, $second_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $third = byte;
                                    let $unread_handle_third = unread_handle;
                                    let $third_minus_offset = {
                                        // Start non-boilerplate
                                        $third_body
                                        // End non-boilerplate
                                    };
                                    $slf.pending = Gb18030Pending::Three($first_minus_offset,
                                                                         $second_minus_offset,
                                                                         $third_minus_offset);
                                    $handle.commit()
                                }
                                Gb18030Pending::Three($first_minus_offset,
                                                      $second_minus_offset,
                                                      $third_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $fourth = byte;
                                    let $unread_handle_fourth = unread_handle;
                                    // Start non-boilerplate
                                    $fourth_body
                                    // End non-boilerplate
                                }
                                Gb18030Pending::None => unreachable!("Checked in loop condition"),
                            };
                        }
                    }
                }
            }
        }
        // Phase 3: the main loop.
        $outermost: loop {
            match dest.copy_ascii_from_check_space_astral(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    // Each iteration of 'middle handles one multi-byte
                    // sequence starting at `$non_ascii`.
                    'middle: loop {
                        let dest_again = {
                            let $first_minus_offset = {
                                // Start non-boilerplate
                                $first_body
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    $slf.pending = Gb18030Pending::One($first_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($second, $unread_handle_second) = source_handle_trail.read();
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        match $unread_handle_second.commit().check_available() {
                                            Space::Full(src_consumed_third) => {
                                                if last {
                                                    return (DecoderResult::Malformed(2, 0),
                                                            src_consumed_third,
                                                            $handle.written());
                                                }
                                                $slf.pending =
                                                    Gb18030Pending::Two($first_minus_offset,
                                                                        $second_minus_offset);
                                                return (DecoderResult::InputEmpty,
                                                        src_consumed_third,
                                                        $handle.written());
                                            }
                                            Space::Available(source_handle_third) => {
                                                let ($third, $unread_handle_third) =
                                                    source_handle_third.read();
                                                let $third_minus_offset = {
                                                    // Start non-boilerplate
                                                    $third_body
                                                    // End non-boilerplate
                                                };
                                                match $unread_handle_third.commit()
                                                                          .check_available() {
                                                    Space::Full(src_consumed_fourth) => {
                                                        if last {
                                                            return (DecoderResult::Malformed(3, 0),
                                                                    src_consumed_fourth,
                                                                    $handle.written());
                                                        }
                                                        $slf.pending = Gb18030Pending::Three($first_minus_offset, $second_minus_offset, $third_minus_offset);
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_fourth,
                                                                $handle.written());
                                                    }
                                                    Space::Available(source_handle_fourth) => {
                                                        let ($fourth, $unread_handle_fourth) =
                                                            source_handle_fourth.read();
                                                        // Start non-boilerplate
                                                        $fourth_body
                                                        // End non-boilerplate
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        };
                        // A sequence was handled; look at the byte after it.
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.check_space_astral() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(destination_handle) => {
                                        let (b, _) = source_handle.read();
                                        // This loop never iterates: both paths
                                        // through its body `continue` an outer
                                        // loop (hence the `never_loop` allow on
                                        // the function).
                                        loop {
                                            if b > 127 {
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            destination_handle.write_ascii(b);
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
597
// Expands to the pair of raw decode methods for the gb18030-style decoder by
// invoking `gb18030_decoder_function!` twice with the same arguments: once as
// `decode_to_utf8_raw` (`u8` output through `Utf8Destination`) and once as
// `decode_to_utf16_raw` (`u16` output through `Utf16Destination`).
// All arguments are forwarded positionally and unchanged.
macro_rules! gb18030_decoder_functions {
    (
        $first_body:block,
        $second_body:block,
        $third_body:block,
        $fourth_body:block,
        $slf:ident,
        $non_ascii:ident,
        $first_minus_offset:ident,
        $second:ident,
        $second_minus_offset:ident,
        $unread_handle_second:ident,
        $third:ident,
        $third_minus_offset:ident,
        $unread_handle_third:ident,
        $fourth:ident,
        $fourth_minus_offset:ident,
        $unread_handle_fourth:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt
    ) => {
        // `u8` output variant
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf8_raw,
            u8,
            Utf8Destination
        );
        // `u16` output variant
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf16_raw,
            u16,
            Utf16Destination
        );
    };
}
670
// Expands to a single `pub fn $name(&mut self, src, dst, last)` raw decode
// method, returning `(DecoderResult, bytes_read, code_units_written)`, for
// the EUC-JP-style decoder.
//
// The generated function has two phases:
// 1. While `$slf.pending` is not `None`, resume the sequence that was cut off
//    at the end of the previous call, one byte per iteration.
// 2. The main loop: bulk-copy ASCII, then dispatch on the non-ASCII byte:
//    0xA1..=0xFE starts a JIS 0208 pair, 0x8F announces a JIS 0212 lead and
//    trail, 0x8E announces a half-width Katakana byte, and anything else is
//    reported as `Malformed(1, 0)`. Whenever the input runs out inside a
//    sequence and `last` is false, the partial state is stored in
//    `$slf.pending`.
//
// Caller-supplied blocks (NOTE(review): the `0802`/`0812` spellings in the
// metavariable names look like transpositions of 0208/0212; they are used in
// the `Jis0208Lead` / `Jis0212*` branches respectively):
// * `$jis0802_trail_body` handles the trail byte of a JIS 0208 pair.
// * `$jis0812_lead_body` computes `$jis0212_lead_minus_offset` from `$lead`.
// * `$jis0812_trail_body` handles the trail byte of a JIS 0212 sequence.
// * `$half_width_katakana_body` handles the byte after 0x8E.
// The three "handling" blocks are used as the value of their branch, which is
// the destination for the next step in the main loop.
macro_rules! euc_jp_decoder_function {
    ($jis0802_trail_body:block,
     $jis0812_lead_body:block,
     $jis0812_trail_body:block,
     $half_width_katakana_body:block,
     $slf:ident,
     $non_ascii:ident,
     $jis0208_lead_minus_offset:ident,
     $byte:ident,
     $unread_handle_trail:ident,
     $jis0212_lead_minus_offset:ident,
     $lead:ident,
     $unread_handle_jis0212:ident,
     $source:ident,
     $handle:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest = $dest_struct::new(dst);
        // Phase 1: resume a sequence that was split across calls.
        while !$slf.pending.is_none() {
            match $source.check_available() {
                Space::Full(src_consumed) => {
                    if last {
                        // Start non-boilerplate
                        // The stream ended inside a sequence: report the
                        // pending bytes as malformed and reset the state.
                        let count = $slf.pending.count();
                        $slf.pending = EucJpPending::None;
                        return (DecoderResult::Malformed(count as u8, 0),
                                src_consumed,
                                dest.written());
                        // End non-boilerplate
                    }
                    // `$slf.pending` is left untouched for the next call.
                    return (DecoderResult::InputEmpty, src_consumed, dest.written());
                }
                Space::Available(source_handle) => {
                    match dest.check_space_bmp() {
                        Space::Full(dst_written) => {
                            return (DecoderResult::OutputFull,
                                    source_handle.consumed(),
                                    dst_written);
                        }
                        Space::Available($handle) => {
                            let ($byte, $unread_handle_trail) = source_handle.read();
                            // The pending state is cleared only after a byte
                            // has actually been read.
                            match $slf.pending {
                                EucJpPending::Jis0208Lead($jis0208_lead_minus_offset) => {
                                    $slf.pending = EucJpPending::None;
                                    // Start non-boilerplate
                                    $jis0802_trail_body
                                    // End non-boilerplate
                                }
                                EucJpPending::Jis0212Shift => {
                                    $slf.pending = EucJpPending::None;
                                    let $lead = $byte;
                                    let $unread_handle_jis0212 = $unread_handle_trail;
                                    let $jis0212_lead_minus_offset = {
                                        // Start non-boilerplate
                                        $jis0812_lead_body
                                        // End non-boilerplate
                                    };
                                    // The trail byte is handled by the next
                                    // iteration of the `while` loop.
                                    $slf.pending =
                                        EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);
                                    $handle.commit()
                                }
                                EucJpPending::Jis0212Lead($jis0212_lead_minus_offset) => {
                                    $slf.pending = EucJpPending::None;
                                    // Start non-boilerplate
                                    $jis0812_trail_body
                                    // End non-boilerplate
                                }
                                EucJpPending::HalfWidthKatakana => {
                                    $slf.pending = EucJpPending::None;
                                    // Start non-boilerplate
                                    $half_width_katakana_body
                                    // End non-boilerplate
                                }
                                EucJpPending::None => unreachable!("Checked in loop condition"),
                            };
                        }
                    }
                }
            }
        }
        // Phase 2: the main loop.
        'outermost: loop {
            match dest.copy_ascii_from_check_space_bmp(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    // Each iteration of 'middle handles one multi-byte
                    // sequence starting at `$non_ascii`.
                    'middle: loop {
                        let dest_again = {
                            // If lead is between 0xA1 and 0xFE, inclusive,
                            // subtract 0xA1. Else if lead is 0x8E, handle the
                            // next byte as half-width Katakana. Else if lead is
                            // 0x8F, expect JIS 0212.
                            let $jis0208_lead_minus_offset = $non_ascii.wrapping_sub(0xA1);
                            if $jis0208_lead_minus_offset <= (0xFE - 0xA1) {
                                // JIS 0208
                                match $source.check_available() {
                                    Space::Full(src_consumed_trail) => {
                                        if last {
                                            return (DecoderResult::Malformed(1, 0),
                                                    src_consumed_trail,
                                                    $handle.written());
                                        }
                                        $slf.pending =
                                            EucJpPending::Jis0208Lead($jis0208_lead_minus_offset);
                                        return (DecoderResult::InputEmpty,
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    Space::Available(source_handle_trail) => {
                                        let ($byte, $unread_handle_trail) =
                                            source_handle_trail.read();
                                        // Start non-boilerplate
                                        $jis0802_trail_body
                                        // End non-boilerplate
                                    }
                                }
                            } else if $non_ascii == 0x8F {
                                // JIS 0212: a lead and a trail byte follow.
                                match $source.check_available() {
                                    Space::Full(src_consumed_jis0212) => {
                                        if last {
                                            return (DecoderResult::Malformed(1, 0),
                                                    src_consumed_jis0212,
                                                    $handle.written());
                                        }
                                        $slf.pending = EucJpPending::Jis0212Shift;
                                        return (DecoderResult::InputEmpty,
                                                src_consumed_jis0212,
                                                $handle.written());
                                    }
                                    Space::Available(source_handle_jis0212) => {
                                        let ($lead, $unread_handle_jis0212) =
                                            source_handle_jis0212.read();
                                        let $jis0212_lead_minus_offset = {
                                            // Start non-boilerplate
                                            $jis0812_lead_body
                                            // End non-boilerplate
                                        };
                                        match $unread_handle_jis0212.commit().check_available() {
                                            Space::Full(src_consumed_trail) => {
                                                if last {
                                                    return (DecoderResult::Malformed(2, 0),
                                                            src_consumed_trail,
                                                            $handle.written());
                                                }
                                                $slf.pending = EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);
                                                return (DecoderResult::InputEmpty,
                                                        src_consumed_trail,
                                                        $handle.written());
                                            }
                                            Space::Available(source_handle_trail) => {
                                                let ($byte, $unread_handle_trail) =
                                                    source_handle_trail.read();
                                                // Start non-boilerplate
                                                $jis0812_trail_body
                                                // End non-boilerplate
                                            }
                                        }
                                    }
                                }
                            } else if $non_ascii == 0x8E {
                                // Half-width Katakana: one more byte follows.
                                match $source.check_available() {
                                    Space::Full(src_consumed_trail) => {
                                        if last {
                                            return (DecoderResult::Malformed(1, 0),
                                                    src_consumed_trail,
                                                    $handle.written());
                                        }
                                        $slf.pending = EucJpPending::HalfWidthKatakana;
                                        return (DecoderResult::InputEmpty,
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    Space::Available(source_handle_trail) => {
                                        let ($byte, $unread_handle_trail) =
                                            source_handle_trail.read();
                                        // Start non-boilerplate
                                        $half_width_katakana_body
                                        // End non-boilerplate
                                    }
                                }
                            } else {
                                // Not a byte that can start a sequence.
                                return (DecoderResult::Malformed(1, 0),
                                        $source.consumed(),
                                        $handle.written());
                            }
                        };
                        // A sequence was handled; look at the byte after it.
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.check_space_bmp() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(destination_handle) => {
                                        let (b, _) = source_handle.read();
                                        // This loop never iterates: both paths
                                        // through its body `continue` an outer
                                        // loop (hence the `never_loop` allow on
                                        // the function).
                                        loop {
                                            if b > 127 {
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            destination_handle.write_ascii(b);
                                            // We've got markup or ASCII text
                                            continue 'outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
901
// Expands to the pair of raw decode methods for the EUC-JP-style decoder by
// invoking `euc_jp_decoder_function!` twice with the same arguments: once as
// `decode_to_utf8_raw` (`u8` output through `Utf8Destination`) and once as
// `decode_to_utf16_raw` (`u16` output through `Utf16Destination`).
// All arguments are forwarded positionally and unchanged.
macro_rules! euc_jp_decoder_functions {
    (
        $jis0208_trail_body:block,
        $jis0212_lead_body:block,
        $jis0212_trail_body:block,
        $half_width_katakana_body:block,
        $slf:ident,
        $non_ascii:ident,
        $jis0208_lead_minus_offset:ident,
        $byte:ident,
        $unread_handle_trail:ident,
        $jis0212_lead_minus_offset:ident,
        $lead:ident,
        $unread_handle_jis0212:ident,
        $source:ident,
        $handle:ident
    ) => {
        // `u8` output variant
        euc_jp_decoder_function!(
            $jis0208_trail_body, $jis0212_lead_body, $jis0212_trail_body,
            $half_width_katakana_body,
            $slf, $non_ascii, $jis0208_lead_minus_offset,
            $byte, $unread_handle_trail,
            $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212,
            $source, $handle,
            decode_to_utf8_raw, u8, Utf8Destination
        );
        // `u16` output variant
        euc_jp_decoder_function!(
            $jis0208_trail_body, $jis0212_lead_body, $jis0212_trail_body,
            $half_width_katakana_body,
            $slf, $non_ascii, $jis0208_lead_minus_offset,
            $byte, $unread_handle_trail,
            $jis0212_lead_minus_offset, $lead, $unread_handle_jis0212,
            $source, $handle,
            decode_to_utf16_raw, u16, Utf16Destination
        );
    };
}
959
// Expands to a single `pub fn $name(&mut self, src, dst, last)` raw encode
// method returning `(EncoderResult, units_read, bytes_written)`.
//
// Caller-supplied blocks:
// * `$eof` runs when the source has no more input and `last` is true, just
//   before `InputEmpty` is returned (unless the block returns by itself).
// * `$body` runs once per item read; it sees the item as `$c`, the handle
//   returned alongside it as `$unread_handle`, and the destination handle as
//   `$destination_handle`.
//
// The identifier arguments name the bindings those blocks may refer to
// (`$slf`, `$src_consumed`, `$source`, `$dest`), the destination method used
// to check for output space (`$destination_check`), and finally the name,
// input type and source wrapper type of the generated function.
macro_rules! encoder_function {
    (
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $source:ident,
        $dest:ident,
        $c:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident,
        $name:ident,
        $input:ty,
        $source_struct:ident
    ) => {
        pub fn $name(
            &mut $slf,
            src: &$input,
            dst: &mut [u8],
            last: bool
        ) -> (EncoderResult, usize, usize) {
            let mut $source = $source_struct::new(src);
            let mut $dest = ByteDestination::new(dst);
            loop {
                // Guard 1: there must be input to read.
                let readable = match $source.check_available() {
                    Space::Full($src_consumed) => {
                        if last {
                            // Caller-supplied end-of-stream handling
                            $eof
                        }
                        return (EncoderResult::InputEmpty, $src_consumed, $dest.written());
                    }
                    Space::Available(handle) => handle,
                };
                // Guard 2: there must be room for the output.
                let $destination_handle = match $dest.$destination_check() {
                    Space::Full(written) => {
                        return (EncoderResult::OutputFull, readable.consumed(), written);
                    }
                    Space::Available(handle) => handle,
                };
                let ($c, $unread_handle) = readable.read();
                // Caller-supplied per-item handling
                $body
            }
        }
    };
}
1010
// Expands to the pair of raw encode methods for one encoder by invoking
// `encoder_function!` twice with the same caller-supplied blocks and
// identifiers: once as `encode_from_utf8_raw` (input `&str`, read through
// `Utf8Source`) and once as `encode_from_utf16_raw` (input `&[u16]`, read
// through `Utf16Source`).
macro_rules! encoder_functions {
    (
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $source:ident,
        $dest:ident,
        $c:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        // `&str` input variant
        encoder_function!(
            $eof,
            $body,
            $slf,
            $src_consumed,
            $source,
            $dest,
            $c,
            $destination_handle,
            $unread_handle,
            $destination_check,
            encode_from_utf8_raw,
            str,
            Utf8Source
        );
        // `&[u16]` input variant
        encoder_function!(
            $eof,
            $body,
            $slf,
            $src_consumed,
            $source,
            $dest,
            $c,
            $destination_handle,
            $unread_handle,
            $destination_check,
            encode_from_utf16_raw,
            [u16],
            Utf16Source
        );
    };
}
1056
// Expands to a single `pub fn $name(&mut self, src, dst, _last)` raw encode
// method, returning `(EncoderResult, units_read, bytes_written)`, for an
// ASCII-compatible encoder. The `_last` argument is not used by the generated
// body.
//
// Caller-supplied pieces:
// * `$bmp_body` handles a `NonAscii::BmpExclAscii` value bound to `$bmp`.
// * `$astral_body` handles a `NonAscii::Astral` value bound to `$astral`.
//   Both blocks are used as the value of their match arm, which is the
//   destination for the next step; `$handle` is the destination handle they
//   write through.
// * `$copy_ascii` is the source method used to bulk-copy ASCII into the
//   destination; `$destination_check` is the destination method used to
//   check for output space.
// * `$ascii_punctuation` is a boolean expression; when it is true and the
//   ASCII byte just written is below 60, the next item is handled in the
//   inner loop instead of going back to `$copy_ascii`.
macro_rules! ascii_compatible_encoder_function {
    ($bmp_body:block,
     $astral_body:block,
     $bmp:ident,
     $astral:ident,
     $slf:ident,
     $source:ident,
     $handle:ident,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $input:ty,
     $source_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &$input,
                 dst: &mut [u8],
                 _last: bool)
                 -> (EncoderResult, usize, usize) {
        let mut $source = $source_struct::new(src);
        let mut dest = ByteDestination::new(dst);
        'outermost: loop {
            match $source.$copy_ascii(&mut dest) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut non_ascii, mut $handle)) => {
                    // Each iteration of 'middle encodes one non-ASCII item
                    // and then looks at the item after it.
                    'middle: loop {
                        let dest_again = match non_ascii {
                            NonAscii::BmpExclAscii($bmp) => {
                                // Start non-boilerplate
                                $bmp_body
                                // End non-boilerplate
                            }
                            NonAscii::Astral($astral) => {
                                // Start non-boilerplate
                                $astral_body
                                // End non-boilerplate
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (EncoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        return (EncoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut c, unread_handle) = source_handle.read_enum();
                                        let source_again = unread_handle.commit();
                                        'innermost: loop {
                                            let ascii = match c {
                                                Unicode::NonAscii(non_ascii_again) => {
                                                    // Another non-ASCII item:
                                                    // encode it in 'middle.
                                                    non_ascii = non_ascii_again;
                                                    $handle = destination_handle;
                                                    continue 'middle;
                                                }
                                                Unicode::Ascii(a) => a,
                                            };
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_one(ascii);
                                            if $ascii_punctuation && ascii < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (EncoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (EncoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                {
                                                                    let (c_again, _unread_handle_again) =
                                                                        source_handle_again.read_enum();
                                                                    c = c_again;
                                                                    destination_handle = destination_handle_again;
                                                                    continue 'innermost;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue 'outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}
1166
// Expands to the pair of raw encode methods for an ASCII-compatible encoder
// by invoking `ascii_compatible_encoder_function!` twice with the same
// arguments: once as `encode_from_utf8_raw` (input `&str`, read through
// `Utf8Source`) and once as `encode_from_utf16_raw` (input `&[u16]`, read
// through `Utf16Source`).
//
// All arguments are forwarded positionally. Note that `$ascii_punctuation`
// is the last argument here but is passed after the three per-variant
// arguments to the inner macro.
macro_rules! ascii_compatible_encoder_functions {
    (
        $bmp_body:block,
        $astral_body:block,
        $bmp:ident,
        $astral:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        // `&str` input variant
        ascii_compatible_encoder_function!(
            $bmp_body,
            $astral_body,
            $bmp,
            $astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            encode_from_utf8_raw,
            str,
            Utf8Source,
            $ascii_punctuation
        );
        // `&[u16]` input variant
        ascii_compatible_encoder_function!(
            $bmp_body,
            $astral_body,
            $bmp,
            $astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            encode_from_utf16_raw,
            [u16],
            Utf16Source,
            $ascii_punctuation
        );
    };
}
1212
// Expands to a single raw encode method via
// `ascii_compatible_encoder_function!` for an encoder that only provides a
// `$bmp_body`. The astral case is filled in here: it returns
// `EncoderResult::Unmappable` carrying the astral value, together with
// `$source.consumed()` and `$handle.written()`.
//
// The identifier `astral` is written literally both inside the block and as
// the `$astral` argument so that the binding introduced by the inner macro's
// `NonAscii::Astral($astral)` pattern is the one the block refers to; the two
// spellings must be kept in sync.
macro_rules! ascii_compatible_bmp_encoder_function {
    (
        $bmp_body:block,
        $bmp:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $name:ident,
        $input:ty,
        $source_struct:ident,
        $ascii_punctuation:expr
    ) => {
        ascii_compatible_encoder_function!(
            $bmp_body,
            // Astral body: report the value as unmappable.
            {
                return (
                    EncoderResult::Unmappable(astral),
                    $source.consumed(),
                    $handle.written(),
                );
            },
            $bmp,
            astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            $name,
            $input,
            $source_struct,
            $ascii_punctuation
        );
    };
}
1250
// Expands to the pair of raw encode methods via
// `ascii_compatible_encoder_functions!` for an encoder that only provides a
// `$bmp_body`. The astral case is filled in here: it returns
// `EncoderResult::Unmappable` carrying the astral value, together with
// `$source.consumed()` and `$handle.written()`.
//
// The identifier `astral` is written literally both inside the block and as
// the `$astral` argument so that the binding introduced by the innermost
// macro's `NonAscii::Astral($astral)` pattern is the one the block refers to;
// the two spellings must be kept in sync.
macro_rules! ascii_compatible_bmp_encoder_functions {
    (
        $bmp_body:block,
        $bmp:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        ascii_compatible_encoder_functions!(
            $bmp_body,
            // Astral body: report the value as unmappable.
            {
                return (
                    EncoderResult::Unmappable(astral),
                    $source.consumed(),
                    $handle.written(),
                );
            },
            $bmp,
            astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            $ascii_punctuation
        );
    };
}
1282
/// Expands, inside an `impl` block, to a public BOM-sniffing decode method
/// plus the four private helpers it delegates to.
///
/// Arguments, in order: zero or more attributes for the public method followed
/// by a comma, then the names of
///
/// 1. the public method itself,
/// 2. the raw decode method that is called on `self.variant`,
/// 3. the "checking end" helper,
/// 4. the "after one potential BOM byte" helper,
/// 5. the "after two potential BOM bytes" helper,
/// 6. the "checking end with offset" helper,
///
/// and finally the type of one output code unit.
///
/// The generated code reads and writes `self.life_cycle`, `self.encoding` and
/// `self.variant`, and refers to `DecoderResult`, `DecoderLifeCycle`, `UTF_8`,
/// `UTF_16BE` and `UTF_16LE` by name, so all of those must be available where
/// the macro is expanded. Every generated method returns the tuple
/// `(result, read, written)`.
macro_rules! public_decode_function {
    ($(#[$meta:meta])*,
     $entry:ident,
     $raw:ident,
     $checking_end:ident,
     $after_one:ident,
     $after_two:ident,
     $with_offset:ident,
     $unit:ty) => (
    $(#[$meta])*
    pub fn $entry(&mut self,
                  src: &[u8],
                  dst: &mut [$unit],
                  last: bool)
                  -> (DecoderResult, usize, usize) {
        // How many leading bytes of `src` have been looked at as potential
        // BOM bytes during this call.
        let mut sniffed = 0usize;
        loop {
            match self.life_cycle {
                // Sniffing is over (or was never needed): plain conversion.
                DecoderLifeCycle::Converting => {
                    return self.$checking_end(src, dst, last);
                }
                // Everything below is BOM sniffing. Arms that do not return
                // simply fall out of the `match` and go around the loop again
                // with the updated state.
                DecoderLifeCycle::AtStart => {
                    debug_assert_eq!(sniffed, 0usize);
                    let (next_state, step) = match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xEFu8) => (DecoderLifeCycle::SeenUtf8First, 1usize),
                        Some(&0xFEu8) => (DecoderLifeCycle::SeenUtf16BeFirst, 1usize),
                        Some(&0xFFu8) => (DecoderLifeCycle::SeenUtf16LeFirst, 1usize),
                        Some(_) => (DecoderLifeCycle::Converting, 0usize),
                    };
                    self.life_cycle = next_state;
                    sniffed += step;
                }
                DecoderLifeCycle::AtUtf8Start => {
                    debug_assert_eq!(sniffed, 0usize);
                    let (next_state, step) = match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xEFu8) => (DecoderLifeCycle::SeenUtf8First, 1usize),
                        Some(_) => (DecoderLifeCycle::Converting, 0usize),
                    };
                    self.life_cycle = next_state;
                    sniffed += step;
                }
                DecoderLifeCycle::AtUtf16BeStart => {
                    debug_assert_eq!(sniffed, 0usize);
                    let (next_state, step) = match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xFEu8) => (DecoderLifeCycle::SeenUtf16BeFirst, 1usize),
                        Some(_) => (DecoderLifeCycle::Converting, 0usize),
                    };
                    self.life_cycle = next_state;
                    sniffed += step;
                }
                DecoderLifeCycle::AtUtf16LeStart => {
                    debug_assert_eq!(sniffed, 0usize);
                    let (next_state, step) = match src.first() {
                        None => return (DecoderResult::InputEmpty, 0, 0),
                        Some(&0xFFu8) => (DecoderLifeCycle::SeenUtf16LeFirst, 1usize),
                        Some(_) => (DecoderLifeCycle::Converting, 0usize),
                    };
                    self.life_cycle = next_state;
                    sniffed += step;
                }
                DecoderLifeCycle::SeenUtf8First => {
                    match src.get(sniffed) {
                        Some(&0xBBu8) => {
                            self.life_cycle = DecoderLifeCycle::SeenUtf8Second;
                            sniffed += 1;
                        }
                        // Out of input, but more may follow: stay in this state.
                        None if !last => {
                            return (DecoderResult::InputEmpty, sniffed, 0);
                        }
                        // Either a non-BOM byte or the end of the stream: the
                        // 0xEF seen earlier has to be decoded as data.
                        _ => {
                            return self.$after_one(src, dst, last, sniffed, 0xEFu8);
                        }
                    }
                }
                DecoderLifeCycle::SeenUtf8Second => {
                    match src.get(sniffed) {
                        Some(&0xBFu8) => {
                            // Complete UTF-8 BOM: switch decoders if needed and
                            // decode whatever follows the BOM.
                            self.life_cycle = DecoderLifeCycle::Converting;
                            if self.encoding != UTF_8 {
                                self.encoding = UTF_8;
                                self.variant = UTF_8.new_variant_decoder();
                            }
                            return self.$with_offset(src, dst, last, sniffed + 1);
                        }
                        None if !last => {
                            return (DecoderResult::InputEmpty, sniffed, 0);
                        }
                        _ => {
                            return self.$after_two(src, dst, last, sniffed);
                        }
                    }
                }
                DecoderLifeCycle::SeenUtf16BeFirst => {
                    match src.get(sniffed) {
                        Some(&0xFFu8) => {
                            // Complete UTF-16BE BOM.
                            self.life_cycle = DecoderLifeCycle::Converting;
                            if self.encoding != UTF_16BE {
                                self.encoding = UTF_16BE;
                                self.variant = UTF_16BE.new_variant_decoder();
                            }
                            return self.$with_offset(src, dst, last, sniffed + 1);
                        }
                        None if !last => {
                            return (DecoderResult::InputEmpty, sniffed, 0);
                        }
                        _ => {
                            return self.$after_one(src, dst, last, sniffed, 0xFEu8);
                        }
                    }
                }
                DecoderLifeCycle::SeenUtf16LeFirst => {
                    match src.get(sniffed) {
                        Some(&0xFEu8) => {
                            // Complete UTF-16LE BOM.
                            self.life_cycle = DecoderLifeCycle::Converting;
                            if self.encoding != UTF_16LE {
                                self.encoding = UTF_16LE;
                                self.variant = UTF_16LE.new_variant_decoder();
                            }
                            return self.$with_offset(src, dst, last, sniffed + 1);
                        }
                        None if !last => {
                            return (DecoderResult::InputEmpty, sniffed, 0);
                        }
                        _ => {
                            return self.$after_one(src, dst, last, sniffed, 0xFFu8);
                        }
                    }
                }
                DecoderLifeCycle::ConvertingWithPendingBB => {
                    debug_assert_eq!(sniffed, 0usize);
                    // A 0xBB from an earlier buffer is still waiting to be decoded.
                    return self.$after_one(src, dst, last, 0usize, 0xBBu8);
                }
                DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
            }
        }
    }

    /// Resumes plain conversion after a single byte turned out not to start
    /// a BOM. With `offset == 0` that byte (`first_byte`) is not part of
    /// `src` and is decoded on its own first; otherwise it is `src[0]` and
    /// `src` is decoded as is.
    fn $after_one(&mut self,
                  src: &[u8],
                  dst: &mut [$unit],
                  last: bool,
                  offset: usize,
                  first_byte: u8)
                  -> (DecoderResult, usize, usize) {
        self.life_cycle = DecoderLifeCycle::Converting;
        if offset != 0usize {
            debug_assert_eq!(offset, 1usize);
            // The byte in question is in `src`; nothing to replay.
            return self.$checking_end(src, dst, last);
        }
        // The byte was seen in an earlier buffer, so replay it by itself.
        let replayed = [first_byte];
        let (replay_result, _, replay_written) =
            self.variant.$raw(&replayed[..], dst, false);
        match replay_result {
            DecoderResult::InputEmpty => {
                let (result, read, written) =
                    self.$checking_end(src, &mut dst[replay_written..], last);
                // Only bytes taken from `src` count as read.
                (result, read, replay_written + written)
            }
            // The replayed byte did not come from `src`: report zero read.
            DecoderResult::Malformed(_, _) => (replay_result, 0usize, replay_written),
            DecoderResult::OutputFull => {
                panic!("Output buffer must have been too small.");
            }
        }
    }

    /// Resumes plain conversion after 0xEF 0xBB was not followed by 0xBF.
    /// `offset` says how many of those two bytes are at the start of `src`;
    /// the ones that are not get decoded separately before `src`.
    fn $after_two(&mut self,
                  src: &[u8],
                  dst: &mut [$unit],
                  last: bool,
                  offset: usize)
                  -> (DecoderResult, usize, usize) {
        self.life_cycle = DecoderLifeCycle::Converting;
        match offset {
            0usize => {
                // Neither byte is in the current buffer: replay both.
                let ef_bb = [0xEFu8, 0xBBu8];
                let (replay_result, replay_read, replay_written) =
                    self.variant.$raw(&ef_bb[..], dst, false);
                match replay_result {
                    DecoderResult::InputEmpty => {
                        let (result, read, written) =
                            self.$checking_end(src, &mut dst[replay_written..], last);
                        // Only bytes taken from `src` count as read.
                        (result, read, replay_written + written)
                    }
                    DecoderResult::Malformed(_, _) => {
                        if replay_read == 1usize {
                            // Only the first byte was consumed before the
                            // error; the 0xBB, which is not in `src`, has to
                            // be handled on the next call.
                            self.life_cycle = DecoderLifeCycle::ConvertingWithPendingBB;
                        }
                        // Nothing was read from `src`.
                        (replay_result, 0usize, replay_written)
                    }
                    DecoderResult::OutputFull => {
                        panic!("Output buffer must have been too small.");
                    }
                }
            }
            // Only the 0xBB is in the current buffer: replay the 0xEF.
            1usize => self.$after_one(src, dst, last, 0usize, 0xEFu8),
            _ => {
                debug_assert_eq!(offset, 2usize);
                // Both bytes are in `src`; nothing to replay.
                self.$checking_end(src, dst, last)
            }
        }
    }

    /// Runs the "checking end" helper on `src` minus its first `offset`
    /// bytes and adds `offset` back onto the reported read count, so the
    /// skipped bytes show up as consumed.
    fn $with_offset(&mut self,
                    src: &[u8],
                    dst: &mut [$unit],
                    last: bool,
                    offset: usize)
                    -> (DecoderResult, usize, usize) {
        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);
        let remainder = &src[offset..];
        let (outcome, consumed, produced) = self.$checking_end(remainder, dst, last);
        (outcome, offset + consumed, produced)
    }

    /// Delegates to the raw decode method of `self.variant` and marks the
    /// decoder as finished when `last` is `true` and the outcome is
    /// `DecoderResult::InputEmpty`.
    fn $checking_end(&mut self,
                     src: &[u8],
                     dst: &mut [$unit],
                     last: bool)
                     -> (DecoderResult, usize, usize) {
        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);
        let (outcome, consumed, produced) = self.variant.$raw(src, dst, last);
        match outcome {
            DecoderResult::InputEmpty if last => {
                self.life_cycle = DecoderLifeCycle::Finished;
            }
            _ => {}
        }
        (outcome, consumed, produced)
    });
}
1623