1 | // Copyright Mozilla Foundation. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | use super::*; |
11 | use crate::data::*; |
12 | use crate::handles::*; |
13 | use crate::variant::*; |
14 | // Rust 1.14.0 requires the following despite the asterisk above. |
15 | use super::in_inclusive_range16; |
16 | |
/// States of the ISO-2022-JP decoder state machine.
///
/// The first four variants are the only ones that are ever stored in
/// `Iso2022JpDecoder::output_state`; the remaining three are transient
/// states used while a multi-byte unit or an escape sequence is being read.
#[derive(Copy, Clone, PartialEq)]
enum Iso2022JpDecoderState {
    /// Bytes up to 0x7F (other than 0x0E, 0x0F and 0x1B) are emitted as ASCII.
    Ascii,
    /// Like `Ascii`, except that 0x5C decodes to U+00A5 and 0x7E to U+203E.
    Roman,
    /// Bytes 0x21..=0x5F decode to half-width katakana starting at U+FF61.
    Katakana,
    /// Expecting the first byte (0x21..=0x7E) of a two-byte sequence.
    LeadByte,
    /// A lead byte is stored in `lead`; expecting the second byte.
    TrailByte,
    /// An 0x1B byte has been seen; expecting 0x24 or 0x28.
    EscapeStart,
    /// 0x1B followed by 0x24 or 0x28 has been seen; expecting the final
    /// byte of the escape sequence.
    Escape,
}
27 | |
/// Streaming decoder state for ISO-2022-JP.
pub struct Iso2022JpDecoder {
    /// Current state of the state machine.
    decoder_state: Iso2022JpDecoderState,
    /// State selected by the most recent complete escape sequence; the
    /// decoder falls back to it when an escape sequence or a two-byte
    /// sequence turns out to be malformed. Only takes one of the first
    /// four `Iso2022JpDecoderState` values.
    output_state: Iso2022JpDecoderState,
    /// Either the lead byte of a two-byte sequence (in `TrailByte`) or the
    /// byte that followed 0x1B (in `Escape`); zero when nothing is stored.
    lead: u8,
    /// Set when a complete escape sequence has just been consumed and
    /// cleared when any other byte is processed; used to report an escape
    /// sequence that is immediately followed by another one as malformed.
    output_flag: bool,
    /// Set when an escape sequence failed after its second byte, meaning
    /// that the byte stored in `lead` still has to be processed at the
    /// start of the next decode call.
    pending_prepended: bool,
}
35 | |
impl Iso2022JpDecoder {
    /// Creates a decoder in its initial state (ASCII, nothing buffered)
    /// wrapped in `VariantDecoder::Iso2022Jp`.
    pub fn new() -> VariantDecoder {
        VariantDecoder::Iso2022Jp(Iso2022JpDecoder {
            decoder_state: Iso2022JpDecoderState::Ascii,
            output_state: Iso2022JpDecoderState::Ascii,
            lead: 0u8,
            output_flag: false,
            pending_prepended: false,
        })
    }

    /// Returns `true` when every field has the value that `new()` gives it,
    /// i.e. the decoder carries no state from previous input.
    pub fn in_neutral_state(&self) -> bool {
        self.decoder_state == Iso2022JpDecoderState::Ascii
            && self.output_state == Iso2022JpDecoderState::Ascii
            && self.lead == 0u8
            && !self.output_flag
            && !self.pending_prepended
    }

    /// Returns `byte_length` plus the number of already-consumed bytes that
    /// are still held in the decoder state: one for a stored `lead` that is
    /// not pending as prepended output, and one when the decoder is inside
    /// an escape sequence. Returns `None` if the addition overflows.
    fn extra_to_input_from_state(&self, byte_length: usize) -> Option<usize> {
        byte_length.checked_add(
            if self.lead == 0 || self.pending_prepended {
                0
            } else {
                1
            } + match self.decoder_state {
                Iso2022JpDecoderState::Escape | Iso2022JpDecoderState::EscapeStart => 1,
                _ => 0,
            },
        )
    }

    /// Returns the number of extra output units accounted for by the
    /// current state: one for a pending prepended `lead` byte plus one when
    /// `output_flag` is set.
    fn extra_to_output_from_state(&self) -> usize {
        if self.lead != 0 && self.pending_prepended {
            1 + self.output_flag as usize
        } else {
            self.output_flag as usize
        }
    }

    /// Upper bound for the UTF-16 output length when decoding `byte_length`
    /// more bytes: the state-dependent output extra plus the adjusted input
    /// length.
    // NOTE(review): `checked_add` is defined outside this file; presumably
    // it yields `None` on overflow or when its `Option` argument is `None`.
    pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
        checked_add(
            self.extra_to_output_from_state(),
            self.extra_to_input_from_state(byte_length),
        )
    }

    /// Same bound as `max_utf8_buffer_length`; this method simply delegates
    /// to it.
    pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
        // worst case: 1 to 3 (half-width katakana)
        self.max_utf8_buffer_length(byte_length)
    }

    /// Upper bound for the UTF-8 output length when decoding `byte_length`
    /// more bytes: three times the sum used by `max_utf16_buffer_length`.
    pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
        checked_mul(
            3,
            checked_add(
                self.extra_to_output_from_state(),
                self.extra_to_input_from_state(byte_length),
            ),
        )
    }

    // NOTE(review): the definition of `decoder_functions!` is not visible
    // from this file. Judging by their contents, the four code blocks below
    // are: a preamble executed at the start of a decode call, an (empty)
    // block, a handler for the end of the input, and the per-byte handler
    // with the current byte bound to `b`. Confirm against the macro.
    decoder_functions!(
        {
            if self.pending_prepended {
                // lead was set in EscapeStart and "prepended"
                // in Escape.
                debug_assert!(self.lead == 0x24u8 || self.lead == 0x28u8);
                match dest.check_space_bmp() {
                    Space::Full(_) => {
                        return (DecoderResult::OutputFull, 0, 0);
                    }
                    Space::Available(destination_handle) => {
                        self.pending_prepended = false;
                        self.output_flag = false;
                        // Process the stored byte the way the current
                        // state would have processed it as input.
                        match self.decoder_state {
                            Iso2022JpDecoderState::Ascii | Iso2022JpDecoderState::Roman => {
                                destination_handle.write_ascii(self.lead);
                                self.lead = 0x0u8;
                            }
                            Iso2022JpDecoderState::Katakana => {
                                destination_handle
                                    .write_upper_bmp(u16::from(self.lead) - 0x21u16 + 0xFF61u16);
                                self.lead = 0x0u8;
                            }
                            Iso2022JpDecoderState::LeadByte => {
                                // The stored byte stays in `lead` and acts
                                // as the lead of a two-byte sequence.
                                self.decoder_state = Iso2022JpDecoderState::TrailByte;
                            }
                            _ => unreachable!(),
                        }
                    }
                }
            }
        },
        {},
        {
            match self.decoder_state {
                Iso2022JpDecoderState::TrailByte | Iso2022JpDecoderState::EscapeStart => {
                    self.decoder_state = self.output_state;
                    return (DecoderResult::Malformed(1, 0), src_consumed, dest.written());
                }
                Iso2022JpDecoderState::Escape => {
                    // The byte after 0x1B is kept in `lead` and handled by
                    // the block at the top on the next call.
                    self.pending_prepended = true;
                    self.decoder_state = self.output_state;
                    return (DecoderResult::Malformed(1, 1), src_consumed, dest.written());
                }
                _ => {}
            }
        },
        {
            match self.decoder_state {
                Iso2022JpDecoderState::Ascii => {
                    if b == 0x1Bu8 {
                        self.decoder_state = Iso2022JpDecoderState::EscapeStart;
                        continue;
                    }
                    self.output_flag = false;
                    if b > 0x7Fu8 || b == 0x0Eu8 || b == 0x0Fu8 {
                        return (
                            DecoderResult::Malformed(1, 0),
                            unread_handle.consumed(),
                            destination_handle.written(),
                        );
                    }
                    destination_handle.write_ascii(b);
                    continue;
                }
                Iso2022JpDecoderState::Roman => {
                    if b == 0x1Bu8 {
                        self.decoder_state = Iso2022JpDecoderState::EscapeStart;
                        continue;
                    }
                    self.output_flag = false;
                    // 0x5C and 0x7E are the two bytes that differ from ASCII.
                    if b == 0x5Cu8 {
                        destination_handle.write_mid_bmp(0x00A5u16);
                        continue;
                    }
                    if b == 0x7Eu8 {
                        destination_handle.write_upper_bmp(0x203Eu16);
                        continue;
                    }
                    if b > 0x7Fu8 || b == 0x0Eu8 || b == 0x0Fu8 {
                        return (
                            DecoderResult::Malformed(1, 0),
                            unread_handle.consumed(),
                            destination_handle.written(),
                        );
                    }
                    destination_handle.write_ascii(b);
                    continue;
                }
                Iso2022JpDecoderState::Katakana => {
                    if b == 0x1Bu8 {
                        self.decoder_state = Iso2022JpDecoderState::EscapeStart;
                        continue;
                    }
                    self.output_flag = false;
                    if b >= 0x21u8 && b <= 0x5Fu8 {
                        destination_handle.write_upper_bmp(u16::from(b) - 0x21u16 + 0xFF61u16);
                        continue;
                    }
                    return (
                        DecoderResult::Malformed(1, 0),
                        unread_handle.consumed(),
                        destination_handle.written(),
                    );
                }
                Iso2022JpDecoderState::LeadByte => {
                    if b == 0x1Bu8 {
                        self.decoder_state = Iso2022JpDecoderState::EscapeStart;
                        continue;
                    }
                    self.output_flag = false;
                    if b >= 0x21u8 && b <= 0x7Eu8 {
                        self.lead = b;
                        self.decoder_state = Iso2022JpDecoderState::TrailByte;
                        continue;
                    }
                    return (
                        DecoderResult::Malformed(1, 0),
                        unread_handle.consumed(),
                        destination_handle.written(),
                    );
                }
                Iso2022JpDecoderState::TrailByte => {
                    if b == 0x1Bu8 {
                        self.decoder_state = Iso2022JpDecoderState::EscapeStart;
                        // The byte in error is the previous
                        // lead byte.
                        return (
                            DecoderResult::Malformed(1, 1),
                            unread_handle.consumed(),
                            destination_handle.written(),
                        );
                    }
                    self.decoder_state = Iso2022JpDecoderState::LeadByte;
                    let jis0208_lead_minus_offset = self.lead - 0x21;
                    let byte = b;
                    let handle = destination_handle;
                    // The code below uses else after continue in
                    // order to retain the structure seen in EUC-JP.
                    let trail_minus_offset = byte.wrapping_sub(0x21);
                    // Fast-track Hiragana (60% according to Lunde)
                    // and Katakana (10% according to Lunde).
                    if jis0208_lead_minus_offset == 0x03 && trail_minus_offset < 0x53 {
                        // Hiragana
                        handle.write_upper_bmp(0x3041 + u16::from(trail_minus_offset));
                        continue;
                    } else if jis0208_lead_minus_offset == 0x04 && trail_minus_offset < 0x56 {
                        // Katakana
                        handle.write_upper_bmp(0x30A1 + u16::from(trail_minus_offset));
                        continue;
                    } else if trail_minus_offset > (0xFE - 0xA1) {
                        // Trail byte outside 0x21..=0x7E (the subtraction
                        // wraps for bytes below 0x21).
                        return (
                            DecoderResult::Malformed(2, 0),
                            unread_handle.consumed(),
                            handle.written(),
                        );
                    } else {
                        // Index computed from the lead/trail pair with 94
                        // trail positions per lead; each table lookup below
                        // subtracts that table's starting index and relies
                        // on wrapping to fail the bounds test for smaller
                        // pointers.
                        let pointer =
                            mul_94(jis0208_lead_minus_offset) + trail_minus_offset as usize;
                        let level1_pointer = pointer.wrapping_sub(1410);
                        if level1_pointer < JIS0208_LEVEL1_KANJI.len() {
                            handle.write_upper_bmp(JIS0208_LEVEL1_KANJI[level1_pointer]);
                            continue;
                        } else {
                            let level2_pointer = pointer.wrapping_sub(4418);
                            if level2_pointer < JIS0208_LEVEL2_AND_ADDITIONAL_KANJI.len() {
                                handle.write_upper_bmp(
                                    JIS0208_LEVEL2_AND_ADDITIONAL_KANJI[level2_pointer],
                                );
                                continue;
                            } else {
                                let ibm_pointer = pointer.wrapping_sub(8272);
                                if ibm_pointer < IBM_KANJI.len() {
                                    handle.write_upper_bmp(IBM_KANJI[ibm_pointer]);
                                    continue;
                                } else if let Some(bmp) = jis0208_symbol_decode(pointer) {
                                    handle.write_bmp_excl_ascii(bmp);
                                    continue;
                                } else if let Some(bmp) = jis0208_range_decode(pointer) {
                                    handle.write_bmp_excl_ascii(bmp);
                                    continue;
                                } else {
                                    return (
                                        DecoderResult::Malformed(2, 0),
                                        unread_handle.consumed(),
                                        handle.written(),
                                    );
                                }
                            }
                        }
                    }
                }
                Iso2022JpDecoderState::EscapeStart => {
                    if b == 0x24u8 || b == 0x28u8 {
                        self.lead = b;
                        self.decoder_state = Iso2022JpDecoderState::Escape;
                        continue;
                    }
                    self.output_flag = false;
                    self.decoder_state = self.output_state;
                    // `unread()` is used here instead of `consumed()`, so
                    // the current byte is evidently pushed back and seen
                    // again in the restored state.
                    return (
                        DecoderResult::Malformed(1, 0),
                        unread_handle.unread(),
                        destination_handle.written(),
                    );
                }
                Iso2022JpDecoderState::Escape => {
                    // Map the (stored byte, current byte) pair to the state
                    // that the escape sequence selects, if it is one of the
                    // supported sequences.
                    let mut state: Option<Iso2022JpDecoderState> = None;
                    if self.lead == 0x28u8 && b == 0x42u8 {
                        state = Some(Iso2022JpDecoderState::Ascii);
                    } else if self.lead == 0x28u8 && b == 0x4Au8 {
                        state = Some(Iso2022JpDecoderState::Roman);
                    } else if self.lead == 0x28u8 && b == 0x49u8 {
                        state = Some(Iso2022JpDecoderState::Katakana);
                    } else if self.lead == 0x24u8 && (b == 0x40u8 || b == 0x42u8) {
                        state = Some(Iso2022JpDecoderState::LeadByte);
                    }
                    match state {
                        Some(s) => {
                            self.lead = 0x0u8;
                            self.decoder_state = s;
                            self.output_state = s;
                            let flag = self.output_flag;
                            self.output_flag = true;
                            if flag {
                                // We had an escape sequence
                                // immediately following another
                                // escape sequence. Therefore,
                                // the first one of these was
                                // useless.
                                return (
                                    DecoderResult::Malformed(3, 3),
                                    unread_handle.consumed(),
                                    destination_handle.written(),
                                );
                            }
                            continue;
                        }
                        None => {
                            // self.lead is still the previous
                            // byte. It will be processed in
                            // the preamble upon next call.
                            self.pending_prepended = true;
                            self.output_flag = false;
                            self.decoder_state = self.output_state;
                            // The byte in error is not the
                            // current or the previous byte but
                            // the one before those (lone 0x1B).
                            return (
                                DecoderResult::Malformed(1, 1),
                                unread_handle.unread(),
                                destination_handle.written(),
                            );
                        }
                    }
                }
            }
        },
        self,
        src_consumed,
        dest,
        source,
        b,
        destination_handle,
        unread_handle,
        check_space_bmp
    );
}
366 | |
/// Returns `true` if `bmp` has a kanji mapping, using the lookup that is
/// compiled in when the `fast-kanji-encode` feature is enabled.
#[cfg(feature = "fast-kanji-encode")]
#[inline(always)]
fn is_kanji_mapped(bmp: u16) -> bool {
    // Use the shift_jis variant, because we don't care about the
    // byte values here.
    jis0208_kanji_shift_jis_encode(bmp).is_some()
}
374 | |
375 | #[cfg (not(feature = "fast-kanji-encode" ))] |
376 | #[cfg_attr ( |
377 | feature = "cargo-clippy" , |
378 | allow(if_let_redundant_pattern_matching, if_same_then_else) |
379 | )] |
380 | #[inline (always)] |
381 | fn is_kanji_mapped(bmp: u16) -> bool { |
382 | if 0x4EDD == bmp { |
383 | true |
384 | } else if let Some(_) = jis0208_level1_kanji_shift_jis_encode(bmp) { |
385 | // Use the shift_jis variant, because we don't care about the |
386 | // byte values here. |
387 | true |
388 | } else if let Some(_) = jis0208_level2_and_additional_kanji_encode(bmp) { |
389 | true |
390 | } else if let Some(_) = position(&IBM_KANJI[..], needle:bmp) { |
391 | true |
392 | } else { |
393 | false |
394 | } |
395 | } |
396 | |
397 | #[cfg_attr ( |
398 | feature = "cargo-clippy" , |
399 | allow(if_let_redundant_pattern_matching, if_same_then_else) |
400 | )] |
401 | fn is_mapped_for_two_byte_encode(bmp: u16) -> bool { |
402 | // The code below uses else after return to |
403 | // keep the same structure as in EUC-JP. |
404 | // Lunde says 60% Hiragana, 30% Kanji, 10% Katakana |
405 | let bmp_minus_hiragana = bmp.wrapping_sub(0x3041); |
406 | if bmp_minus_hiragana < 0x53 { |
407 | true |
408 | } else if in_inclusive_range16(bmp, 0x4E00, 0x9FA0) { |
409 | is_kanji_mapped(bmp) |
410 | } else { |
411 | let bmp_minus_katakana = bmp.wrapping_sub(0x30A1); |
412 | if bmp_minus_katakana < 0x56 { |
413 | true |
414 | } else { |
415 | let bmp_minus_space = bmp.wrapping_sub(0x3000); |
416 | if bmp_minus_space < 3 { |
417 | // fast-track common punctuation |
418 | true |
419 | } else if in_inclusive_range16(bmp, 0xFF61, 0xFF9F) { |
420 | true |
421 | } else if bmp == 0x2212 { |
422 | true |
423 | } else if let Some(_) = jis0208_range_encode(bmp) { |
424 | true |
425 | } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) || bmp == 0xF929 || bmp == 0xF9DC { |
426 | true |
427 | } else if let Some(_) = ibm_symbol_encode(bmp) { |
428 | true |
429 | } else if let Some(_) = jis0208_symbol_encode(bmp) { |
430 | true |
431 | } else { |
432 | false |
433 | } |
434 | } |
435 | } |
436 | } |
437 | |
/// Encodes the kanji `bmp` as a (lead, trail) byte pair, or returns `None`
/// if it is unmapped; variant compiled in when the `fast-kanji-encode`
/// feature is enabled. Delegates entirely to
/// `jis0208_kanji_iso_2022_jp_encode`.
#[cfg(feature = "fast-kanji-encode")]
#[inline(always)]
fn encode_kanji(bmp: u16) -> Option<(u8, u8)> {
    jis0208_kanji_iso_2022_jp_encode(bmp)
}
443 | |
444 | #[cfg (not(feature = "fast-kanji-encode" ))] |
445 | #[inline (always)] |
446 | fn encode_kanji(bmp: u16) -> Option<(u8, u8)> { |
447 | if 0x4EDD == bmp { |
448 | // Ideograph on the symbol row! |
449 | Some((0x21, 0xB8 - 0x80)) |
450 | } else if let Some((lead: u8, trail: u8)) = jis0208_level1_kanji_iso_2022_jp_encode(bmp) { |
451 | Some((lead, trail)) |
452 | } else if let Some(pos: usize) = jis0208_level2_and_additional_kanji_encode(bmp) { |
453 | let lead: usize = (pos / 94) + (0xD0 - 0x80); |
454 | let trail: usize = (pos % 94) + 0x21; |
455 | Some((lead as u8, trail as u8)) |
456 | } else if let Some(pos: usize) = position(&IBM_KANJI[..], needle:bmp) { |
457 | let lead: usize = (pos / 94) + (0xF9 - 0x80); |
458 | let trail: usize = (pos % 94) + 0x21; |
459 | Some((lead as u8, trail as u8)) |
460 | } else { |
461 | None |
462 | } |
463 | } |
464 | |
/// Output mode the ISO-2022-JP encoder is currently in.
enum Iso2022JpEncoderState {
    /// Characters up to U+007F are written as single bytes. This is the
    /// initial state and the only one without pending state.
    Ascii,
    /// Entered by writing 0x1B 0x28 0x4A; U+00A5 is written as 0x5C and
    /// U+203E as 0x7E.
    Roman,
    /// Entered by writing 0x1B 0x24 0x42; characters are written as two
    /// bytes.
    Jis0208,
}
470 | |
/// Streaming encoder state for ISO-2022-JP.
pub struct Iso2022JpEncoder {
    /// Output mode selected by the most recently written escape sequence.
    state: Iso2022JpEncoderState,
}
474 | |
475 | impl Iso2022JpEncoder { |
476 | pub fn new(encoding: &'static Encoding) -> Encoder { |
477 | Encoder::new( |
478 | encoding, |
479 | VariantEncoder::Iso2022Jp(Iso2022JpEncoder { |
480 | state: Iso2022JpEncoderState::Ascii, |
481 | }), |
482 | ) |
483 | } |
484 | |
485 | pub fn has_pending_state(&self) -> bool { |
486 | match self.state { |
487 | Iso2022JpEncoderState::Ascii => false, |
488 | _ => true, |
489 | } |
490 | } |
491 | |
492 | pub fn max_buffer_length_from_utf16_without_replacement( |
493 | &self, |
494 | u16_length: usize, |
495 | ) -> Option<usize> { |
496 | // Worst case: every other character is ASCII/Roman and every other |
497 | // JIS0208. |
498 | // Two UTF-16 input units: |
499 | // Transition to Roman: 3 |
500 | // Roman/ASCII: 1 |
501 | // Transition to JIS0208: 3 |
502 | // JIS0208: 2 |
503 | // End transition: 3 |
504 | checked_add_opt( |
505 | checked_add(3, u16_length.checked_mul(4)), |
506 | checked_div(u16_length.checked_add(1), 2), |
507 | ) |
508 | } |
509 | |
510 | pub fn max_buffer_length_from_utf8_without_replacement( |
511 | &self, |
512 | byte_length: usize, |
513 | ) -> Option<usize> { |
514 | // Worst case: every other character is ASCII/Roman and every other |
515 | // JIS0208. |
516 | // Three UTF-8 input units: 1 ASCII, 2 JIS0208 |
517 | // Transition to ASCII: 3 |
518 | // Roman/ASCII: 1 |
519 | // Transition to JIS0208: 3 |
520 | // JIS0208: 2 |
521 | // End transition: 3 |
522 | checked_add(3, byte_length.checked_mul(3)) |
523 | } |
524 | |
525 | encoder_functions!( |
526 | { |
527 | match self.state { |
528 | Iso2022JpEncoderState::Ascii => {} |
529 | _ => match dest.check_space_three() { |
530 | Space::Full(dst_written) => { |
531 | return (EncoderResult::OutputFull, src_consumed, dst_written); |
532 | } |
533 | Space::Available(destination_handle) => { |
534 | self.state = Iso2022JpEncoderState::Ascii; |
535 | destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8); |
536 | } |
537 | }, |
538 | } |
539 | }, |
540 | { |
541 | match self.state { |
542 | Iso2022JpEncoderState::Ascii => { |
543 | if c == ' \u{0E}' || c == ' \u{0F}' || c == ' \u{1B}' { |
544 | return ( |
545 | EncoderResult::Unmappable(' \u{FFFD}' ), |
546 | unread_handle.consumed(), |
547 | destination_handle.written(), |
548 | ); |
549 | } |
550 | if c <= ' \u{7F}' { |
551 | destination_handle.write_one(c as u8); |
552 | continue; |
553 | } |
554 | if c == ' \u{A5}' || c == ' \u{203E}' { |
555 | self.state = Iso2022JpEncoderState::Roman; |
556 | destination_handle.write_three(0x1Bu8, 0x28u8, 0x4Au8); |
557 | unread_handle.unread(); |
558 | continue; |
559 | } |
560 | if c > ' \u{FFFF}' { |
561 | return ( |
562 | EncoderResult::Unmappable(c), |
563 | unread_handle.consumed(), |
564 | destination_handle.written(), |
565 | ); |
566 | } |
567 | // Yes, if c is in index, we'll search |
568 | // again in the Jis0208 state, but this |
569 | // encoder is not worth optimizing. |
570 | if is_mapped_for_two_byte_encode(c as u16) { |
571 | self.state = Iso2022JpEncoderState::Jis0208; |
572 | destination_handle.write_three(0x1Bu8, 0x24u8, 0x42u8); |
573 | unread_handle.unread(); |
574 | continue; |
575 | } |
576 | return ( |
577 | EncoderResult::Unmappable(c), |
578 | unread_handle.consumed(), |
579 | destination_handle.written(), |
580 | ); |
581 | } |
582 | Iso2022JpEncoderState::Roman => { |
583 | if c == ' \u{0E}' || c == ' \u{0F}' || c == ' \u{1B}' { |
584 | return ( |
585 | EncoderResult::Unmappable(' \u{FFFD}' ), |
586 | unread_handle.consumed(), |
587 | destination_handle.written(), |
588 | ); |
589 | } |
590 | if c == ' \u{5C}' || c == ' \u{7E}' { |
591 | self.state = Iso2022JpEncoderState::Ascii; |
592 | destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8); |
593 | unread_handle.unread(); |
594 | continue; |
595 | } |
596 | if c <= ' \u{7F}' { |
597 | destination_handle.write_one(c as u8); |
598 | continue; |
599 | } |
600 | if c == ' \u{A5}' { |
601 | destination_handle.write_one(0x5Cu8); |
602 | continue; |
603 | } |
604 | if c == ' \u{203E}' { |
605 | destination_handle.write_one(0x7Eu8); |
606 | continue; |
607 | } |
608 | if c > ' \u{FFFF}' { |
609 | return ( |
610 | EncoderResult::Unmappable(c), |
611 | unread_handle.consumed(), |
612 | destination_handle.written(), |
613 | ); |
614 | } |
615 | // Yes, if c is in index, we'll search |
616 | // again in the Jis0208 state, but this |
617 | // encoder is not worth optimizing. |
618 | if is_mapped_for_two_byte_encode(c as u16) { |
619 | self.state = Iso2022JpEncoderState::Jis0208; |
620 | destination_handle.write_three(0x1Bu8, 0x24u8, 0x42u8); |
621 | unread_handle.unread(); |
622 | continue; |
623 | } |
624 | return ( |
625 | EncoderResult::Unmappable(c), |
626 | unread_handle.consumed(), |
627 | destination_handle.written(), |
628 | ); |
629 | } |
630 | Iso2022JpEncoderState::Jis0208 => { |
631 | if c <= ' \u{7F}' { |
632 | self.state = Iso2022JpEncoderState::Ascii; |
633 | destination_handle.write_three(0x1Bu8, 0x28u8, 0x42u8); |
634 | unread_handle.unread(); |
635 | continue; |
636 | } |
637 | if c == ' \u{A5}' || c == ' \u{203E}' { |
638 | self.state = Iso2022JpEncoderState::Roman; |
639 | destination_handle.write_three(0x1Bu8, 0x28u8, 0x4Au8); |
640 | unread_handle.unread(); |
641 | continue; |
642 | } |
643 | if c > ' \u{FFFF}' { |
644 | // Transition to ASCII here in order |
645 | // not to make it the responsibility |
646 | // of the caller. |
647 | self.state = Iso2022JpEncoderState::Ascii; |
648 | return ( |
649 | EncoderResult::Unmappable(c), |
650 | unread_handle.consumed(), |
651 | destination_handle.write_three_return_written(0x1Bu8, 0x28u8, 0x42u8), |
652 | ); |
653 | } |
654 | let bmp = c as u16; |
655 | let handle = destination_handle; |
656 | // The code below uses else after continue to |
657 | // keep the same structure as in EUC-JP. |
658 | // Lunde says 60% Hiragana, 30% Kanji, 10% Katakana |
659 | let bmp_minus_hiragana = bmp.wrapping_sub(0x3041); |
660 | if bmp_minus_hiragana < 0x53 { |
661 | handle.write_two(0x24, 0x21 + bmp_minus_hiragana as u8); |
662 | continue; |
663 | } else if in_inclusive_range16(bmp, 0x4E00, 0x9FA0) { |
664 | if let Some((lead, trail)) = encode_kanji(bmp) { |
665 | handle.write_two(lead, trail); |
666 | continue; |
667 | } else { |
668 | self.state = Iso2022JpEncoderState::Ascii; |
669 | return ( |
670 | EncoderResult::Unmappable(c), |
671 | unread_handle.consumed(), |
672 | handle.write_three_return_written(0x1Bu8, 0x28u8, 0x42u8), |
673 | ); |
674 | } |
675 | } else { |
676 | let bmp_minus_katakana = bmp.wrapping_sub(0x30A1); |
677 | if bmp_minus_katakana < 0x56 { |
678 | handle.write_two(0x25, 0x21 + bmp_minus_katakana as u8); |
679 | continue; |
680 | } else { |
681 | let bmp_minus_space = bmp.wrapping_sub(0x3000); |
682 | if bmp_minus_space < 3 { |
683 | // fast-track common punctuation |
684 | handle.write_two(0x21, 0x21 + bmp_minus_space as u8); |
685 | continue; |
686 | } |
687 | let bmp_minus_half_width = bmp.wrapping_sub(0xFF61); |
688 | if bmp_minus_half_width <= (0xFF9F - 0xFF61) { |
689 | // We have half-width katakana. The lead is either |
690 | // row 1 or 5 of JIS X 0208, so the lookup table |
691 | // only stores the trail. |
692 | let lead = |
693 | if bmp != 0xFF70 && in_inclusive_range16(bmp, 0xFF66, 0xFF9D) { |
694 | 0x25u8 |
695 | } else { |
696 | 0x21u8 |
697 | }; |
698 | let trail = |
699 | ISO_2022_JP_HALF_WIDTH_TRAIL[bmp_minus_half_width as usize]; |
700 | handle.write_two(lead, trail); |
701 | continue; |
702 | } else if bmp == 0x2212 { |
703 | handle.write_two(0x21, 0x5D); |
704 | continue; |
705 | } else if let Some(pointer) = jis0208_range_encode(bmp) { |
706 | let lead = (pointer / 94) + 0x21; |
707 | let trail = (pointer % 94) + 0x21; |
708 | handle.write_two(lead as u8, trail as u8); |
709 | continue; |
710 | } else if in_inclusive_range16(bmp, 0xFA0E, 0xFA2D) |
711 | || bmp == 0xF929 |
712 | || bmp == 0xF9DC |
713 | { |
714 | // Guaranteed to be found in IBM_KANJI |
715 | let pos = position(&IBM_KANJI[..], bmp).unwrap(); |
716 | let lead = (pos / 94) + (0xF9 - 0x80); |
717 | let trail = (pos % 94) + 0x21; |
718 | handle.write_two(lead as u8, trail as u8); |
719 | continue; |
720 | } else if let Some(pointer) = ibm_symbol_encode(bmp) { |
721 | let lead = (pointer / 94) + 0x21; |
722 | let trail = (pointer % 94) + 0x21; |
723 | handle.write_two(lead as u8, trail as u8); |
724 | continue; |
725 | } else if let Some(pointer) = jis0208_symbol_encode(bmp) { |
726 | let lead = (pointer / 94) + 0x21; |
727 | let trail = (pointer % 94) + 0x21; |
728 | handle.write_two(lead as u8, trail as u8); |
729 | continue; |
730 | } else { |
731 | self.state = Iso2022JpEncoderState::Ascii; |
732 | return ( |
733 | EncoderResult::Unmappable(c), |
734 | unread_handle.consumed(), |
735 | handle.write_three_return_written(0x1Bu8, 0x28u8, 0x42u8), |
736 | ); |
737 | } |
738 | } |
739 | } |
740 | } |
741 | } |
742 | }, |
743 | self, |
744 | src_consumed, |
745 | source, |
746 | dest, |
747 | c, |
748 | destination_handle, |
749 | unread_handle, |
750 | check_space_three |
751 | ); |
752 | } |
753 | |
754 | // Any copyright to the test code below this comment is dedicated to the |
755 | // Public Domain. http://creativecommons.org/publicdomain/zero/1.0/ |
756 | |
757 | #[cfg (all(test, feature = "alloc" ))] |
758 | mod tests { |
759 | use super::super::testing::*; |
760 | use super::super::*; |
761 | |
/// Test helper: runs the shared `decode` test routine with `ISO_2022_JP`,
/// the input `bytes` and the expected output `expect`.
fn decode_iso_2022_jp(bytes: &[u8], expect: &str) {
    decode(ISO_2022_JP, bytes, expect);
}
765 | |
/// Test helper: runs the shared `encode` test routine with `ISO_2022_JP`,
/// the input `string` and the expected output bytes `expect`.
fn encode_iso_2022_jp(string: &str, expect: &[u8]) {
    encode(ISO_2022_JP, string, expect);
}
769 | |
/// Decoder test vectors: plain ASCII, partial and unsupported escape
/// sequences, each of the supported modes, malformed two-byte sequences,
/// transitions between modes, and escape sequences that directly follow
/// one another.
#[test]
fn test_iso_2022_jp_decode() {
    // Empty
    decode_iso_2022_jp(b"", "");

    // ASCII
    decode_iso_2022_jp(b"\x61\x62", "\u{0061}\u{0062}");
    decode_iso_2022_jp(b"\x7F\x0E\x0F", "\u{007F}\u{FFFD}\u{FFFD}");

    // Partial escapes
    decode_iso_2022_jp(b"\x1B", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$", "\u{FFFD}$");
    decode_iso_2022_jp(b"\x1B(", "\u{FFFD}(");
    decode_iso_2022_jp(b"\x1B.", "\u{FFFD}.");

    // ISO escapes
    decode_iso_2022_jp(b"\x1B(B", ""); // ASCII
    decode_iso_2022_jp(b"\x1B(J", ""); // Roman
    decode_iso_2022_jp(b"\x1B$@", ""); // 0208
    decode_iso_2022_jp(b"\x1B$B", ""); // 0208
    decode_iso_2022_jp(b"\x1B$(D", "\u{FFFD}$(D"); // 2012
    decode_iso_2022_jp(b"\x1B$A", "\u{FFFD}$A"); // GB2312
    decode_iso_2022_jp(b"\x1B$(C", "\u{FFFD}$(C"); // KR
    decode_iso_2022_jp(b"\x1B.A", "\u{FFFD}.A"); // Latin-1
    decode_iso_2022_jp(b"\x1B.F", "\u{FFFD}.F"); // Greek
    decode_iso_2022_jp(b"\x1B(I", ""); // Half-width Katakana
    decode_iso_2022_jp(b"\x1B$(O", "\u{FFFD}$(O"); // 2013
    decode_iso_2022_jp(b"\x1B$(P", "\u{FFFD}$(P"); // 2013
    decode_iso_2022_jp(b"\x1B$(Q", "\u{FFFD}$(Q"); // 2013
    decode_iso_2022_jp(b"\x1B$)C", "\u{FFFD}$)C"); // KR
    decode_iso_2022_jp(b"\x1B$)A", "\u{FFFD}$)A"); // GB2312
    decode_iso_2022_jp(b"\x1B$)G", "\u{FFFD}$)G"); // CNS
    decode_iso_2022_jp(b"\x1B$*H", "\u{FFFD}$*H"); // CNS
    decode_iso_2022_jp(b"\x1B$)E", "\u{FFFD}$)E"); // IR
    decode_iso_2022_jp(b"\x1B$+I", "\u{FFFD}$+I"); // CNS
    decode_iso_2022_jp(b"\x1B$+J", "\u{FFFD}$+J"); // CNS
    decode_iso_2022_jp(b"\x1B$+K", "\u{FFFD}$+K"); // CNS
    decode_iso_2022_jp(b"\x1B$+L", "\u{FFFD}$+L"); // CNS
    decode_iso_2022_jp(b"\x1B$+M", "\u{FFFD}$+M"); // CNS
    decode_iso_2022_jp(b"\x1B$(@", "\u{FFFD}$(@"); // 0208
    decode_iso_2022_jp(b"\x1B$(A", "\u{FFFD}$(A"); // GB2312
    decode_iso_2022_jp(b"\x1B$(B", "\u{FFFD}$(B"); // 0208
    decode_iso_2022_jp(b"\x1B%G", "\u{FFFD}%G"); // UTF-8

    // ASCII
    decode_iso_2022_jp(b"\x5B", "\u{005B}");
    decode_iso_2022_jp(b"\x5C", "\u{005C}");
    decode_iso_2022_jp(b"\x7E", "\u{007E}");
    decode_iso_2022_jp(b"\x0E", "\u{FFFD}");
    decode_iso_2022_jp(b"\x0F", "\u{FFFD}");
    decode_iso_2022_jp(b"\x80", "\u{FFFD}");
    decode_iso_2022_jp(b"\xFF", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x5B", "\u{005B}");
    decode_iso_2022_jp(b"\x1B(B\x5C", "\u{005C}");
    decode_iso_2022_jp(b"\x1B(B\x7E", "\u{007E}");
    decode_iso_2022_jp(b"\x1B(B\x0E", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x0F", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x80", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\xFF", "\u{FFFD}");

    // Roman
    decode_iso_2022_jp(b"\x1B(J\x5B", "\u{005B}");
    decode_iso_2022_jp(b"\x1B(J\x5C", "\u{00A5}");
    decode_iso_2022_jp(b"\x1B(J\x7E", "\u{203E}");
    decode_iso_2022_jp(b"\x1B(J\x0E", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\x0F", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\x80", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\xFF", "\u{FFFD}");

    // Katakana
    decode_iso_2022_jp(b"\x1B(I\x20", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x21", "\u{FF61}");
    decode_iso_2022_jp(b"\x1B(I\x5F", "\u{FF9F}");
    decode_iso_2022_jp(b"\x1B(I\x60", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x0E", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x0F", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x80", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\xFF", "\u{FFFD}");

    // 0208 differences from 1978 to 1983
    decode_iso_2022_jp(b"\x1B$@\x54\x64", "\u{58FA}");
    decode_iso_2022_jp(b"\x1B$@\x44\x5B", "\u{58F7}");
    decode_iso_2022_jp(b"\x1B$@\x74\x21", "\u{582F}");
    decode_iso_2022_jp(b"\x1B$@\x36\x46", "\u{5C2D}");
    decode_iso_2022_jp(b"\x1B$@\x28\x2E", "\u{250F}");
    decode_iso_2022_jp(b"\x1B$B\x54\x64", "\u{58FA}");
    decode_iso_2022_jp(b"\x1B$B\x44\x5B", "\u{58F7}");
    decode_iso_2022_jp(b"\x1B$B\x74\x21", "\u{582F}");
    decode_iso_2022_jp(b"\x1B$B\x36\x46", "\u{5C2D}");
    decode_iso_2022_jp(b"\x1B$B\x28\x2E", "\u{250F}");

    // Broken 0208
    decode_iso_2022_jp(b"\x1B$B\x28\x41", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$@\x80\x54\x64", "\u{FFFD}\u{58FA}");
    decode_iso_2022_jp(b"\x1B$B\x28\x80", "\u{FFFD}");

    if cfg!(miri) {
        // Miri is too slow
        return;
    }

    // Transitions
    decode_iso_2022_jp(b"\x1B(B\x5C\x1B(J\x5C", "\u{005C}\u{00A5}");
    decode_iso_2022_jp(b"\x1B(B\x5C\x1B(I\x21", "\u{005C}\u{FF61}");
    decode_iso_2022_jp(b"\x1B(B\x5C\x1B$@\x54\x64", "\u{005C}\u{58FA}");
    decode_iso_2022_jp(b"\x1B(B\x5C\x1B$B\x54\x64", "\u{005C}\u{58FA}");

    decode_iso_2022_jp(b"\x1B(J\x5C\x1B(B\x5C", "\u{00A5}\u{005C}");
    decode_iso_2022_jp(b"\x1B(J\x5C\x1B(I\x21", "\u{00A5}\u{FF61}");
    decode_iso_2022_jp(b"\x1B(J\x5C\x1B$@\x54\x64", "\u{00A5}\u{58FA}");
    decode_iso_2022_jp(b"\x1B(J\x5C\x1B$B\x54\x64", "\u{00A5}\u{58FA}");

    decode_iso_2022_jp(b"\x1B(I\x21\x1B(J\x5C", "\u{FF61}\u{00A5}");
    decode_iso_2022_jp(b"\x1B(I\x21\x1B(B\x5C", "\u{FF61}\u{005C}");
    decode_iso_2022_jp(b"\x1B(I\x21\x1B$@\x54\x64", "\u{FF61}\u{58FA}");
    decode_iso_2022_jp(b"\x1B(I\x21\x1B$B\x54\x64", "\u{FF61}\u{58FA}");

    decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(J\x5C", "\u{58FA}\u{00A5}");
    decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(I\x21", "\u{58FA}\u{FF61}");
    decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B(B\x5C", "\u{58FA}\u{005C}");
    decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B$B\x54\x64", "\u{58FA}\u{58FA}");

    decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(J\x5C", "\u{58FA}\u{00A5}");
    decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(I\x21", "\u{58FA}\u{FF61}");
    decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B$@\x54\x64", "\u{58FA}\u{58FA}");
    decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B(B\x5C", "\u{58FA}\u{005C}");

    // Empty transitions
    decode_iso_2022_jp(b"\x1B(B\x1B(J", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x1B(I", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x1B$@", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(B\x1B$B", "\u{FFFD}");

    decode_iso_2022_jp(b"\x1B(J\x1B(B", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\x1B(I", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\x1B$@", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(J\x1B$B", "\u{FFFD}");

    decode_iso_2022_jp(b"\x1B(I\x1B(J", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x1B(B", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x1B$@", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B(I\x1B$B", "\u{FFFD}");

    decode_iso_2022_jp(b"\x1B$@\x1B(J", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$@\x1B(I", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$@\x1B(B", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$@\x1B$B", "\u{FFFD}");

    decode_iso_2022_jp(b"\x1B$B\x1B(J", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$B\x1B(I", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$B\x1B$@", "\u{FFFD}");
    decode_iso_2022_jp(b"\x1B$B\x1B(B", "\u{FFFD}");

    // Transitions to self
    decode_iso_2022_jp(b"\x1B(B\x5C\x1B(B\x5C", "\u{005C}\u{005C}");
    decode_iso_2022_jp(b"\x1B(J\x5C\x1B(J\x5C", "\u{00A5}\u{00A5}");
    decode_iso_2022_jp(b"\x1B(I\x21\x1B(I\x21", "\u{FF61}\u{FF61}");
    decode_iso_2022_jp(b"\x1B$@\x54\x64\x1B$@\x54\x64", "\u{58FA}\u{58FA}");
    decode_iso_2022_jp(b"\x1B$B\x54\x64\x1B$B\x54\x64", "\u{58FA}\u{58FA}");
}
930 | |
// Exercises the ISO-2022-JP encoder on short hand-written inputs, grouped by
// the state the encoder has to switch into (ASCII, Roman, JIS X 0208).
//
// Unmappable input is expected to come out as a decimal numeric character
// reference: U+1F4A9 becomes `&#128169;`, and ESC / SO / SI (U+001B, U+000E,
// U+000F) are expected as `&#65533;`, i.e. the reference for U+FFFD. Byte
// string literals must stay ASCII-only, so these references are spelled out
// rather than written as the characters themselves.
#[test]
fn test_iso_2022_jp_encode() {
    // Empty
    encode_iso_2022_jp("", b"");

    // ASCII
    encode_iso_2022_jp("ab", b"ab");
    encode_iso_2022_jp("\u{1F4A9}", b"&#128169;");
    encode_iso_2022_jp("\x1B", b"&#65533;");
    encode_iso_2022_jp("\x0E", b"&#65533;");
    encode_iso_2022_jp("\x0F", b"&#65533;");

    // Roman: U+00A5 and U+203E are expected as 0x5C and 0x7E after ESC ( J,
    // with a return to ASCII (ESC ( B) at the end of the stream.
    encode_iso_2022_jp("a\u{00A5}b", b"a\x1B(J\x5Cb\x1B(B");
    encode_iso_2022_jp("a\u{203E}b", b"a\x1B(J\x7Eb\x1B(B");
    if !cfg!(miri) {
        // Miri is too slow
        encode_iso_2022_jp("a\u{00A5}b\x5C", b"a\x1B(J\x5Cb\x1B(B\x5C");
        encode_iso_2022_jp("a\u{203E}b\x7E", b"a\x1B(J\x7Eb\x1B(B\x7E");
        encode_iso_2022_jp("\u{00A5}\u{1F4A9}", b"\x1B(J\x5C&#128169;\x1B(B");
        encode_iso_2022_jp("\u{00A5}\x1B", b"\x1B(J\x5C&#65533;\x1B(B");
        encode_iso_2022_jp("\u{00A5}\x0E", b"\x1B(J\x5C&#65533;\x1B(B");
        encode_iso_2022_jp("\u{00A5}\x0F", b"\x1B(J\x5C&#65533;\x1B(B");
        encode_iso_2022_jp("\u{00A5}\u{58FA}", b"\x1B(J\x5C\x1B$B\x54\x64\x1B(B");
    }

    // Half-width Katakana: the expected bytes are two-byte sequences after
    // ESC $ B rather than single bytes after ESC ( I.
    encode_iso_2022_jp("\u{FF61}", b"\x1B$B\x21\x23\x1B(B");
    encode_iso_2022_jp("\u{FF65}", b"\x1B$B\x21\x26\x1B(B");
    if !cfg!(miri) {
        // Miri is too slow
        encode_iso_2022_jp("\u{FF66}", b"\x1B$B\x25\x72\x1B(B");
        encode_iso_2022_jp("\u{FF70}", b"\x1B$B\x21\x3C\x1B(B");
        encode_iso_2022_jp("\u{FF9D}", b"\x1B$B\x25\x73\x1B(B");
        encode_iso_2022_jp("\u{FF9E}", b"\x1B$B\x21\x2B\x1B(B");
        encode_iso_2022_jp("\u{FF9F}", b"\x1B$B\x21\x2C\x1B(B");
    }

    // 0208
    encode_iso_2022_jp("\u{58FA}", b"\x1B$B\x54\x64\x1B(B");
    encode_iso_2022_jp("\u{58FA}\u{250F}", b"\x1B$B\x54\x64\x28\x2E\x1B(B");
    if !cfg!(miri) {
        // Miri is too slow
        // The expectations below place ESC ( B before the replacement or
        // ASCII output that follows the two-byte character.
        encode_iso_2022_jp("\u{58FA}\u{1F4A9}", b"\x1B$B\x54\x64\x1B(B&#128169;");
        encode_iso_2022_jp("\u{58FA}\x1B", b"\x1B$B\x54\x64\x1B(B&#65533;");
        encode_iso_2022_jp("\u{58FA}\x0E", b"\x1B$B\x54\x64\x1B(B&#65533;");
        encode_iso_2022_jp("\u{58FA}\x0F", b"\x1B$B\x54\x64\x1B(B&#65533;");
        encode_iso_2022_jp("\u{58FA}\u{00A5}", b"\x1B$B\x54\x64\x1B(J\x5C\x1B(B");
        encode_iso_2022_jp("\u{58FA}a", b"\x1B$B\x54\x64\x1B(Ba");
    }
}
982 | |
// Decodes the bundled ISO-2022-JP fixture in one call and compares the text
// against the stored reference. The fixture is expected to trigger at least
// one decode error.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_iso_2022_jp_decode_all() {
    let encoded = include_bytes!("test_data/iso_2022_jp_in.txt");
    let expected = include_str!("test_data/iso_2022_jp_in_ref.txt");

    let (decoded, had_errors) = ISO_2022_JP.decode_without_bom_handling(encoded);

    assert!(had_errors, "Should have had errors.");
    assert_eq!(&*decoded, expected);
}
992 | |
// Encodes the bundled text fixture in one call and compares the bytes against
// the stored reference. The encode must report no errors and must report
// ISO_2022_JP as the encoding actually used.
#[test]
#[cfg_attr(miri, ignore)] // Miri is too slow
fn test_iso_2022_jp_encode_all() {
    let text = include_str!("test_data/iso_2022_jp_out.txt");
    // Coerce the fixed-size array reference to a slice once, up front.
    let expected: &[u8] = include_bytes!("test_data/iso_2022_jp_out_ref.txt");

    let (encoded, used_encoding, had_errors) = ISO_2022_JP.encode(text);

    assert!(!had_errors, "Should not have had errors.");
    assert_eq!(used_encoding, ISO_2022_JP);
    assert_eq!(&*encoded, expected);
}
1003 | |
// Checks that the buffer length reported by
// `max_utf8_buffer_length_without_replacement` is large enough for a
// half-width katakana character when the escape sequence that selected
// katakana arrived in an earlier call.
#[test]
fn test_iso_2022_jp_half_width_katakana_length() {
    let mut output = [0u8; 20];
    let mut decoder = ISO_2022_JP.new_decoder();
    {
        // ESC ( I on its own: exactly three bytes are consumed and nothing
        // is written yet.
        let (result, read, written) =
            decoder.decode_to_utf8_without_replacement(b"\x1B\x28\x49", &mut output, false);
        assert_eq!(result, DecoderResult::InputEmpty);
        assert_eq!(read, 3);
        assert_eq!(written, 0);
    }
    {
        // Ask the decoder how much room one more input byte needs and hand
        // it exactly that much output space.
        let needed = decoder
            .max_utf8_buffer_length_without_replacement(1)
            .unwrap();
        let (result, read, written) =
            decoder.decode_to_utf8_without_replacement(b"\x21", &mut output[..needed], true);
        assert_eq!(result, DecoderResult::InputEmpty);
        assert_eq!(read, 1);
        assert_eq!(written, 3);
        // EF BD A1 is the UTF-8 encoding of U+FF61.
        assert_eq!(output[0], 0xEF);
        assert_eq!(output[1], 0xBD);
        assert_eq!(output[2], 0xA1);
    }
}
1029 | |
// Checks that the buffer length reported by `max_utf16_buffer_length` is
// large enough when a lone ESC from an earlier call is still pending: the
// next byte then yields two code units (U+FFFD followed by the byte itself).
#[test]
fn test_iso_2022_jp_length_after_escape() {
    let mut output = [0u16; 20];
    let mut decoder = ISO_2022_JP.new_decoder();
    {
        // A single ESC byte: it is consumed, nothing is written and no error
        // is reported yet.
        let (result, read, written, had_errors) =
            decoder.decode_to_utf16(b"\x1B", &mut output, false);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 1);
        assert_eq!(written, 0);
        assert!(!had_errors);
    }
    {
        // Size the output from the decoder's own estimate for one more byte.
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let (result, read, written, had_errors) =
            decoder.decode_to_utf16(b"A", &mut output[..needed], true);
        assert_eq!(result, CoderResult::InputEmpty);
        assert_eq!(read, 1);
        assert_eq!(written, 2);
        assert!(had_errors);
        // The pending ESC surfaces as U+FFFD, followed by 'A'.
        assert_eq!(output[0], 0xFFFD);
        assert_eq!(output[1], 0x0041);
    }
}
1054 | |
// Encodes two consecutive unpaired low surrogates from UTF-16. Each one is
// expected to come out as `&#65533;`, the decimal numeric character reference
// for U+FFFD, and the call must report that errors occurred. The expectation
// is spelled in ASCII because byte string literals cannot contain non-ASCII
// characters.
#[test]
fn test_iso_2022_jp_encode_from_two_low_surrogates() {
    let expectation = b"&#65533;&#65533;";
    let mut output = [0u8; 40];
    let mut encoder = ISO_2022_JP.new_encoder();
    let (result, read, written, had_errors) =
        encoder.encode_from_utf16(&[0xDC00u16, 0xDEDEu16], &mut output[..], true);
    assert_eq!(result, CoderResult::InputEmpty);
    assert_eq!(read, 2);
    assert_eq!(written, expectation.len());
    assert!(had_errors);
    assert_eq!(&output[..written], expectation);
}
1068 | } |
1069 | |