lib.rs source code [crates/encoding_rs-0.8.32/src/lib.rs]

1	// Copyright Mozilla Foundation. See the COPYRIGHT
2	// file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	#![cfg_attr(
11	feature = "cargo-clippy",
12	allow(doc_markdown, inline_always, new_ret_no_self)
13	)]
14
15	//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
16	//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
17	//! Gecko-oriented means that converting to and from UTF-16 is supported in
18	//! addition to converting to and from UTF-8, that the performance and
19	//! streamability goals are browser-oriented, and that FFI-friendliness is a
20	//! goal.
21	//!
22	//! Additionally, the `mem` module provides functions that are useful for
23	//! applications that need to be able to deal with legacy in-memory
24	//! representations of Unicode.
25	//!
26	//! For expectation setting, please be sure to read the sections
27	//! [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes),
28	//! [_ISO-8859-1_](#iso-8859-1) and [_Web / Browser Focus_](#web--browser-focus) below.
29	//!
30	//! There is a [long-form write-up](https://hsivonen.fi/encoding_rs/) about the
31	//! design and internals of the crate.
32	//!
33	//! # Availability
34	//!
35	//! The code is available under the
36	//! [Apache license, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)
37	//! or the [MIT license](https://opensource.org/licenses/MIT), at your option.
38	//! See the
39	//! [`COPYRIGHT`](https://github.com/hsivonen/encoding_rs/blob/master/COPYRIGHT)
40	//! file for details.
41	//! The [repository is on GitHub](https://github.com/hsivonen/encoding_rs). The
42	//! [crate is available on crates.io](https://crates.io/crates/encoding_rs).
43	//!
44	//! # Integration with `std::io`
45	//!
46	//! This crate doesn't implement traits from `std::io`. However, the case of
47	//! wrapping a `std::io::Read` in a decoder that implements `std::io::Read` and
48	//! presents the data from the wrapped `std::io::Read` as UTF-8 is addressed by
49	//! the [`encoding_rs_io`](https://docs.rs/encoding_rs_io/) crate.
50	//!
51	//! # Examples
52	//!
53	//! Example programs:
54	//!
55	//! * [Rust](https://github.com/hsivonen/recode_rs)
56	//! * [C](https://github.com/hsivonen/recode_c)
57	//! * [C++](https://github.com/hsivonen/recode_cpp)
58	//!
59	//! Decode using the non-streaming API:
60	//!
61	//! ```
62	//! #[cfg(feature = "alloc")] {
63	//! use encoding_rs::*;
64	//!
65	//! let expectation = "\u{30CF}\u{30ED}\u{30FC}\u{30FB}\u{30EF}\u{30FC}\u{30EB}\u{30C9}";
66	//! let bytes = b"\x83n\x83\x8D\x81[\x81E\x83\x8F\x81[\x83\x8B\x83h";
67	//!
68	//! let (cow, encoding_used, had_errors) = SHIFT_JIS.decode(bytes);
69	//! assert_eq!(&cow[..], expectation);
70	//! assert_eq!(encoding_used, SHIFT_JIS);
71	//! assert!(!had_errors);
72	//! }
73	//! ```
74	//!
75	//! Decode using the streaming API with minimal `unsafe`:
76	//!
77	//! ```
78	//! use encoding_rs::*;
79	//!
80	//! let expectation = "\u{30CF}\u{30ED}\u{30FC}\u{30FB}\u{30EF}\u{30FC}\u{30EB}\u{30C9}";
81	//!
82	//! // Use an array of byte slices to demonstrate content arriving piece by
83	//! // piece from the network.
84	//! let bytes: [&'static [u8]; 4] = [b"\x83",
85	//! b"n\x83\x8D\x81",
86	//! b"[\x81E\x83\x8F\x81[\x83",
87	//! b"\x8B\x83h"];
88	//!
89	//! // Very short output buffer to demonstrate the output buffer getting full.
90	//! // Normally, you'd use something like `[0u8; 2048]`.
91	//! let mut buffer_bytes = [0u8; 8];
92	//! let mut buffer: &mut str = std::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap();
93	//!
94	//! // How many bytes in the buffer currently hold significant data.
95	//! let mut bytes_in_buffer = 0usize;
96	//!
97	//! // Collect the output to a string for demonstration purposes.
98	//! let mut output = String::new();
99	//!
100	//! // The `Decoder`
101	//! let mut decoder = SHIFT_JIS.new_decoder();
102	//!
103	//! // Track whether we see errors.
104	//! let mut total_had_errors = false;
105	//!
106	//! // Decode using a fixed-size intermediate buffer (for demonstrating the
107	//! // use of a fixed-size buffer; normally when the output of an incremental
108	//! // decode goes to a `String` one would use `Decoder.decode_to_string()` to
109	//! // avoid the intermediate buffer).
110	//! for input in &bytes[..] {
111	//! // The number of bytes already read from current `input` in total.
112	//! let mut total_read_from_current_input = 0usize;
113	//!
114	//! loop {
115	//! let (result, read, written, had_errors) =
116	//! decoder.decode_to_str(&input[total_read_from_current_input..],
117	//! &mut buffer[bytes_in_buffer..],
118	//! false);
119	//! total_read_from_current_input += read;
120	//! bytes_in_buffer += written;
121	//! total_had_errors |= had_errors;
122	//! match result {
123	//! CoderResult::InputEmpty => {
124	//! // We have consumed the current input buffer. Break out of
125	//! // the inner loop to get the next input buffer from the
126	//! // outer loop.
127	//! break;
128	//! },
129	//! CoderResult::OutputFull => {
130	//! // Write the current buffer out and consider the buffer
131	//! // empty.
132	//! output.push_str(&buffer[..bytes_in_buffer]);
133	//! bytes_in_buffer = 0usize;
134	//! continue;
135	//! }
136	//! }
137	//! }
138	//! }
139	//!
140	//! // Process EOF
141	//! loop {
142	//! let (result, _, written, had_errors) =
143	//! decoder.decode_to_str(b"",
144	//! &mut buffer[bytes_in_buffer..],
145	//! true);
146	//! bytes_in_buffer += written;
147	//! total_had_errors |= had_errors;
148	//! // Write the current buffer out and consider the buffer empty.
149	//! // Need to do this here for both `match` arms, because we exit the
150	//! // loop on `CoderResult::InputEmpty`.
151	//! output.push_str(&buffer[..bytes_in_buffer]);
152	//! bytes_in_buffer = 0usize;
153	//! match result {
154	//! CoderResult::InputEmpty => {
155	//! // Done!
156	//! break;
157	//! },
158	//! CoderResult::OutputFull => {
159	//! continue;
160	//! }
161	//! }
162	//! }
163	//!
164	//! assert_eq!(&output[..], expectation);
165	//! assert!(!total_had_errors);
166	//! ```
167	//!
168	//! ## UTF-16LE, UTF-16BE and Unicode Encoding Schemes
169	//!
170	//! The Encoding Standard doesn't specify encoders for UTF-16LE and UTF-16BE,
171	//! __so this crate does not provide encoders for those encodings__!
172	//! Along with the replacement encoding, their _output encoding_ (i.e. the
173	//! encoding used for form submission and error handling in the query string
174	//! of URLs) is UTF-8, so you get a UTF-8 encoder if you request an encoder
175	//! for them.
176	//!
177	//! Additionally, the Encoding Standard factors BOM handling into wrapper
178	//! algorithms so that BOM handling isn't part of the definition of the
179	//! encodings themselves. The Unicode _encoding schemes_ in the Unicode
180	//! Standard define BOM handling or lack thereof as part of the encoding
181	//! scheme.
182	//!
183	//! When used with the `_without_bom_handling` entry points, the UTF-16LE
184	//! and UTF-16BE _encodings_ match the same-named _encoding schemes_ from
185	//! the Unicode Standard.
186	//!
187	//! When used with the `_with_bom_removal` entry points, the UTF-8
188	//! _encoding_ matches the UTF-8 _encoding scheme_ from the Unicode
189	//! Standard.
190	//!
191	//! This crate does not provide a mode that matches the UTF-16 _encoding
192	//! scheme_ from the Unicode Standard. The UTF-16BE encoding used with
193	//! the entry points without `_bom_` qualifiers is the closest match,
194	//! but in that case, the UTF-8 BOM triggers UTF-8 decoding, which is
195	//! not part of the behavior of the UTF-16 _encoding scheme_ per the
196	//! Unicode Standard.
197	//!
198	//! The UTF-32 family of Unicode encoding schemes is not supported
199	//! by this crate. The Encoding Standard doesn't define any UTF-32
200	//! family encodings, since they aren't necessary for consuming Web
201	//! content.
202	//!
203	//! While gb18030 is capable of representing U+FEFF, the Encoding
204	//! Standard does not treat the gb18030 byte representation of U+FEFF
205	//! as a BOM, so neither does this crate.
206	//!
207	//! ## ISO-8859-1
208	//!
209	//! ISO-8859-1 does not exist as a distinct encoding from windows-1252 in
210	//! the Encoding Standard. Therefore, an encoding that maps the unsigned
211	//! byte value to the same Unicode scalar value is not available via
212	//! `Encoding` in this crate.
213	//!
214	//! However, the functions whose name starts with `convert` and contains
215	//! `latin1` in the `mem` module support such conversions, which are known as
216	//! [_isomorphic decode_](https://infra.spec.whatwg.org/#isomorphic-decode)
217	//! and [_isomorphic encode_](https://infra.spec.whatwg.org/#isomorphic-encode)
218	//! in the [Infra Standard](https://infra.spec.whatwg.org/).
219	//!
220	//! ## Web / Browser Focus
221	//!
222	//! Both in terms of scope and performance, the focus is on the Web. For scope,
223	//! this means that encoding_rs implements the Encoding Standard fully and
224	//! doesn't implement encodings that are not specified in the Encoding
225	//! Standard. For performance, this means that decoding performance is
226	//! important as well as performance for encoding into UTF-8 or encoding the
227	//! Basic Latin range (ASCII) into legacy encodings. Non-Basic Latin needs to
228	//! be encoded into legacy encodings in only two places in the Web platform: in
229	//! the query part of URLs, in which case it's a matter of relatively rare
230	//! error handling, and in form submission, in which case the user action and
231	//! networking tend to hide the performance of the encoder.
232	//!
233	//! Deemphasizing performance of encoding non-Basic Latin text into legacy
234	//! encodings enables smaller code size thanks to the encoder side using the
235	//! decode-optimized data tables without having encode-optimized data tables at
236	//! all. Even in decoders, smaller lookup table size is preferred over avoiding
237	//! multiplication operations.
238	//!
239	//! Additionally, performance is a non-goal for the ASCII-incompatible
240	//! ISO-2022-JP encoding, which is rarely used on the Web. Instead of
241	//! performance, the decoder for ISO-2022-JP optimizes for ease/clarity
242	//! of implementation.
243	//!
244	//! Despite the browser focus, the hope is that non-browser applications
245	//! that wish to consume Web content or submit Web forms in a Web-compatible
246	//! way will find encoding_rs useful. While encoding_rs does not try to match
247	//! Windows behavior, many of the encodings are close enough to legacy
248	//! encodings implemented by Windows that applications that need to consume
249	//! data in legacy Windows encodings may find encoding_rs useful. The
250	//! [codepage](https://crates.io/crates/codepage) crate maps from Windows
251	//! code page identifiers onto encoding_rs `Encoding`s and vice versa.
252	//!
253	//! For decoding email, UTF-7 support is needed (unfortunately) in addition
254	//! to the encodings defined in the Encoding Standard. The
255	//! [charset](https://crates.io/crates/charset) crate wraps encoding_rs and adds
256	//! UTF-7 decoding for email purposes.
257	//!
258	//! For single-byte DOS encodings beyond the ones supported by the Encoding
259	//! Standard, there is the [`oem_cp`](https://crates.io/crates/oem_cp) crate.
260	//!
261	//! # Preparing Text for the Encoders
262	//!
263	//! Normalizing text into Unicode Normalization Form C prior to encoding text
264	//! into a legacy encoding minimizes unmappable characters. Text can be
265	//! normalized to Unicode Normalization Form C using the
266	//! [`icu_normalizer`](https://crates.io/crates/icu_normalizer) crate, which
267	//! is part of [ICU4X](https://icu4x.unicode.org/).
268	//!
269	//! The exception is windows-1258, which after normalizing to Unicode
270	//! Normalization Form C requires tone marks to be decomposed in order to
271	//! minimize unmappable characters. Vietnamese tone marks can be decomposed
272	//! using the [`detone`](https://crates.io/crates/detone) crate.
273	//!
274	//! # Streaming & Non-Streaming; Rust & C/C++
275	//!
276	//! The API in Rust has two modes of operation: streaming and non-streaming.
277	//! The streaming API is the foundation of the implementation and should be
278	//! used when processing data that arrives piecemeal from an i/o stream. The
279	//! streaming API has an FFI wrapper (as a [separate crate][1]) that exposes it
280	//! to C callers. The non-streaming part of the API is for Rust callers only and
281	//! is smart about borrowing instead of copying when possible. When
282	//! streamability is not needed, the non-streaming API should be preferred in
283	//! order to avoid copying data when a borrow suffices.
284	//!
285	//! There is no analogous C API exposed via FFI, mainly because C doesn't have
286	//! standard types for growable byte buffers and Unicode strings that know
287	//! their length.
288	//!
289	//! The C API (header file generated at `target/include/encoding_rs.h` when
290	//! building encoding_rs) can, in turn, be wrapped for use from C++. Such a
291	//! C++ wrapper can re-create the non-streaming API in C++ for C++ callers.
292	//! The C binding comes with a [C++17 wrapper][2] that uses standard library +
293	//! [GSL][3] types and that recreates the non-streaming API in C++ on top of
294	//! the streaming API. A C++ wrapper with XPCOM/MFBT types is available as
295	//! [`mozilla::Encoding`][4].
296	//!
297	//! The `Encoding` type is common to both the streaming and non-streaming
298	//! modes. In the streaming mode, decoding operations are performed with a
299	//! `Decoder` and encoding operations with an `Encoder` object obtained via
300	//! `Encoding`. In the non-streaming mode, decoding and encoding operations are
301	//! performed using methods on `Encoding` objects themselves, so the `Decoder`
302	//! and `Encoder` objects are not used at all.
303	//!
304	//! [1]: https://github.com/hsivonen/encoding_c
305	//! [2]: https://github.com/hsivonen/encoding_c/blob/master/include/encoding_rs_cpp.h
306	//! [3]: https://github.com/Microsoft/GSL/
307	//! [4]: https://searchfox.org/mozilla-central/source/intl/Encoding.h
308	//!
309	//! # Memory management
310	//!
311	//! The streaming mode never performs heap allocations (even the methods
312	//! that write into a `Vec<u8>` or a `String` by taking them as arguments do
313	//! not reallocate the backing buffer of the `Vec<u8>` or the `String`). That
314	//! is, the streaming mode uses caller-allocated buffers exclusively.
315	//!
316	//! The methods of the non-streaming mode that return a `Vec<u8>` or a `String`
317	//! perform heap allocations but only to allocate the backing buffer of the
318	//! `Vec<u8>` or the `String`.
319	//!
320	//! `Encoding` is always statically allocated. `Decoder` and `Encoder` need no
321	//! `Drop` cleanup.
322	//!
323	//! # Buffer reading and writing behavior
324	//!
325	//! Based on experience gained with the `java.nio.charset` encoding converter
326	//! API and with the Gecko uconv encoding converter API, the buffer reading
327	//! and writing behaviors of encoding_rs are asymmetric: input buffers are
328	//! fully drained but output buffers are not always fully filled.
329	//!
330	//! When reading from an input buffer, encoding_rs always consumes all input
331	//! up to the next error or to the end of the buffer. In particular, when
332	//! decoding, even if the input buffer ends in the middle of a byte sequence
333	//! for a character, the decoder consumes all input. This has the benefit that
334	//! the caller of the API can always fill the next buffer from the start from
335	//! whatever source the bytes come from and never has to first copy the last
336	//! bytes of the previous buffer to the start of the next buffer. However, when
337	//! encoding, the UTF-8 input buffers have to end at a character boundary, which
338	//! is a requirement for the Rust `str` type anyway, and UTF-16 input buffer
339	//! boundaries falling in the middle of a surrogate pair result in both
340	//! surrogates being treated individually as unpaired surrogates.
341	//!
342	//! Additionally, decoders guarantee that they can be fed even one byte at a
343	//! time and encoders guarantee that they can be fed even one code point at a
344	//! time. This has the benefit of not placing restrictions on the size of
345	//! chunks in which the content arrives, e.g. from the network.
346	//!
347	//! When writing into an output buffer, encoding_rs makes sure that the code
348	//! unit sequence for a character is never split across output buffer
349	//! boundaries. This may result in wasted space at the end of an output buffer,
350	//! but the advantages are that the output side of both decoders and encoders
351	//! is greatly simplified compared to designs that attempt to fill output
352	//! buffers exactly even when that entails splitting a code unit sequence and
353	//! when encoding_rs methods return to the caller, the output produced thus
354	//! far is always valid taken as a whole. (In the case of encoding to ISO-2022-JP,
355	//! the output needs to be considered as a whole, because the latest output
356	//! buffer might not be valid taken alone if the transition away
357	//! from the ASCII state occurred in an earlier output buffer. However, since
358	//! the ISO-2022-JP decoder doesn't treat streams that don't end in the ASCII
359	//! state as being in error despite the encoder generating a transition to the
360	//! ASCII state at the end, the claim about the partial output taken as a whole
361	//! being valid is true even for ISO-2022-JP.)
362	//!
363	//! # Error Reporting
364	//!
365	//! Based on experience gained with the `java.nio.charset` encoding converter
366	//! API and with the Gecko uconv encoding converter API, the error reporting
367	//! behaviors of encoding_rs are asymmetric: decoder errors include offsets
368	//! that leave it up to the caller to extract the erroneous bytes from the
369	//! input stream if the caller wishes to do so but encoder errors provide the
370	//! code point associated with the error without requiring the caller to
371	//! extract it from the input on its own.
372	//!
373	//! On the encoder side, an error is always triggered by the most recently
374	//! pushed Unicode scalar, which makes it simple to pass the `char` to the
375	//! caller. Also, it's very typical for the caller to wish to do something with
376	//! this data: generate a numeric escape for the character. Additionally, the
377	//! ISO-2022-JP encoder reports U+FFFD instead of the actual input character in
378	//! certain cases, so requiring the caller to extract the character from the
379	//! input buffer would require the caller to handle ISO-2022-JP details.
380	//! Furthermore, requiring the caller to extract the character from the input
381	//! buffer would require the caller to implement UTF-8 or UTF-16 math, which is
382	//! the job of an encoding conversion library.
383	//!
384	//! On the decoder side, errors are triggered in more complex ways. For
385	//! example, when decoding the sequence ESC, '$', _buffer boundary_, 'A' as
386	//! ISO-2022-JP, the ESC byte is in error, but this is discovered only after
387	//! the buffer boundary when processing 'A'. Thus, the bytes in error might not
388	//! be the ones most recently pushed to the decoder and the error might not even
389	//! be in the current buffer.
390	//!
391	//! Some encoding conversion APIs address the problem by not acknowledging
392	//! trailing bytes of an input buffer as consumed if it's still possible for
393	//! future bytes to cause the trailing bytes to be in error. This way, error
394	//! reporting can always refer to the most recently pushed buffer. This has the
395	//! problem that the caller of the API has to copy the unconsumed trailing
396	//! bytes to the start of the next buffer before being able to fill the rest
397	//! of the next buffer. This is annoying, error-prone and inefficient.
398	//!
399	//! A possible solution would be making the decoder remember recently consumed
400	//! bytes in order to be able to include a copy of the erroneous bytes when
401	//! reporting an error. This has two problems: First, callers are rarely
402	//! interested in the erroneous bytes, so attempts to identify them are most
403	//! often just overhead anyway. Second, the rare applications that are
404	//! interested typically care about the location of the error in the input
405	//! stream.
406	//!
407	//! To keep the API convenient for common uses and the overhead low while making
408	//! it possible to develop applications, such as HTML validators, that care
409	//! about which bytes were in error, encoding_rs reports the length of the
410	//! erroneous sequence and the number of bytes consumed after the erroneous
411	//! sequence. As long as the caller doesn't discard the 6 most recent bytes,
412	//! this makes it possible for callers that care about the erroneous bytes to
413	//! locate them.
414	//!
415	//! # No Convenience API for Custom Replacements
416	//!
417	//! The Web Platform and, therefore, the Encoding Standard supports only one
418	//! error recovery mode for decoders and only one error recovery mode for
419	//! encoders. The supported error recovery mode for decoders is emitting the
420	//! REPLACEMENT CHARACTER on error. The supported error recovery mode for
421	//! encoders is emitting an HTML decimal numeric character reference for
422	//! unmappable characters.
423	//!
424	//! Since encoding_rs is Web-focused, these are the only error recovery modes
425	//! for which convenient support is provided. Moreover, on the decoder side,
426	//! there aren't really good alternatives to emitting the REPLACEMENT CHARACTER
427	//! on error (other than treating errors as fatal). In particular, simply
428	//! ignoring errors is a
429	//! [security problem](http://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences),
430	//! so it would be a bad idea for encoding_rs to provide a mode that encouraged
431	//! callers to ignore errors.
432	//!
433	//! On the encoder side, there are plausible alternatives for HTML decimal
434	//! numeric character references. For example, when outputting CSS, CSS-style
435	//! escapes would seem to make sense. However, instead of facilitating the
436	//! output of CSS, JS, etc. in non-UTF-8 encodings, encoding_rs takes the design
437	//! position that you shouldn't generate output in encodings other than UTF-8,
438	//! except where backward compatibility with interacting with the legacy Web
439	//! requires it. The legacy Web requires it only when parsing the query strings
440	//! of URLs and when submitting forms, and those two both use HTML decimal
441	//! numeric character references.
442	//!
443	//! While encoding_rs doesn't make encoder replacements other than HTML decimal
444	//! numeric character references easy, it does make them _possible_.
445	//! `encode_from_utf8()`, which emits HTML decimal numeric character references
446	//! for unmappable characters, is implemented on top of
447	//! `encode_from_utf8_without_replacement()`. Applications that really, really
448	//! want other replacement schemes for unmappable characters can likewise
449	//! implement them on top of `encode_from_utf8_without_replacement()`.
450	//!
451	//! # No Extensibility by Design
452	//!
453	//! The set of encodings supported by encoding_rs is not extensible by design.
454	//! That is, `Encoding`, `Decoder` and `Encoder` are intentionally `struct`s
455	//! rather than `trait`s. encoding_rs takes the design position that all future
456	//! text interchange should be done using UTF-8, which can represent all of
457	//! Unicode. (It is, in fact, the only encoding supported by the Encoding
458	//! Standard and encoding_rs that can represent all of Unicode and that has
459	//! encoder support. UTF-16LE and UTF-16BE don't have encoder support, and
460	//! gb18030 cannot encode U+E5E5.) The other encodings are supported merely for
461	//! legacy compatibility and not due to non-UTF-8 encodings having benefits
462	//! other than being able to consume legacy content.
463	//!
464	//! Considering that UTF-8 can represent all of Unicode and is already supported
465	//! by all Web browsers, introducing a new encoding wouldn't add to the
466	//! expressiveness but would add to compatibility problems. In that sense,
467	//! adding new encodings to the Web Platform doesn't make sense, and, in fact,
468	//! post-UTF-8 attempts at encodings, such as BOCU-1, have been rejected from
469	//! the Web Platform. On the other hand, the set of legacy encodings that must
470	//! be supported for a Web browser to be able to be successful is not going to
471	//! expand. Empirically, the set of encodings specified in the Encoding Standard
472	//! is already sufficient and the set of legacy encodings won't grow
473	//! retroactively.
474	//!
475	//! Since extensibility doesn't make sense considering the Web focus of
476	//! encoding_rs and adding encodings to Web clients would be actively harmful,
477	//! it makes sense to make the set of encodings that encoding_rs supports
478	//! non-extensible and to take the (admittedly small) benefits arising from
479	//! that, such as the size of `Decoder` and `Encoder` objects being known ahead
480	//! of time, which enables stack allocation thereof.
481	//!
482	//! This does have downsides for applications that might want to put encoding_rs
483	//! to non-Web uses if those non-Web uses involve legacy encodings that aren't
484	//! needed for Web uses. The needs of such applications should not complicate
485	//! encoding_rs itself, though. It is up to those applications to provide a
486	//! framework that delegates the operations with encodings that encoding_rs
487	//! supports to encoding_rs and operations with other encodings to something
488	//! else (as opposed to encoding_rs itself providing an extensibility
489	//! framework).
490	//!
491	//! # Panics
492	//!
493	//! Methods in encoding_rs can panic if the API is used against the requirements
494	//! stated in the documentation, if a state that's supposed to be impossible
495	//! is reached due to an internal bug or on integer overflow. When used
496	//! according to documentation with buffer sizes that stay below integer
497	//! overflow, in the absence of internal bugs, encoding_rs does not panic.
498	//!
499	//! Panics arising from API misuse aren't documented beyond this on individual
500	//! methods.
501	//!
502	//! # At-Risk Parts of the API
503	//!
504	//! The foreseeable source of partially backward-incompatible API change is the
505	//! way the instances of `Encoding` are made available.
506	//!
507	//! If Rust changes to allow the entries of `[&'static Encoding; N]` to be
508	//! initialized with `static`s of type `&'static Encoding`, the non-reference
509	//! `FOO_INIT` public `Encoding` instances will be removed from the public API.
510	//!
511	//! If Rust changes to make the referent of `pub const FOO: &'static Encoding`
512	//! unique when the constant is used in different crates, the reference-typed
513	//! `static`s for the encoding instances will be changed from `static` to
514	//! `const` and the non-reference-typed `_INIT` instances will be removed.
515	//!
516	//! # Mapping Spec Concepts onto the API
517	//!
518	//! <table>
519	//! <thead>
520	//! <tr><th>Spec Concept</th><th>Streaming</th><th>Non-Streaming</th></tr>
521	//! </thead>
522	//! <tbody>
523	//! <tr><td><a href="https://encoding.spec.whatwg.org/#encoding">encoding</a></td><td><code>&'static Encoding</code></td><td><code>&'static Encoding</code></td></tr>
524	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8">UTF-8 encoding</a></td><td><code>UTF_8</code></td><td><code>UTF_8</code></td></tr>
525	//! <tr><td><a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an encoding</a></td><td><code>Encoding::for_label(<var>label</var>)</code></td><td><code>Encoding::for_label(<var>label</var>)</code></td></tr>
526	//! <tr><td><a href="https://encoding.spec.whatwg.org/#name">name</a></td><td><code><var>encoding</var>.name()</code></td><td><code><var>encoding</var>.name()</code></td></tr>
527	//! <tr><td><a href="https://encoding.spec.whatwg.org/#get-an-output-encoding">get an output encoding</a></td><td><code><var>encoding</var>.output_encoding()</code></td><td><code><var>encoding</var>.output_encoding()</code></td></tr>
528	//! <tr><td><a href="https://encoding.spec.whatwg.org/#decode">decode</a></td><td><code>let d = <var>encoding</var>.new_decoder();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code><var>encoding</var>.decode(<var>src</var>)</code></td></tr>
529	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode">UTF-8 decode</a></td><td><code>let d = UTF_8.new_decoder_with_bom_removal();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code>UTF_8.decode_with_bom_removal(<var>src</var>)</code></td></tr>
530	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode-without-bom">UTF-8 decode without BOM</a></td><td><code>let d = UTF_8.new_decoder_without_bom_handling();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code>UTF_8.decode_without_bom_handling(<var>src</var>)</code></td></tr>
531	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a></td><td><code>let d = UTF_8.new_decoder_without_bom_handling();<br>let res = d.decode_to_<var>*</var>_without_replacement(<var>src</var>, <var>dst</var>, false);<br>// … (fail if malformed)<br>let last_res = d.decode_to_<var>*</var>_without_replacement(<var>src</var>, <var>dst</var>, true);<br>// (fail if malformed)</code></td><td><code>UTF_8.decode_without_bom_handling_and_without_replacement(<var>src</var>)</code></td></tr>
532	//! <tr><td><a href="https://encoding.spec.whatwg.org/#encode">encode</a></td><td><code>let e = <var>encoding</var>.new_encoder();<br>let res = e.encode_from_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = e.encode_from_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code><var>encoding</var>.encode(<var>src</var>)</code></td></tr>
533	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-encode">UTF-8 encode</a></td><td>Use the UTF-8 nature of Rust strings directly:<br><code><var>write</var>(<var>src</var>.as_bytes());<br>// refill src<br><var>write</var>(<var>src</var>.as_bytes());<br>// refill src<br><var>write</var>(<var>src</var>.as_bytes());<br>// …</code></td><td>Use the UTF-8 nature of Rust strings directly:<br><code><var>src</var>.as_bytes()</code></td></tr>
534	//! </tbody>
535	//! </table>
536	//!
537	//! # Compatibility with the rust-encoding API
538	//!
539	//! The crate
540	//! [encoding_rs_compat](https://github.com/hsivonen/encoding_rs_compat/)
541	//! is a drop-in replacement for rust-encoding 0.2.32 that implements (most of)
542	//! the API of rust-encoding 0.2.32 on top of encoding_rs.
543	//!
544	//! # Mapping rust-encoding concepts to encoding_rs concepts
545	//!
546	//! The following table provides a mapping from rust-encoding constructs to
547	//! encoding_rs ones.
548	//!
549	//! <table>
550	//! <thead>
551	//! <tr><th>rust-encoding</th><th>encoding_rs</th></tr>
552	//! </thead>
553	//! <tbody>
554	//! <tr><td><code>encoding::EncodingRef</code></td><td><code>&'static encoding_rs::Encoding</code></td></tr>
555	//! <tr><td><code>encoding::all::<var>WINDOWS_31J</var></code> (not based on the WHATWG name for some encodings)</td><td><code>encoding_rs::<var>SHIFT_JIS</var></code> (always the WHATWG name uppercased and hyphens replaced with underscores)</td></tr>
556	//! <tr><td><code>encoding::all::ERROR</code></td><td>Not available because not in the Encoding Standard</td></tr>
557	//! <tr><td><code>encoding::all::ASCII</code></td><td>Not available because not in the Encoding Standard</td></tr>
558	//! <tr><td><code>encoding::all::ISO_8859_1</code></td><td>Not available because not in the Encoding Standard</td></tr>
559	//! <tr><td><code>encoding::all::HZ</code></td><td>Not available because not in the Encoding Standard</td></tr>
560	//! <tr><td><code>encoding::label::encoding_from_whatwg_label(<var>string</var>)</code></td><td><code>encoding_rs::Encoding::for_label(<var>string</var>)</code></td></tr>
561	//! <tr><td><code><var>enc</var>.whatwg_name()</code> (always lower case)</td><td><code><var>enc</var>.name()</code> (potentially mixed case)</td></tr>
562	//! <tr><td><code><var>enc</var>.name()</code></td><td>Not available because not in the Encoding Standard</td></tr>
563	//! <tr><td><code>encoding::decode(<var>bytes</var>, encoding::DecoderTrap::Replace, <var>enc</var>)</code></td><td><code><var>enc</var>.decode(<var>bytes</var>)</code></td></tr>
564	//! <tr><td><code><var>enc</var>.decode(<var>bytes</var>, encoding::DecoderTrap::Replace)</code></td><td><code><var>enc</var>.decode_without_bom_handling(<var>bytes</var>)</code></td></tr>
565	//! <tr><td><code><var>enc</var>.encode(<var>string</var>, encoding::EncoderTrap::NcrEscape)</code></td><td><code><var>enc</var>.encode(<var>string</var>)</code></td></tr>
566	//! <tr><td><code><var>enc</var>.raw_decoder()</code></td><td><code><var>enc</var>.new_decoder_without_bom_handling()</code></td></tr>
567	//! <tr><td><code><var>enc</var>.raw_encoder()</code></td><td><code><var>enc</var>.new_encoder()</code></td></tr>
568	//! <tr><td><code>encoding::RawDecoder</code></td><td><code>encoding_rs::Decoder</code></td></tr>
569	//! <tr><td><code>encoding::RawEncoder</code></td><td><code>encoding_rs::Encoder</code></td></tr>
570	//! <tr><td><code><var>raw_decoder</var>.raw_feed(<var>src</var>, <var>dst_string</var>)</code></td><td><code><var>dst_string</var>.reserve(<var>decoder</var>.max_utf8_buffer_length_without_replacement(<var>src</var>.len()));<br><var>decoder</var>.decode_to_string_without_replacement(<var>src</var>, <var>dst_string</var>, false)</code></td></tr>
571	//! <tr><td><code><var>raw_encoder</var>.raw_feed(<var>src</var>, <var>dst_vec</var>)</code></td><td><code><var>dst_vec</var>.reserve(<var>encoder</var>.max_buffer_length_from_utf8_without_replacement(<var>src</var>.len()));<br><var>encoder</var>.encode_from_utf8_to_vec_without_replacement(<var>src</var>, <var>dst_vec</var>, false)</code></td></tr>
572	//! <tr><td><code><var>raw_decoder</var>.raw_finish(<var>dst</var>)</code></td><td><code><var>dst_string</var>.reserve(<var>decoder</var>.max_utf8_buffer_length_without_replacement(0));<br><var>decoder</var>.decode_to_string_without_replacement(b"", <var>dst</var>, true)</code></td></tr>
573	//! <tr><td><code><var>raw_encoder</var>.raw_finish(<var>dst</var>)</code></td><td><code><var>dst_vec</var>.reserve(<var>encoder</var>.max_buffer_length_from_utf8_without_replacement(0));<br><var>encoder</var>.encode_from_utf8_to_vec_without_replacement("", <var>dst</var>, true)</code></td></tr>
574	//! <tr><td><code>encoding::DecoderTrap::Strict</code></td><td><code>decode*</code> methods that have <code>_without_replacement</code> in their name (and treating the `Malformed` result as fatal).</td></tr>
//! <tr><td><code>encoding::DecoderTrap::Replace</code></td><td><code>decode*</code> methods that <i>do not</i> have <code>_without_replacement</code> in their name.</td></tr>
576	//! <tr><td><code>encoding::DecoderTrap::Ignore</code></td><td>It is a bad idea to ignore errors due to security issues, but this could be implemented using <code>decode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
//! <tr><td><code>encoding::DecoderTrap::Call(DecoderTrapFunc)</code></td><td>Can be implemented using <code>decode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
578	//! <tr><td><code>encoding::EncoderTrap::Strict</code></td><td><code>encode*</code> methods that have <code>_without_replacement</code> in their name (and treating the `Unmappable` result as fatal).</td></tr>
//! <tr><td><code>encoding::EncoderTrap::Replace</code></td><td>Can be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
580	//! <tr><td><code>encoding::EncoderTrap::Ignore</code></td><td>It is a bad idea to ignore errors due to security issues, but this could be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
//! <tr><td><code>encoding::EncoderTrap::NcrEscape</code></td><td><code>encode*</code> methods that <i>do not</i> have <code>_without_replacement</code> in their name.</td></tr>
//! <tr><td><code>encoding::EncoderTrap::Call(EncoderTrapFunc)</code></td><td>Can be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
583	//! </tbody>
584	//! </table>
585	//!
586	//! # Relationship with Windows Code Pages
587	//!
588	//! Despite the Web and browser focus, the encodings defined by the Encoding
589	//! Standard and implemented by this crate may be useful for decoding legacy
//! data that uses Windows code pages. The following table names the encodings
//! that have a closely related Windows code page, the number of the closest
593	//! code page, a column indicating whether Windows maps unassigned code points
594	//! to the Unicode Private Use Area instead of U+FFFD and a remark number
595	//! indicating remarks in the list after the table.
596	//!
597	//! <table>
598	//! <thead>
599	//! <tr><th>Encoding</th><th>Code Page</th><th>PUA</th><th>Remarks</th></tr>
600	//! </thead>
601	//! <tbody>
602	//! <tr><td>Shift_JIS</td><td>932</td><td></td><td></td></tr>
603	//! <tr><td>GBK</td><td>936</td><td></td><td></td></tr>
604	//! <tr><td>EUC-KR</td><td>949</td><td></td><td></td></tr>
605	//! <tr><td>Big5</td><td>950</td><td></td><td></td></tr>
606	//! <tr><td>IBM866</td><td>866</td><td></td><td></td></tr>
607	//! <tr><td>windows-874</td><td>874</td><td>&bullet;</td><td></td></tr>
608	//! <tr><td>UTF-16LE</td><td>1200</td><td></td><td></td></tr>
609	//! <tr><td>UTF-16BE</td><td>1201</td><td></td><td></td></tr>
610	//! <tr><td>windows-1250</td><td>1250</td><td></td><td></td></tr>
611	//! <tr><td>windows-1251</td><td>1251</td><td></td><td></td></tr>
612	//! <tr><td>windows-1252</td><td>1252</td><td></td><td></td></tr>
613	//! <tr><td>windows-1253</td><td>1253</td><td>&bullet;</td><td></td></tr>
614	//! <tr><td>windows-1254</td><td>1254</td><td></td><td></td></tr>
615	//! <tr><td>windows-1255</td><td>1255</td><td>&bullet;</td><td></td></tr>
616	//! <tr><td>windows-1256</td><td>1256</td><td></td><td></td></tr>
617	//! <tr><td>windows-1257</td><td>1257</td><td>&bullet;</td><td></td></tr>
618	//! <tr><td>windows-1258</td><td>1258</td><td></td><td></td></tr>
619	//! <tr><td>macintosh</td><td>10000</td><td></td><td>1</td></tr>
620	//! <tr><td>x-mac-cyrillic</td><td>10017</td><td></td><td>2</td></tr>
621	//! <tr><td>KOI8-R</td><td>20866</td><td></td><td></td></tr>
622	//! <tr><td>EUC-JP</td><td>20932</td><td></td><td></td></tr>
623	//! <tr><td>KOI8-U</td><td>21866</td><td></td><td></td></tr>
624	//! <tr><td>ISO-8859-2</td><td>28592</td><td></td><td></td></tr>
625	//! <tr><td>ISO-8859-3</td><td>28593</td><td></td><td></td></tr>
626	//! <tr><td>ISO-8859-4</td><td>28594</td><td></td><td></td></tr>
627	//! <tr><td>ISO-8859-5</td><td>28595</td><td></td><td></td></tr>
628	//! <tr><td>ISO-8859-6</td><td>28596</td><td>&bullet;</td><td></td></tr>
629	//! <tr><td>ISO-8859-7</td><td>28597</td><td>&bullet;</td><td>3</td></tr>
630	//! <tr><td>ISO-8859-8</td><td>28598</td><td>&bullet;</td><td>4</td></tr>
631	//! <tr><td>ISO-8859-13</td><td>28603</td><td>&bullet;</td><td></td></tr>
632	//! <tr><td>ISO-8859-15</td><td>28605</td><td></td><td></td></tr>
633	//! <tr><td>ISO-8859-8-I</td><td>38598</td><td></td><td>5</td></tr>
634	//! <tr><td>ISO-2022-JP</td><td>50220</td><td></td><td></td></tr>
635	//! <tr><td>gb18030</td><td>54936</td><td></td><td></td></tr>
636	//! <tr><td>UTF-8</td><td>65001</td><td></td><td></td></tr>
637	//! </tbody>
638	//! </table>
639	//!
640	//! 1. Windows decodes 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
641	//! 2. Windows decodes 0xFF to U+00A4 CURRENCY SIGN instead of U+20AC EURO SIGN.
642	//! 3. Windows decodes the currency signs at 0xA4 and 0xA5 as well as 0xAA,
643	//! which should be U+037A GREEK YPOGEGRAMMENI, to PUA code points. Windows
644	//! decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA instead of U+2018
645	//! LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER LETTER APOSTROPHE
646	//! instead of U+2019 RIGHT SINGLE QUOTATION MARK.
//! 4. Windows decodes 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to PUA instead
//! of LRM and RLM.
649	//! 5. Remarks from the previous item apply.
650	//!
651	//! The differences between this crate and Windows in the case of multibyte encodings
652	//! are not yet fully documented here. The lack of remarks above should not be taken
653	//! as indication of lack of differences.
654	//!
655	//! # Notable Differences from IANA Naming
656	//!
657	//! In some cases, the Encoding Standard specifies the popular unextended encoding
658	//! name where in IANA terms one of the other labels would be more precise considering
659	//! the extensions that the Encoding Standard has unified into the encoding.
660	//!
661	//! <table>
662	//! <thead>
663	//! <tr><th>Encoding</th><th>IANA</th></tr>
664	//! </thead>
665	//! <tbody>
666	//! <tr><td>Big5</td><td>Big5-HKSCS</td></tr>
667	//! <tr><td>EUC-KR</td><td>windows-949</td></tr>
668	//! <tr><td>Shift_JIS</td><td>windows-31j</td></tr>
669	//! <tr><td>x-mac-cyrillic</td><td>x-mac-ukrainian</td></tr>
670	//! </tbody>
671	//! </table>
672	//!
673	//! In other cases where the Encoding Standard unifies unextended and extended
674	//! variants of an encoding, the encoding gets the name of the extended
675	//! variant.
676	//!
677	//! <table>
678	//! <thead>
679	//! <tr><th>IANA</th><th>Unified into Encoding</th></tr>
680	//! </thead>
681	//! <tbody>
682	//! <tr><td>ISO-8859-1</td><td>windows-1252</td></tr>
683	//! <tr><td>ISO-8859-9</td><td>windows-1254</td></tr>
684	//! <tr><td>TIS-620</td><td>windows-874</td></tr>
685	//! </tbody>
686	//! </table>
687	//!
688	//! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes)
689	//! for discussion about the UTF-16 family.
690
691	#![no_std]
692	#![cfg_attr(feature = "simd-accel", feature(core_intrinsics))]
693
694	#[cfg(feature = "alloc")]
695	#[cfg_attr(test, macro_use)]
696	extern crate alloc;
697
698	extern crate core;
699	#[macro_use]
700	extern crate cfg_if;
701
702	#[cfg(all(
703	feature = "simd-accel",
704	any(
705	target_feature = "sse2",
706	all(target_endian = "little", target_arch = "aarch64"),
707	all(target_endian = "little", target_feature = "neon")
708	)
709	))]
710	#[macro_use(shuffle)]
711	extern crate packed_simd;
712
713	#[cfg(feature = "serde")]
714	extern crate serde;
715
716	#[cfg(all(test, feature = "serde"))]
717	extern crate bincode;
718	#[cfg(all(test, feature = "serde"))]
719	#[macro_use]
720	extern crate serde_derive;
721	#[cfg(all(test, feature = "serde"))]
722	extern crate serde_json;
723
724	#[macro_use]
725	mod macros;
726
727	#[cfg(all(
728	feature = "simd-accel",
729	any(
730	target_feature = "sse2",
731	all(target_endian = "little", target_arch = "aarch64"),
732	all(target_endian = "little", target_feature = "neon")
733	)
734	))]
735	mod simd_funcs;
736
737	#[cfg(all(test, feature = "alloc"))]
738	mod testing;
739
740	mod big5;
741	mod euc_jp;
742	mod euc_kr;
743	mod gb18030;
744	mod iso_2022_jp;
745	mod replacement;
746	mod shift_jis;
747	mod single_byte;
748	mod utf_16;
749	mod utf_8;
750	mod x_user_defined;
751
752	mod ascii;
753	mod data;
754	mod handles;
755	mod variant;
756
757	pub mod mem;
758
759	use crate::ascii::ascii_valid_up_to;
760	use crate::ascii::iso_2022_jp_ascii_valid_up_to;
761	use crate::utf_8::utf8_valid_up_to;
762	use crate::variant::*;
763
764	#[cfg(feature = "alloc")]
765	use alloc::borrow::Cow;
766	#[cfg(feature = "alloc")]
767	use alloc::string::String;
768	#[cfg(feature = "alloc")]
769	use alloc::vec::Vec;
770	use core::cmp::Ordering;
771	use core::hash::Hash;
772	use core::hash::Hasher;
773
774	#[cfg(feature = "serde")]
775	use serde::de::Visitor;
776	#[cfg(feature = "serde")]
777	use serde::{Deserialize, Deserializer, Serialize, Serializer};
778
/// Maximum length of a numeric character reference (NCR).
///
/// This has to be the max length of an NCR instead of max
/// minus one, because we can't rely on getting the minus
/// one from the space reserved for the current unmappable,
/// because the ISO-2022-JP encoder can fill up that space
/// with a state transition escape.
const NCR_EXTRA: usize = 10; // &#1114111; (the NCR for U+10FFFF)
785
786	// BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
787	// Instead, please regenerate using generate-encoding-data.py
788
/// Length of the longest label; the trailing comment spells out that label.
const LONGEST_LABEL_LENGTH: usize = 19; // cseucpkdfmtjapanese
790
791	/// The initializer for the [Big5](static.BIG5.html) encoding.
792	///
793	/// For use only for taking the address of this form when
794	/// Rust prohibits the use of the non-`_INIT` form directly,
795	/// such as in initializers of other `static`s. If in doubt,
796	/// use the corresponding non-`_INIT` reference-typed `static`.
797	///
798	/// This part of the public API will go away if Rust changes
799	/// to make the referent of `pub const FOO: &'static Encoding`
800	/// unique cross-crate or if Rust starts allowing static arrays
801	/// to be initialized with `pub static FOO: &'static Encoding`
802	/// items.
803	pub static BIG5_INIT: Encoding = Encoding {
804	name: "Big5",
805	variant: VariantEncoding::Big5,
806	};
807
808	/// The Big5 encoding.
809	///
810	/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
811	/// instead of the Private Use Area code points that have been used historically.
812	/// It is believed to be able to decode existing Web content in a way that makes
813	/// sense.
814	///
815	/// To avoid form submissions generating data that Web servers don't understand,
816	/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
817	/// Big5 in the lexical order.
818	///
819	/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
820	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
821	///
822	/// This encoding is designed to be suited for decoding the Windows code page 950
823	/// and its HKSCS patched "951" variant such that the text makes sense, given
824	/// assignments that Unicode has made after those encodings used Private Use
825	/// Area characters.
826	///
827	/// This will change from `static` to `const` if Rust changes
828	/// to make the referent of `pub const FOO: &'static Encoding`
829	/// unique cross-crate, so don't take the address of this
830	/// `static`.
831	pub static BIG5: &'static Encoding = &BIG5_INIT;
832
833	/// The initializer for the [EUC-JP](static.EUC_JP.html) encoding.
834	///
835	/// For use only for taking the address of this form when
836	/// Rust prohibits the use of the non-`_INIT` form directly,
837	/// such as in initializers of other `static`s. If in doubt,
838	/// use the corresponding non-`_INIT` reference-typed `static`.
839	///
840	/// This part of the public API will go away if Rust changes
841	/// to make the referent of `pub const FOO: &'static Encoding`
842	/// unique cross-crate or if Rust starts allowing static arrays
843	/// to be initialized with `pub static FOO: &'static Encoding`
844	/// items.
845	pub static EUC_JP_INIT: Encoding = Encoding {
846	name: "EUC-JP",
847	variant: VariantEncoding::EucJp,
848	};
849
850	/// The EUC-JP encoding.
851	///
852	/// This is the legacy Unix encoding for Japanese.
853	///
854	/// For compatibility with Web servers that don't expect three-byte sequences
855	/// in form submissions, the encoder doesn't generate three-byte sequences.
856	/// That is, the JIS X 0212 support is decode-only.
857	///
858	/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
859	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
860	///
861	/// This encoding roughly matches the Windows code page 20932. There are error
862	/// handling differences and a handful of 2-byte sequences that decode differently.
863	/// Additionall, Windows doesn't support 3-byte sequences.
864	///
865	/// This will change from `static` to `const` if Rust changes
866	/// to make the referent of `pub const FOO: &'static Encoding`
867	/// unique cross-crate, so don't take the address of this
868	/// `static`.
869	pub static EUC_JP: &'static Encoding = &EUC_JP_INIT;
870
871	/// The initializer for the [EUC-KR](static.EUC_KR.html) encoding.
872	///
873	/// For use only for taking the address of this form when
874	/// Rust prohibits the use of the non-`_INIT` form directly,
875	/// such as in initializers of other `static`s. If in doubt,
876	/// use the corresponding non-`_INIT` reference-typed `static`.
877	///
878	/// This part of the public API will go away if Rust changes
879	/// to make the referent of `pub const FOO: &'static Encoding`
880	/// unique cross-crate or if Rust starts allowing static arrays
881	/// to be initialized with `pub static FOO: &'static Encoding`
882	/// items.
883	pub static EUC_KR_INIT: Encoding = Encoding {
884	name: "EUC-KR",
885	variant: VariantEncoding::EucKr,
886	};
887
888	/// The EUC-KR encoding.
889	///
890	/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
891	/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
892	/// Classic), with all the characters from the Hangul Syllables block of Unicode.
893	///
894	/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
895	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
896	///
897	/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
898	/// to U+0080 and some byte sequences that are error per the Encoding Standard to
899	/// the question mark or the Private Use Area.
900	///
901	/// This will change from `static` to `const` if Rust changes
902	/// to make the referent of `pub const FOO: &'static Encoding`
903	/// unique cross-crate, so don't take the address of this
904	/// `static`.
905	pub static EUC_KR: &'static Encoding = &EUC_KR_INIT;
906
907	/// The initializer for the [GBK](static.GBK.html) encoding.
908	///
909	/// For use only for taking the address of this form when
910	/// Rust prohibits the use of the non-`_INIT` form directly,
911	/// such as in initializers of other `static`s. If in doubt,
912	/// use the corresponding non-`_INIT` reference-typed `static`.
913	///
914	/// This part of the public API will go away if Rust changes
915	/// to make the referent of `pub const FOO: &'static Encoding`
916	/// unique cross-crate or if Rust starts allowing static arrays
917	/// to be initialized with `pub static FOO: &'static Encoding`
918	/// items.
919	pub static GBK_INIT: Encoding = Encoding {
920	name: "GBK",
921	variant: VariantEncoding::Gbk,
922	};
923
924	/// The GBK encoding.
925	///
926	/// The decoder for this encoding is the same as the decoder for gb18030.
927	/// The encoder side of this encoding is GBK with Windows code page 936 euro
928	/// sign behavior. GBK extends GB2312-80 to cover the CJK Unified Ideographs
929	/// Unicode block as well as a handful of ideographs from the CJK Unified
930	/// Ideographs Extension A and CJK Compatibility Ideographs blocks.
931	///
932	/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, GBK encoder wasn't
933	/// unified with the gb18030 encoder in the Encoding Standard out of concern
934	/// that servers that expect GBK form submissions might not be able to handle
935	/// the four-byte sequences.
936	///
937	/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
938	/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
939	///
940	/// The encoder of this encoding roughly matches the Windows code page 936.
941	/// The decoder side is a superset.
942	///
943	/// This will change from `static` to `const` if Rust changes
944	/// to make the referent of `pub const FOO: &'static Encoding`
945	/// unique cross-crate, so don't take the address of this
946	/// `static`.
947	pub static GBK: &'static Encoding = &GBK_INIT;
948
949	/// The initializer for the [IBM866](static.IBM866.html) encoding.
950	///
951	/// For use only for taking the address of this form when
952	/// Rust prohibits the use of the non-`_INIT` form directly,
953	/// such as in initializers of other `static`s. If in doubt,
954	/// use the corresponding non-`_INIT` reference-typed `static`.
955	///
956	/// This part of the public API will go away if Rust changes
957	/// to make the referent of `pub const FOO: &'static Encoding`
958	/// unique cross-crate or if Rust starts allowing static arrays
959	/// to be initialized with `pub static FOO: &'static Encoding`
960	/// items.
961	pub static IBM866_INIT: Encoding = Encoding {
962	name: "IBM866",
963	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.ibm866, `0x0440`, `96`, `16`),
964	};
965
966	/// The IBM866 encoding.
967	///
968	/// This the most notable one of the DOS Cyrillic code pages. It has the same
969	/// box drawing characters as code page 437, so it can be used for decoding
970	/// DOS-era ASCII + box drawing data.
971	///
972	/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
973	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
974	///
975	/// This encoding matches the Windows code page 866.
976	///
977	/// This will change from `static` to `const` if Rust changes
978	/// to make the referent of `pub const FOO: &'static Encoding`
979	/// unique cross-crate, so don't take the address of this
980	/// `static`.
981	pub static IBM866: &'static Encoding = &IBM866_INIT;
982
983	/// The initializer for the [ISO-2022-JP](static.ISO_2022_JP.html) encoding.
984	///
985	/// For use only for taking the address of this form when
986	/// Rust prohibits the use of the non-`_INIT` form directly,
987	/// such as in initializers of other `static`s. If in doubt,
988	/// use the corresponding non-`_INIT` reference-typed `static`.
989	///
990	/// This part of the public API will go away if Rust changes
991	/// to make the referent of `pub const FOO: &'static Encoding`
992	/// unique cross-crate or if Rust starts allowing static arrays
993	/// to be initialized with `pub static FOO: &'static Encoding`
994	/// items.
995	pub static ISO_2022_JP_INIT: Encoding = Encoding {
996	name: "ISO-2022-JP",
997	variant: VariantEncoding::Iso2022Jp,
998	};
999
1000	/// The ISO-2022-JP encoding.
1001	///
1002	/// This the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
1003	/// byte range to encode non-Basic Latin characters. It's the only encoding
1004	/// supported by this crate whose encoder is stateful.
1005	///
1006	/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
1007	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
1008	///
1009	/// This encoding roughly matches the Windows code page 50220. Notably, Windows
1010	/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
1011	/// error handling.
1012	///
1013	/// This will change from `static` to `const` if Rust changes
1014	/// to make the referent of `pub const FOO: &'static Encoding`
1015	/// unique cross-crate, so don't take the address of this
1016	/// `static`.
1017	pub static ISO_2022_JP: &'static Encoding = &ISO_2022_JP_INIT;
1018
1019	/// The initializer for the [ISO-8859-10](static.ISO_8859_10.html) encoding.
1020	///
1021	/// For use only for taking the address of this form when
1022	/// Rust prohibits the use of the non-`_INIT` form directly,
1023	/// such as in initializers of other `static`s. If in doubt,
1024	/// use the corresponding non-`_INIT` reference-typed `static`.
1025	///
1026	/// This part of the public API will go away if Rust changes
1027	/// to make the referent of `pub const FOO: &'static Encoding`
1028	/// unique cross-crate or if Rust starts allowing static arrays
1029	/// to be initialized with `pub static FOO: &'static Encoding`
1030	/// items.
1031	pub static ISO_8859_10_INIT: Encoding = Encoding {
1032	name: "ISO-8859-10",
1033	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_10, `0x00DA`, `90`, `6`),
1034	};
1035
1036	/// The ISO-8859-10 encoding.
1037	///
1038	/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
1039	/// is also known as Latin 6.
1040	///
1041	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
1042	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
1043	///
1044	/// The Windows code page number for this encoding is 28600, but kernel32.dll
1045	/// does not support this encoding.
1046	///
1047	/// This will change from `static` to `const` if Rust changes
1048	/// to make the referent of `pub const FOO: &'static Encoding`
1049	/// unique cross-crate, so don't take the address of this
1050	/// `static`.
1051	pub static ISO_8859_10: &'static Encoding = &ISO_8859_10_INIT;
1052
1053	/// The initializer for the [ISO-8859-13](static.ISO_8859_13.html) encoding.
1054	///
1055	/// For use only for taking the address of this form when
1056	/// Rust prohibits the use of the non-`_INIT` form directly,
1057	/// such as in initializers of other `static`s. If in doubt,
1058	/// use the corresponding non-`_INIT` reference-typed `static`.
1059	///
1060	/// This part of the public API will go away if Rust changes
1061	/// to make the referent of `pub const FOO: &'static Encoding`
1062	/// unique cross-crate or if Rust starts allowing static arrays
1063	/// to be initialized with `pub static FOO: &'static Encoding`
1064	/// items.
1065	pub static ISO_8859_13_INIT: Encoding = Encoding {
1066	name: "ISO-8859-13",
1067	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_13, `0x00DF`, `95`, `1`),
1068	};
1069
1070	/// The ISO-8859-13 encoding.
1071	///
1072	/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
1073	/// is also known as Latin 7.
1074	///
1075	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
1076	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
1077	///
1078	/// This encoding matches the Windows code page 28603, except Windows decodes
1079	/// unassigned code points to the Private Use Area of Unicode.
1080	///
1081	/// This will change from `static` to `const` if Rust changes
1082	/// to make the referent of `pub const FOO: &'static Encoding`
1083	/// unique cross-crate, so don't take the address of this
1084	/// `static`.
1085	pub static ISO_8859_13: &'static Encoding = &ISO_8859_13_INIT;
1086
1087	/// The initializer for the [ISO-8859-14](static.ISO_8859_14.html) encoding.
1088	///
1089	/// For use only for taking the address of this form when
1090	/// Rust prohibits the use of the non-`_INIT` form directly,
1091	/// such as in initializers of other `static`s. If in doubt,
1092	/// use the corresponding non-`_INIT` reference-typed `static`.
1093	///
1094	/// This part of the public API will go away if Rust changes
1095	/// to make the referent of `pub const FOO: &'static Encoding`
1096	/// unique cross-crate or if Rust starts allowing static arrays
1097	/// to be initialized with `pub static FOO: &'static Encoding`
1098	/// items.
1099	pub static ISO_8859_14_INIT: Encoding = Encoding {
1100	name: "ISO-8859-14",
1101	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_14, `0x00DF`, `95`, `17`),
1102	};
1103
1104	/// The ISO-8859-14 encoding.
1105	///
1106	/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
1107	/// is also known as Latin 8.
1108	///
1109	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
1110	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
1111	///
1112	/// The Windows code page number for this encoding is 28604, but kernel32.dll
1113	/// does not support this encoding.
1114	///
1115	/// This will change from `static` to `const` if Rust changes
1116	/// to make the referent of `pub const FOO: &'static Encoding`
1117	/// unique cross-crate, so don't take the address of this
1118	/// `static`.
1119	pub static ISO_8859_14: &'static Encoding = &ISO_8859_14_INIT;
1120
1121	/// The initializer for the [ISO-8859-15](static.ISO_8859_15.html) encoding.
1122	///
1123	/// For use only for taking the address of this form when
1124	/// Rust prohibits the use of the non-`_INIT` form directly,
1125	/// such as in initializers of other `static`s. If in doubt,
1126	/// use the corresponding non-`_INIT` reference-typed `static`.
1127	///
1128	/// This part of the public API will go away if Rust changes
1129	/// to make the referent of `pub const FOO: &'static Encoding`
1130	/// unique cross-crate or if Rust starts allowing static arrays
1131	/// to be initialized with `pub static FOO: &'static Encoding`
1132	/// items.
1133	pub static ISO_8859_15_INIT: Encoding = Encoding {
1134	name: "ISO-8859-15",
1135	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_15, `0x00BF`, `63`, `65`),
1136	};
1137
1138	/// The ISO-8859-15 encoding.
1139	///
1140	/// This is the revised Western European part of the ISO/IEC 8859 encoding
1141	/// family. This encoding is also known as Latin 9.
1142	///
1143	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
1144	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
1145	///
1146	/// This encoding matches the Windows code page 28605.
1147	///
1148	/// This will change from `static` to `const` if Rust changes
1149	/// to make the referent of `pub const FOO: &'static Encoding`
1150	/// unique cross-crate, so don't take the address of this
1151	/// `static`.
1152	pub static ISO_8859_15: &'static Encoding = &ISO_8859_15_INIT;
1153
1154	/// The initializer for the [ISO-8859-16](static.ISO_8859_16.html) encoding.
1155	///
1156	/// For use only for taking the address of this form when
1157	/// Rust prohibits the use of the non-`_INIT` form directly,
1158	/// such as in initializers of other `static`s. If in doubt,
1159	/// use the corresponding non-`_INIT` reference-typed `static`.
1160	///
1161	/// This part of the public API will go away if Rust changes
1162	/// to make the referent of `pub const FOO: &'static Encoding`
1163	/// unique cross-crate or if Rust starts allowing static arrays
1164	/// to be initialized with `pub static FOO: &'static Encoding`
1165	/// items.
1166	pub static ISO_8859_16_INIT: Encoding = Encoding {
1167	name: "ISO-8859-16",
1168	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_16, `0x00DF`, `95`, `4`),
1169	};
1170
1171	/// The ISO-8859-16 encoding.
1172	///
1173	/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
1174	/// family. This encoding is also known as Latin 10.
1175	///
1176	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
1177	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
1178	///
1179	/// The Windows code page number for this encoding is 28606, but kernel32.dll
1180	/// does not support this encoding.
1181	///
1182	/// This will change from `static` to `const` if Rust changes
1183	/// to make the referent of `pub const FOO: &'static Encoding`
1184	/// unique cross-crate, so don't take the address of this
1185	/// `static`.
1186	pub static ISO_8859_16: &'static Encoding = &ISO_8859_16_INIT;
1187
1188	/// The initializer for the [ISO-8859-2](static.ISO_8859_2.html) encoding.
1189	///
1190	/// For use only for taking the address of this form when
1191	/// Rust prohibits the use of the non-`_INIT` form directly,
1192	/// such as in initializers of other `static`s. If in doubt,
1193	/// use the corresponding non-`_INIT` reference-typed `static`.
1194	///
1195	/// This part of the public API will go away if Rust changes
1196	/// to make the referent of `pub const FOO: &'static Encoding`
1197	/// unique cross-crate or if Rust starts allowing static arrays
1198	/// to be initialized with `pub static FOO: &'static Encoding`
1199	/// items.
1200	pub static ISO_8859_2_INIT: Encoding = Encoding {
1201	name: "ISO-8859-2",
1202	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_2, `0x00DF`, `95`, `1`),
1203	};
1204
1205	/// The ISO-8859-2 encoding.
1206	///
1207	/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
1208	///
1209	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
1210	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
1211	///
1212	/// This encoding matches the Windows code page 28592.
1213	///
1214	/// This will change from `static` to `const` if Rust changes
1215	/// to make the referent of `pub const FOO: &'static Encoding`
1216	/// unique cross-crate, so don't take the address of this
1217	/// `static`.
1218	pub static ISO_8859_2: &'static Encoding = &ISO_8859_2_INIT;
1219
1220	/// The initializer for the [ISO-8859-3](static.ISO_8859_3.html) encoding.
1221	///
1222	/// For use only for taking the address of this form when
1223	/// Rust prohibits the use of the non-`_INIT` form directly,
1224	/// such as in initializers of other `static`s. If in doubt,
1225	/// use the corresponding non-`_INIT` reference-typed `static`.
1226	///
1227	/// This part of the public API will go away if Rust changes
1228	/// to make the referent of `pub const FOO: &'static Encoding`
1229	/// unique cross-crate or if Rust starts allowing static arrays
1230	/// to be initialized with `pub static FOO: &'static Encoding`
1231	/// items.
1232	pub static ISO_8859_3_INIT: Encoding = Encoding {
1233	name: "ISO-8859-3",
1234	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_3, `0x00DF`, `95`, `4`),
1235	};
1236
1237	/// The ISO-8859-3 encoding.
1238	///
1239	/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
1240	///
1241	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
1242	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
1243	///
1244	/// This encoding matches the Windows code page 28593.
1245	///
1246	/// This will change from `static` to `const` if Rust changes
1247	/// to make the referent of `pub const FOO: &'static Encoding`
1248	/// unique cross-crate, so don't take the address of this
1249	/// `static`.
1250	pub static ISO_8859_3: &'static Encoding = &ISO_8859_3_INIT;
1251
1252	/// The initializer for the [ISO-8859-4](static.ISO_8859_4.html) encoding.
1253	///
1254	/// For use only for taking the address of this form when
1255	/// Rust prohibits the use of the non-`_INIT` form directly,
1256	/// such as in initializers of other `static`s. If in doubt,
1257	/// use the corresponding non-`_INIT` reference-typed `static`.
1258	///
1259	/// This part of the public API will go away if Rust changes
1260	/// to make the referent of `pub const FOO: &'static Encoding`
1261	/// unique cross-crate or if Rust starts allowing static arrays
1262	/// to be initialized with `pub static FOO: &'static Encoding`
1263	/// items.
1264	pub static ISO_8859_4_INIT: Encoding = Encoding {
1265	name: "ISO-8859-4",
1266	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_4, `0x00DF`, `95`, `1`),
1267	};
1268
1269	/// The ISO-8859-4 encoding.
1270	///
1271	/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
1272	///
1273	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
1274	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
1275	///
1276	/// This encoding matches the Windows code page 28594.
1277	///
1278	/// This will change from `static` to `const` if Rust changes
1279	/// to make the referent of `pub const FOO: &'static Encoding`
1280	/// unique cross-crate, so don't take the address of this
1281	/// `static`.
1282	pub static ISO_8859_4: &'static Encoding = &ISO_8859_4_INIT;
1283
1284	/// The initializer for the [ISO-8859-5](static.ISO_8859_5.html) encoding.
1285	///
1286	/// For use only for taking the address of this form when
1287	/// Rust prohibits the use of the non-`_INIT` form directly,
1288	/// such as in initializers of other `static`s. If in doubt,
1289	/// use the corresponding non-`_INIT` reference-typed `static`.
1290	///
1291	/// This part of the public API will go away if Rust changes
1292	/// to make the referent of `pub const FOO: &'static Encoding`
1293	/// unique cross-crate or if Rust starts allowing static arrays
1294	/// to be initialized with `pub static FOO: &'static Encoding`
1295	/// items.
1296	pub static ISO_8859_5_INIT: Encoding = Encoding {
1297	name: "ISO-8859-5",
1298	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_5, `0x040E`, `46`, `66`),
1299	};
1300
1301	/// The ISO-8859-5 encoding.
1302	///
1303	/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
1304	///
1305	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
1306	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
1307	///
1308	/// This encoding matches the Windows code page 28595.
1309	///
1310	/// This will change from `static` to `const` if Rust changes
1311	/// to make the referent of `pub const FOO: &'static Encoding`
1312	/// unique cross-crate, so don't take the address of this
1313	/// `static`.
1314	pub static ISO_8859_5: &'static Encoding = &ISO_8859_5_INIT;
1315
1316	/// The initializer for the [ISO-8859-6](static.ISO_8859_6.html) encoding.
1317	///
1318	/// For use only for taking the address of this form when
1319	/// Rust prohibits the use of the non-`_INIT` form directly,
1320	/// such as in initializers of other `static`s. If in doubt,
1321	/// use the corresponding non-`_INIT` reference-typed `static`.
1322	///
1323	/// This part of the public API will go away if Rust changes
1324	/// to make the referent of `pub const FOO: &'static Encoding`
1325	/// unique cross-crate or if Rust starts allowing static arrays
1326	/// to be initialized with `pub static FOO: &'static Encoding`
1327	/// items.
1328	pub static ISO_8859_6_INIT: Encoding = Encoding {
1329	name: "ISO-8859-6",
1330	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_6, `0x0621`, `65`, `26`),
1331	};
1332
1333	/// The ISO-8859-6 encoding.
1334	///
1335	/// This is the Arabic part of the ISO/IEC 8859 encoding family.
1336	///
1337	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
1338	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
1339	///
1340	/// This encoding matches the Windows code page 28596, except Windows decodes
1341	/// unassigned code points to the Private Use Area of Unicode.
1342	///
1343	/// This will change from `static` to `const` if Rust changes
1344	/// to make the referent of `pub const FOO: &'static Encoding`
1345	/// unique cross-crate, so don't take the address of this
1346	/// `static`.
1347	pub static ISO_8859_6: &'static Encoding = &ISO_8859_6_INIT;
1348
1349	/// The initializer for the [ISO-8859-7](static.ISO_8859_7.html) encoding.
1350	///
1351	/// For use only for taking the address of this form when
1352	/// Rust prohibits the use of the non-`_INIT` form directly,
1353	/// such as in initializers of other `static`s. If in doubt,
1354	/// use the corresponding non-`_INIT` reference-typed `static`.
1355	///
1356	/// This part of the public API will go away if Rust changes
1357	/// to make the referent of `pub const FOO: &'static Encoding`
1358	/// unique cross-crate or if Rust starts allowing static arrays
1359	/// to be initialized with `pub static FOO: &'static Encoding`
1360	/// items.
1361	pub static ISO_8859_7_INIT: Encoding = Encoding {
1362	name: "ISO-8859-7",
1363	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_7, `0x03A3`, `83`, `44`),
1364	};
1365
1366	/// The ISO-8859-7 encoding.
1367	///
1368	/// This is the Greek part of the ISO/IEC 8859 encoding family.
1369	///
1370	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
1371	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
1372	///
1373	/// This encoding roughly matches the Windows code page 28597. Windows decodes
1374	/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
1375	/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
1376	/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
1377	/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
1378	/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
1379	///
1380	/// This will change from `static` to `const` if Rust changes
1381	/// to make the referent of `pub const FOO: &'static Encoding`
1382	/// unique cross-crate, so don't take the address of this
1383	/// `static`.
1384	pub static ISO_8859_7: &'static Encoding = &ISO_8859_7_INIT;
1385
1386	/// The initializer for the [ISO-8859-8](static.ISO_8859_8.html) encoding.
1387	///
1388	/// For use only for taking the address of this form when
1389	/// Rust prohibits the use of the non-`_INIT` form directly,
1390	/// such as in initializers of other `static`s. If in doubt,
1391	/// use the corresponding non-`_INIT` reference-typed `static`.
1392	///
1393	/// This part of the public API will go away if Rust changes
1394	/// to make the referent of `pub const FOO: &'static Encoding`
1395	/// unique cross-crate or if Rust starts allowing static arrays
1396	/// to be initialized with `pub static FOO: &'static Encoding`
1397	/// items.
1398	pub static ISO_8859_8_INIT: Encoding = Encoding {
1399	name: "ISO-8859-8",
1400	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_8, `0x05D0`, `96`, `27`),
1401	};
1402
1403	/// The ISO-8859-8 encoding.
1404	///
1405	/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
1406	///
1407	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
1408	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
1409	///
1410	/// This encoding roughly matches the Windows code page 28598. Windows decodes
/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
1412	/// Area instead of LRM and RLM. Windows decodes unassigned code points to
1413	/// the private use area.
1414	///
1415	/// This will change from `static` to `const` if Rust changes
1416	/// to make the referent of `pub const FOO: &'static Encoding`
1417	/// unique cross-crate, so don't take the address of this
1418	/// `static`.
1419	pub static ISO_8859_8: &'static Encoding = &ISO_8859_8_INIT;
1420
1421	/// The initializer for the [ISO-8859-8-I](static.ISO_8859_8_I.html) encoding.
1422	///
1423	/// For use only for taking the address of this form when
1424	/// Rust prohibits the use of the non-`_INIT` form directly,
1425	/// such as in initializers of other `static`s. If in doubt,
1426	/// use the corresponding non-`_INIT` reference-typed `static`.
1427	///
1428	/// This part of the public API will go away if Rust changes
1429	/// to make the referent of `pub const FOO: &'static Encoding`
1430	/// unique cross-crate or if Rust starts allowing static arrays
1431	/// to be initialized with `pub static FOO: &'static Encoding`
1432	/// items.
1433	pub static ISO_8859_8_I_INIT: Encoding = Encoding {
1434	name: "ISO-8859-8-I",
1435	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_8, `0x05D0`, `96`, `27`),
1436	};
1437
1438	/// The ISO-8859-8-I encoding.
1439	///
1440	/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
1441	///
1442	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
1443	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
1444	///
1445	/// This encoding roughly matches the Windows code page 38598. Windows decodes
/// 0xAF to OVERLINE instead of MACRON and 0xFD and 0xFE to the Private Use
1447	/// Area instead of LRM and RLM. Windows decodes unassigned code points to
1448	/// the private use area.
1449	///
1450	/// This will change from `static` to `const` if Rust changes
1451	/// to make the referent of `pub const FOO: &'static Encoding`
1452	/// unique cross-crate, so don't take the address of this
1453	/// `static`.
1454	pub static ISO_8859_8_I: &'static Encoding = &ISO_8859_8_I_INIT;
1455
1456	/// The initializer for the [KOI8-R](static.KOI8_R.html) encoding.
1457	///
1458	/// For use only for taking the address of this form when
1459	/// Rust prohibits the use of the non-`_INIT` form directly,
1460	/// such as in initializers of other `static`s. If in doubt,
1461	/// use the corresponding non-`_INIT` reference-typed `static`.
1462	///
1463	/// This part of the public API will go away if Rust changes
1464	/// to make the referent of `pub const FOO: &'static Encoding`
1465	/// unique cross-crate or if Rust starts allowing static arrays
1466	/// to be initialized with `pub static FOO: &'static Encoding`
1467	/// items.
1468	pub static KOI8_R_INIT: Encoding = Encoding {
1469	name: "KOI8-R",
1470	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.koi8_r, `0x044E`, `64`, `1`),
1471	};
1472
1473	/// The KOI8-R encoding.
1474	///
1475	/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
1476	///
1477	/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
1478	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
1479	///
1480	/// This encoding matches the Windows code page 20866.
1481	///
1482	/// This will change from `static` to `const` if Rust changes
1483	/// to make the referent of `pub const FOO: &'static Encoding`
1484	/// unique cross-crate, so don't take the address of this
1485	/// `static`.
1486	pub static KOI8_R: &'static Encoding = &KOI8_R_INIT;
1487
1488	/// The initializer for the [KOI8-U](static.KOI8_U.html) encoding.
1489	///
1490	/// For use only for taking the address of this form when
1491	/// Rust prohibits the use of the non-`_INIT` form directly,
1492	/// such as in initializers of other `static`s. If in doubt,
1493	/// use the corresponding non-`_INIT` reference-typed `static`.
1494	///
1495	/// This part of the public API will go away if Rust changes
1496	/// to make the referent of `pub const FOO: &'static Encoding`
1497	/// unique cross-crate or if Rust starts allowing static arrays
1498	/// to be initialized with `pub static FOO: &'static Encoding`
1499	/// items.
1500	pub static KOI8_U_INIT: Encoding = Encoding {
1501	name: "KOI8-U",
1502	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.koi8_u, `0x044E`, `64`, `1`),
1503	};
1504
1505	/// The KOI8-U encoding.
1506	///
1507	/// This is an encoding for Ukrainian adapted from KOI8-R.
1508	///
1509	/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
1510	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
1511	///
1512	/// This encoding matches the Windows code page 21866.
1513	///
1514	/// This will change from `static` to `const` if Rust changes
1515	/// to make the referent of `pub const FOO: &'static Encoding`
1516	/// unique cross-crate, so don't take the address of this
1517	/// `static`.
1518	pub static KOI8_U: &'static Encoding = &KOI8_U_INIT;
1519
1520	/// The initializer for the [Shift_JIS](static.SHIFT_JIS.html) encoding.
1521	///
1522	/// For use only for taking the address of this form when
1523	/// Rust prohibits the use of the non-`_INIT` form directly,
1524	/// such as in initializers of other `static`s. If in doubt,
1525	/// use the corresponding non-`_INIT` reference-typed `static`.
1526	///
1527	/// This part of the public API will go away if Rust changes
1528	/// to make the referent of `pub const FOO: &'static Encoding`
1529	/// unique cross-crate or if Rust starts allowing static arrays
1530	/// to be initialized with `pub static FOO: &'static Encoding`
1531	/// items.
1532	pub static SHIFT_JIS_INIT: Encoding = Encoding {
1533	name: "Shift_JIS",
1534	variant: VariantEncoding::ShiftJis,
1535	};
1536
1537	/// The Shift_JIS encoding.
1538	///
1539	/// This is the Japanese encoding for Windows.
1540	///
1541	/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
1542	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
1543	///
1544	/// This encoding matches the Windows code page 932, except Windows decodes some byte
/// sequences that are errors per the Encoding Standard to the question mark or the
1546	/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
1547	///
1548	/// This will change from `static` to `const` if Rust changes
1549	/// to make the referent of `pub const FOO: &'static Encoding`
1550	/// unique cross-crate, so don't take the address of this
1551	/// `static`.
1552	pub static SHIFT_JIS: &'static Encoding = &SHIFT_JIS_INIT;
1553
1554	/// The initializer for the [UTF-16BE](static.UTF_16BE.html) encoding.
1555	///
1556	/// For use only for taking the address of this form when
1557	/// Rust prohibits the use of the non-`_INIT` form directly,
1558	/// such as in initializers of other `static`s. If in doubt,
1559	/// use the corresponding non-`_INIT` reference-typed `static`.
1560	///
1561	/// This part of the public API will go away if Rust changes
1562	/// to make the referent of `pub const FOO: &'static Encoding`
1563	/// unique cross-crate or if Rust starts allowing static arrays
1564	/// to be initialized with `pub static FOO: &'static Encoding`
1565	/// items.
1566	pub static UTF_16BE_INIT: Encoding = Encoding {
1567	name: "UTF-16BE",
1568	variant: VariantEncoding::Utf16Be,
1569	};
1570
1571	/// The UTF-16BE encoding.
1572	///
1573	/// This decode-only encoding uses 16-bit code units due to Unicode originally
/// having been designed as a 16-bit repertoire. In the absence of a byte order
1575	/// mark the big endian byte order is assumed.
1576	///
1577	/// There is no corresponding encoder in this crate or in the Encoding
1578	/// Standard. The output encoding of this encoding is UTF-8.
1579	///
1580	/// This encoding matches the Windows code page 1201.
1581	///
1582	/// This will change from `static` to `const` if Rust changes
1583	/// to make the referent of `pub const FOO: &'static Encoding`
1584	/// unique cross-crate, so don't take the address of this
1585	/// `static`.
1586	pub static UTF_16BE: &'static Encoding = &UTF_16BE_INIT;
1587
1588	/// The initializer for the [UTF-16LE](static.UTF_16LE.html) encoding.
1589	///
1590	/// For use only for taking the address of this form when
1591	/// Rust prohibits the use of the non-`_INIT` form directly,
1592	/// such as in initializers of other `static`s. If in doubt,
1593	/// use the corresponding non-`_INIT` reference-typed `static`.
1594	///
1595	/// This part of the public API will go away if Rust changes
1596	/// to make the referent of `pub const FOO: &'static Encoding`
1597	/// unique cross-crate or if Rust starts allowing static arrays
1598	/// to be initialized with `pub static FOO: &'static Encoding`
1599	/// items.
1600	pub static UTF_16LE_INIT: Encoding = Encoding {
1601	name: "UTF-16LE",
1602	variant: VariantEncoding::Utf16Le,
1603	};
1604
1605	/// The UTF-16LE encoding.
1606	///
1607	/// This decode-only encoding uses 16-bit code units due to Unicode originally
/// having been designed as a 16-bit repertoire. In the absence of a byte order
1609	/// mark the little endian byte order is assumed.
1610	///
1611	/// There is no corresponding encoder in this crate or in the Encoding
1612	/// Standard. The output encoding of this encoding is UTF-8.
1613	///
1614	/// This encoding matches the Windows code page 1200.
1615	///
1616	/// This will change from `static` to `const` if Rust changes
1617	/// to make the referent of `pub const FOO: &'static Encoding`
1618	/// unique cross-crate, so don't take the address of this
1619	/// `static`.
1620	pub static UTF_16LE: &'static Encoding = &UTF_16LE_INIT;
1621
1622	/// The initializer for the [UTF-8](static.UTF_8.html) encoding.
1623	///
1624	/// For use only for taking the address of this form when
1625	/// Rust prohibits the use of the non-`_INIT` form directly,
1626	/// such as in initializers of other `static`s. If in doubt,
1627	/// use the corresponding non-`_INIT` reference-typed `static`.
1628	///
1629	/// This part of the public API will go away if Rust changes
1630	/// to make the referent of `pub const FOO: &'static Encoding`
1631	/// unique cross-crate or if Rust starts allowing static arrays
1632	/// to be initialized with `pub static FOO: &'static Encoding`
1633	/// items.
1634	pub static UTF_8_INIT: Encoding = Encoding {
1635	name: "UTF-8",
1636	variant: VariantEncoding::Utf8,
1637	};
1638
1639	/// The UTF-8 encoding.
1640	///
/// This is the encoding that should be used for all new development, as it
/// can represent all of Unicode.
1643	///
1644	/// This encoding matches the Windows code page 65001, except Windows differs
1645	/// in the number of errors generated for some erroneous byte sequences.
1646	///
1647	/// This will change from `static` to `const` if Rust changes
1648	/// to make the referent of `pub const FOO: &'static Encoding`
1649	/// unique cross-crate, so don't take the address of this
1650	/// `static`.
1651	pub static UTF_8: &'static Encoding = &UTF_8_INIT;
1652
1653	/// The initializer for the [gb18030](static.GB18030.html) encoding.
1654	///
1655	/// For use only for taking the address of this form when
1656	/// Rust prohibits the use of the non-`_INIT` form directly,
1657	/// such as in initializers of other `static`s. If in doubt,
1658	/// use the corresponding non-`_INIT` reference-typed `static`.
1659	///
1660	/// This part of the public API will go away if Rust changes
1661	/// to make the referent of `pub const FOO: &'static Encoding`
1662	/// unique cross-crate or if Rust starts allowing static arrays
1663	/// to be initialized with `pub static FOO: &'static Encoding`
1664	/// items.
1665	pub static GB18030_INIT: Encoding = Encoding {
1666	name: "gb18030",
1667	variant: VariantEncoding::Gb18030,
1668	};
1669
1670	/// The gb18030 encoding.
1671	///
1672	/// This encoding matches GB18030-2005 except the two-byte sequence 0xA3 0xA0
1673	/// maps to U+3000 for compatibility with existing Web content. As a result,
1674	/// this encoding can represent all of Unicode except for the private-use
1675	/// character U+E5E5.
1676	///
1677	/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
1678	/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
1679	///
1680	/// This encoding matches the Windows code page 54936.
1681	///
1682	/// This will change from `static` to `const` if Rust changes
1683	/// to make the referent of `pub const FOO: &'static Encoding`
1684	/// unique cross-crate, so don't take the address of this
1685	/// `static`.
1686	pub static GB18030: &'static Encoding = &GB18030_INIT;
1687
1688	/// The initializer for the [macintosh](static.MACINTOSH.html) encoding.
1689	///
1690	/// For use only for taking the address of this form when
1691	/// Rust prohibits the use of the non-`_INIT` form directly,
1692	/// such as in initializers of other `static`s. If in doubt,
1693	/// use the corresponding non-`_INIT` reference-typed `static`.
1694	///
1695	/// This part of the public API will go away if Rust changes
1696	/// to make the referent of `pub const FOO: &'static Encoding`
1697	/// unique cross-crate or if Rust starts allowing static arrays
1698	/// to be initialized with `pub static FOO: &'static Encoding`
1699	/// items.
1700	pub static MACINTOSH_INIT: Encoding = Encoding {
1701	name: "macintosh",
1702	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.macintosh, `0x00CD`, `106`, `3`),
1703	};
1704
1705	/// The macintosh encoding.
1706	///
1707	/// This is the MacRoman encoding from Mac OS Classic.
1708	///
1709	/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
1710	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
1711	///
1712	/// This encoding matches the Windows code page 10000, except Windows decodes
1713	/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
1714	///
1715	/// This will change from `static` to `const` if Rust changes
1716	/// to make the referent of `pub const FOO: &'static Encoding`
1717	/// unique cross-crate, so don't take the address of this
1718	/// `static`.
1719	pub static MACINTOSH: &'static Encoding = &MACINTOSH_INIT;
1720
1721	/// The initializer for the [replacement](static.REPLACEMENT.html) encoding.
1722	///
1723	/// For use only for taking the address of this form when
1724	/// Rust prohibits the use of the non-`_INIT` form directly,
1725	/// such as in initializers of other `static`s. If in doubt,
1726	/// use the corresponding non-`_INIT` reference-typed `static`.
1727	///
1728	/// This part of the public API will go away if Rust changes
1729	/// to make the referent of `pub const FOO: &'static Encoding`
1730	/// unique cross-crate or if Rust starts allowing static arrays
1731	/// to be initialized with `pub static FOO: &'static Encoding`
1732	/// items.
1733	pub static REPLACEMENT_INIT: Encoding = Encoding {
1734	name: "replacement",
1735	variant: VariantEncoding::Replacement,
1736	};
1737
1738	/// The replacement encoding.
1739	///
1740	/// This decode-only encoding decodes all non-zero-length streams to a single
1741	/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
1742	/// ASCII-compatible fallback encoding (typically windows-1252) for some
1743	/// encodings that are no longer supported by the Web Platform and that
1744	/// would be dangerous to treat as ASCII-compatible.
1745	///
1746	/// There is no corresponding encoder. The output encoding of this encoding
1747	/// is UTF-8.
1748	///
1749	/// This encoding does not have a Windows code page number.
1750	///
1751	/// This will change from `static` to `const` if Rust changes
1752	/// to make the referent of `pub const FOO: &'static Encoding`
1753	/// unique cross-crate, so don't take the address of this
1754	/// `static`.
1755	pub static REPLACEMENT: &'static Encoding = &REPLACEMENT_INIT;
1756
1757	/// The initializer for the [windows-1250](static.WINDOWS_1250.html) encoding.
1758	///
1759	/// For use only for taking the address of this form when
1760	/// Rust prohibits the use of the non-`_INIT` form directly,
1761	/// such as in initializers of other `static`s. If in doubt,
1762	/// use the corresponding non-`_INIT` reference-typed `static`.
1763	///
1764	/// This part of the public API will go away if Rust changes
1765	/// to make the referent of `pub const FOO: &'static Encoding`
1766	/// unique cross-crate or if Rust starts allowing static arrays
1767	/// to be initialized with `pub static FOO: &'static Encoding`
1768	/// items.
1769	pub static WINDOWS_1250_INIT: Encoding = Encoding {
1770	name: "windows-1250",
1771	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1250, `0x00DC`, `92`, `2`),
1772	};
1773
1774	/// The windows-1250 encoding.
1775	///
1776	/// This is the Central European encoding for Windows.
1777	///
1778	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
1779	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
1780	///
1781	/// This encoding matches the Windows code page 1250.
1782	///
1783	/// This will change from `static` to `const` if Rust changes
1784	/// to make the referent of `pub const FOO: &'static Encoding`
1785	/// unique cross-crate, so don't take the address of this
1786	/// `static`.
1787	pub static WINDOWS_1250: &'static Encoding = &WINDOWS_1250_INIT;
1788
1789	/// The initializer for the [windows-1251](static.WINDOWS_1251.html) encoding.
1790	///
1791	/// For use only for taking the address of this form when
1792	/// Rust prohibits the use of the non-`_INIT` form directly,
1793	/// such as in initializers of other `static`s. If in doubt,
1794	/// use the corresponding non-`_INIT` reference-typed `static`.
1795	///
1796	/// This part of the public API will go away if Rust changes
1797	/// to make the referent of `pub const FOO: &'static Encoding`
1798	/// unique cross-crate or if Rust starts allowing static arrays
1799	/// to be initialized with `pub static FOO: &'static Encoding`
1800	/// items.
1801	pub static WINDOWS_1251_INIT: Encoding = Encoding {
1802	name: "windows-1251",
1803	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1251, `0x0410`, `64`, `64`),
1804	};
1805
1806	/// The windows-1251 encoding.
1807	///
1808	/// This is the Cyrillic encoding for Windows.
1809	///
1810	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
1811	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
1812	///
1813	/// This encoding matches the Windows code page 1251.
1814	///
1815	/// This will change from `static` to `const` if Rust changes
1816	/// to make the referent of `pub const FOO: &'static Encoding`
1817	/// unique cross-crate, so don't take the address of this
1818	/// `static`.
1819	pub static WINDOWS_1251: &'static Encoding = &WINDOWS_1251_INIT;
1820
1821	/// The initializer for the [windows-1252](static.WINDOWS_1252.html) encoding.
1822	///
1823	/// For use only for taking the address of this form when
1824	/// Rust prohibits the use of the non-`_INIT` form directly,
1825	/// such as in initializers of other `static`s. If in doubt,
1826	/// use the corresponding non-`_INIT` reference-typed `static`.
1827	///
1828	/// This part of the public API will go away if Rust changes
1829	/// to make the referent of `pub const FOO: &'static Encoding`
1830	/// unique cross-crate or if Rust starts allowing static arrays
1831	/// to be initialized with `pub static FOO: &'static Encoding`
1832	/// items.
1833	pub static WINDOWS_1252_INIT: Encoding = Encoding {
1834	name: "windows-1252",
1835	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1252, `0x00A0`, `32`, `96`),
1836	};
1837
1838	/// The windows-1252 encoding.
1839	///
1840	/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
1841	/// which is known as Latin 1.
1842	///
1843	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
1844	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
1845	///
1846	/// This encoding matches the Windows code page 1252.
1847	///
1848	/// This will change from `static` to `const` if Rust changes
1849	/// to make the referent of `pub const FOO: &'static Encoding`
1850	/// unique cross-crate, so don't take the address of this
1851	/// `static`.
1852	pub static WINDOWS_1252: &'static Encoding = &WINDOWS_1252_INIT;
1853
1854	/// The initializer for the [windows-1253](static.WINDOWS_1253.html) encoding.
1855	///
1856	/// For use only for taking the address of this form when
1857	/// Rust prohibits the use of the non-`_INIT` form directly,
1858	/// such as in initializers of other `static`s. If in doubt,
1859	/// use the corresponding non-`_INIT` reference-typed `static`.
1860	///
1861	/// This part of the public API will go away if Rust changes
1862	/// to make the referent of `pub const FOO: &'static Encoding`
1863	/// unique cross-crate or if Rust starts allowing static arrays
1864	/// to be initialized with `pub static FOO: &'static Encoding`
1865	/// items.
1866	pub static WINDOWS_1253_INIT: Encoding = Encoding {
1867	name: "windows-1253",
1868	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1253, `0x03A3`, `83`, `44`),
1869	};
1870
1871	/// The windows-1253 encoding.
1872	///
1873	/// This is the Greek encoding for Windows. It is mostly an extension of
1874	/// ISO-8859-7, but U+0386 is mapped to a different byte.
1875	///
1876	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
1877	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
1878	///
1879	/// This encoding matches the Windows code page 1253, except Windows decodes
1880	/// unassigned code points to the Private Use Area of Unicode.
1881	///
1882	/// This will change from `static` to `const` if Rust changes
1883	/// to make the referent of `pub const FOO: &'static Encoding`
1884	/// unique cross-crate, so don't take the address of this
1885	/// `static`.
1886	pub static WINDOWS_1253: &'static Encoding = &WINDOWS_1253_INIT;
1887
1888	/// The initializer for the [windows-1254](static.WINDOWS_1254.html) encoding.
1889	///
1890	/// For use only for taking the address of this form when
1891	/// Rust prohibits the use of the non-`_INIT` form directly,
1892	/// such as in initializers of other `static`s. If in doubt,
1893	/// use the corresponding non-`_INIT` reference-typed `static`.
1894	///
1895	/// This part of the public API will go away if Rust changes
1896	/// to make the referent of `pub const FOO: &'static Encoding`
1897	/// unique cross-crate or if Rust starts allowing static arrays
1898	/// to be initialized with `pub static FOO: &'static Encoding`
1899	/// items.
1900	pub static WINDOWS_1254_INIT: Encoding = Encoding {
1901	name: "windows-1254",
1902	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1254, `0x00DF`, `95`, `17`),
1903	};
1904
1905	/// The windows-1254 encoding.
1906	///
1907	/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
1908	/// which is known as Latin 5.
1909	///
1910	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
1911	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
1912	///
1913	/// This encoding matches the Windows code page 1254.
1914	///
1915	/// This will change from `static` to `const` if Rust changes
1916	/// to make the referent of `pub const FOO: &'static Encoding`
1917	/// unique cross-crate, so don't take the address of this
1918	/// `static`.
1919	pub static WINDOWS_1254: &'static Encoding = &WINDOWS_1254_INIT;
1920
1921	/// The initializer for the [windows-1255](static.WINDOWS_1255.html) encoding.
1922	///
1923	/// For use only for taking the address of this form when
1924	/// Rust prohibits the use of the non-`_INIT` form directly,
1925	/// such as in initializers of other `static`s. If in doubt,
1926	/// use the corresponding non-`_INIT` reference-typed `static`.
1927	///
1928	/// This part of the public API will go away if Rust changes
1929	/// to make the referent of `pub const FOO: &'static Encoding`
1930	/// unique cross-crate or if Rust starts allowing static arrays
1931	/// to be initialized with `pub static FOO: &'static Encoding`
1932	/// items.
1933	pub static WINDOWS_1255_INIT: Encoding = Encoding {
1934	name: "windows-1255",
1935	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1255, `0x05D0`, `96`, `27`),
1936	};
1937
1938	/// The windows-1255 encoding.
1939	///
1940	/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
1941	/// except for a currency sign swap.
1942	///
1943	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
1944	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
1945	///
1946	/// This encoding matches the Windows code page 1255, except Windows decodes
1947	/// unassigned code points to the Private Use Area of Unicode.
1948	///
1949	/// This will change from `static` to `const` if Rust changes
1950	/// to make the referent of `pub const FOO: &'static Encoding`
1951	/// unique cross-crate, so don't take the address of this
1952	/// `static`.
1953	pub static WINDOWS_1255: &'static Encoding = &WINDOWS_1255_INIT;
1954
1955	/// The initializer for the [windows-1256](static.WINDOWS_1256.html) encoding.
1956	///
1957	/// For use only for taking the address of this form when
1958	/// Rust prohibits the use of the non-`_INIT` form directly,
1959	/// such as in initializers of other `static`s. If in doubt,
1960	/// use the corresponding non-`_INIT` reference-typed `static`.
1961	///
1962	/// This part of the public API will go away if Rust changes
1963	/// to make the referent of `pub const FOO: &'static Encoding`
1964	/// unique cross-crate or if Rust starts allowing static arrays
1965	/// to be initialized with `pub static FOO: &'static Encoding`
1966	/// items.
1967	pub static WINDOWS_1256_INIT: Encoding = Encoding {
1968	name: "windows-1256",
1969	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1256, `0x0621`, `65`, `22`),
1970	};
1971
1972	/// The windows-1256 encoding.
1973	///
1974	/// This is the Arabic encoding for Windows.
1975	///
1976	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
1977	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
1978	///
1979	/// This encoding matches the Windows code page 1256.
1980	///
1981	/// This will change from `static` to `const` if Rust changes
1982	/// to make the referent of `pub const FOO: &'static Encoding`
1983	/// unique cross-crate, so don't take the address of this
1984	/// `static`.
1985	pub static WINDOWS_1256: &'static Encoding = &WINDOWS_1256_INIT;
1986
1987	/// The initializer for the [windows-1257](static.WINDOWS_1257.html) encoding.
1988	///
1989	/// For use only for taking the address of this form when
1990	/// Rust prohibits the use of the non-`_INIT` form directly,
1991	/// such as in initializers of other `static`s. If in doubt,
1992	/// use the corresponding non-`_INIT` reference-typed `static`.
1993	///
1994	/// This part of the public API will go away if Rust changes
1995	/// to make the referent of `pub const FOO: &'static Encoding`
1996	/// unique cross-crate or if Rust starts allowing static arrays
1997	/// to be initialized with `pub static FOO: &'static Encoding`
1998	/// items.
1999	pub static WINDOWS_1257_INIT: Encoding = Encoding {
2000	name: "windows-1257",
2001	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1257, `0x00DF`, `95`, `1`),
2002	};
2003
2004	/// The windows-1257 encoding.
2005	///
2006	/// This is the Baltic encoding for Windows.
2007	///
2008	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
2009	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
2010	///
2011	/// This encoding matches the Windows code page 1257, except Windows decodes
2012	/// unassigned code points to the Private Use Area of Unicode.
2013	///
2014	/// This will change from `static` to `const` if Rust changes
2015	/// to make the referent of `pub const FOO: &'static Encoding`
2016	/// unique cross-crate, so don't take the address of this
2017	/// `static`.
2018	pub static WINDOWS_1257: &'static Encoding = &WINDOWS_1257_INIT;
2019
2020	/// The initializer for the [windows-1258](static.WINDOWS_1258.html) encoding.
2021	///
2022	/// For use only for taking the address of this form when
2023	/// Rust prohibits the use of the non-`_INIT` form directly,
2024	/// such as in initializers of other `static`s. If in doubt,
2025	/// use the corresponding non-`_INIT` reference-typed `static`.
2026	///
2027	/// This part of the public API will go away if Rust changes
2028	/// to make the referent of `pub const FOO: &'static Encoding`
2029	/// unique cross-crate or if Rust starts allowing static arrays
2030	/// to be initialized with `pub static FOO: &'static Encoding`
2031	/// items.
2032	pub static WINDOWS_1258_INIT: Encoding = Encoding {
2033	name: "windows-1258",
2034	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1258, `0x00DF`, `95`, `4`),
2035	};
2036
2037	/// The windows-1258 encoding.
2038	///
2039	/// This is the Vietnamese encoding for Windows.
2040	///
2041	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
2042	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
2043	///
2044	/// This encoding matches the Windows code page 1258 when used in the
2045	/// non-normalizing mode. Unlike with the other single-byte encodings, the
2046	/// result of decoding is not necessarily in Normalization Form C. On the
2047	/// other hand, input in the Normalization Form C is not encoded without
2048	/// replacement. In general, it's a bad idea to encode to encodings other
2049	/// than UTF-8, but this encoding is especially hazardous to encode to.
2050	///
2051	/// This will change from `static` to `const` if Rust changes
2052	/// to make the referent of `pub const FOO: &'static Encoding`
2053	/// unique cross-crate, so don't take the address of this
2054	/// `static`.
2055	pub static WINDOWS_1258: &'static Encoding = &WINDOWS_1258_INIT;
2056
2057	/// The initializer for the [windows-874](static.WINDOWS_874.html) encoding.
2058	///
2059	/// For use only for taking the address of this form when
2060	/// Rust prohibits the use of the non-`_INIT` form directly,
2061	/// such as in initializers of other `static`s. If in doubt,
2062	/// use the corresponding non-`_INIT` reference-typed `static`.
2063	///
2064	/// This part of the public API will go away if Rust changes
2065	/// to make the referent of `pub const FOO: &'static Encoding`
2066	/// unique cross-crate or if Rust starts allowing static arrays
2067	/// to be initialized with `pub static FOO: &'static Encoding`
2068	/// items.
2069	pub static WINDOWS_874_INIT: Encoding = Encoding {
2070	name: "windows-874",
2071	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_874, `0x0E01`, `33`, `58`),
2072	};
2073
2074	/// The windows-874 encoding.
2075	///
2076	/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
2077	///
2078	/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
2079	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
2080	///
2081	/// This encoding matches the Windows code page 874, except Windows decodes
2082	/// unassigned code points to the Private Use Area of Unicode.
2083	///
2084	/// This will change from `static` to `const` if Rust changes
2085	/// to make the referent of `pub const FOO: &'static Encoding`
2086	/// unique cross-crate, so don't take the address of this
2087	/// `static`.
2088	pub static WINDOWS_874: &'static Encoding = &WINDOWS_874_INIT;
2089
2090	/// The initializer for the [x-mac-cyrillic](static.X_MAC_CYRILLIC.html) encoding.
2091	///
2092	/// For use only for taking the address of this form when
2093	/// Rust prohibits the use of the non-`_INIT` form directly,
2094	/// such as in initializers of other `static`s. If in doubt,
2095	/// use the corresponding non-`_INIT` reference-typed `static`.
2096	///
2097	/// This part of the public API will go away if Rust changes
2098	/// to make the referent of `pub const FOO: &'static Encoding`
2099	/// unique cross-crate or if Rust starts allowing static arrays
2100	/// to be initialized with `pub static FOO: &'static Encoding`
2101	/// items.
2102	pub static X_MAC_CYRILLIC_INIT: Encoding = Encoding {
2103	name: "x-mac-cyrillic",
2104	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.x_mac_cyrillic, `0x0430`, `96`, `31`),
2105	};
2106
2107	/// The x-mac-cyrillic encoding.
2108	///
2109	/// This is the MacUkrainian encoding from Mac OS Classic.
2110	///
2111	/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
2112	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
2113	///
2114	/// This encoding matches the Windows code page 10017.
2115	///
2116	/// This will change from `static` to `const` if Rust changes
2117	/// to make the referent of `pub const FOO: &'static Encoding`
2118	/// unique cross-crate, so don't take the address of this
2119	/// `static`.
2120	pub static X_MAC_CYRILLIC: &'static Encoding = &X_MAC_CYRILLIC_INIT;
2121
2122	/// The initializer for the [x-user-defined](static.X_USER_DEFINED.html) encoding.
2123	///
2124	/// For use only for taking the address of this form when
2125	/// Rust prohibits the use of the non-`_INIT` form directly,
2126	/// such as in initializers of other `static`s. If in doubt,
2127	/// use the corresponding non-`_INIT` reference-typed `static`.
2128	///
2129	/// This part of the public API will go away if Rust changes
2130	/// to make the referent of `pub const FOO: &'static Encoding`
2131	/// unique cross-crate or if Rust starts allowing static arrays
2132	/// to be initialized with `pub static FOO: &'static Encoding`
2133	/// items.
2134	pub static X_USER_DEFINED_INIT: Encoding = Encoding {
2135	name: "x-user-defined",
2136	variant: VariantEncoding::UserDefined,
2137	};
2138
2139	/// The x-user-defined encoding.
2140	///
2141	/// This encoding offsets the non-ASCII bytes by `0xF700` thereby decoding
2142	/// them to the Private Use Area of Unicode. It was used for loading binary
2143	/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
2144	/// the `"arraybuffer"` response type.
2145	///
2146	/// This encoding does not have a Windows code page number.
2147	///
2148	/// This will change from `static` to `const` if Rust changes
2149	/// to make the referent of `pub const FOO: &'static Encoding`
2150	/// unique cross-crate, so don't take the address of this
2151	/// `static`.
2152	pub static X_USER_DEFINED: &'static Encoding = &X_USER_DEFINED_INIT;
2153
/// Every recognized label, in the order that the binary search in
/// `Encoding::for_label()` expects: shorter labels first and, among labels
/// of equal length, ordered by comparing their bytes from the last byte
/// towards the first.
///
/// The entry at index `i` pairs with the entry at index `i` of
/// `ENCODINGS_IN_LABEL_SORT`, so the two arrays must be kept in step.
static LABELS_SORTED: [&str; 228] = [
    "l1",
    "l2",
    "l3",
    "l4",
    "l5",
    "l6",
    "l9",
    "866",
    "mac",
    "koi",
    "gbk",
    "big5",
    "utf8",
    "koi8",
    "sjis",
    "ucs-2",
    "ms932",
    "cp866",
    "utf-8",
    "cp819",
    "ascii",
    "x-gbk",
    "greek",
    "cp1250",
    "cp1251",
    "latin1",
    "gb2312",
    "cp1252",
    "latin2",
    "cp1253",
    "latin3",
    "cp1254",
    "latin4",
    "cp1255",
    "csbig5",
    "latin5",
    "utf-16",
    "cp1256",
    "ibm866",
    "latin6",
    "cp1257",
    "cp1258",
    "greek8",
    "ibm819",
    "arabic",
    "visual",
    "korean",
    "euc-jp",
    "koi8-r",
    "koi8_r",
    "euc-kr",
    "x-sjis",
    "koi8-u",
    "hebrew",
    "tis-620",
    "gb18030",
    "ksc5601",
    "gb_2312",
    "dos-874",
    "cn-big5",
    "unicode",
    "chinese",
    "logical",
    "cskoi8r",
    "cseuckr",
    "koi8-ru",
    "x-cp1250",
    "ksc_5601",
    "x-cp1251",
    "iso88591",
    "csgb2312",
    "x-cp1252",
    "iso88592",
    "x-cp1253",
    "iso88593",
    "ecma-114",
    "x-cp1254",
    "iso88594",
    "x-cp1255",
    "iso88595",
    "x-x-big5",
    "x-cp1256",
    "csibm866",
    "iso88596",
    "x-cp1257",
    "iso88597",
    "asmo-708",
    "ecma-118",
    "elot_928",
    "x-cp1258",
    "iso88598",
    "iso88599",
    "cyrillic",
    "utf-16be",
    "utf-16le",
    "us-ascii",
    "ms_kanji",
    "x-euc-jp",
    "iso885910",
    "iso8859-1",
    "iso885911",
    "iso8859-2",
    "iso8859-3",
    "iso885913",
    "iso8859-4",
    "iso885914",
    "iso8859-5",
    "iso885915",
    "iso8859-6",
    "iso8859-7",
    "iso8859-8",
    "iso-ir-58",
    "iso8859-9",
    "csunicode",
    "macintosh",
    "shift-jis",
    "shift_jis",
    "iso-ir-100",
    "iso8859-10",
    "iso-ir-110",
    "gb_2312-80",
    "iso-8859-1",
    "iso_8859-1",
    "iso-ir-101",
    "iso8859-11",
    "iso-8859-2",
    "iso_8859-2",
    "hz-gb-2312",
    "iso-8859-3",
    "iso_8859-3",
    "iso8859-13",
    "iso-8859-4",
    "iso_8859-4",
    "iso8859-14",
    "iso-ir-144",
    "iso-8859-5",
    "iso_8859-5",
    "iso8859-15",
    "iso-8859-6",
    "iso_8859-6",
    "iso-ir-126",
    "iso-8859-7",
    "iso_8859-7",
    "iso-ir-127",
    "iso-ir-157",
    "iso-8859-8",
    "iso_8859-8",
    "iso-ir-138",
    "iso-ir-148",
    "iso-8859-9",
    "iso_8859-9",
    "iso-ir-109",
    "iso-ir-149",
    "big5-hkscs",
    "csshiftjis",
    "iso-8859-10",
    "iso-8859-11",
    "csisolatin1",
    "csisolatin2",
    "iso-8859-13",
    "csisolatin3",
    "iso-8859-14",
    "windows-874",
    "csisolatin4",
    "iso-8859-15",
    "iso_8859-15",
    "csisolatin5",
    "iso-8859-16",
    "csisolatin6",
    "windows-949",
    "csisolatin9",
    "csiso88596e",
    "csiso88598e",
    "unicodefffe",
    "unicodefeff",
    "csmacintosh",
    "csiso88596i",
    "csiso88598i",
    "windows-31j",
    "x-mac-roman",
    "iso-2022-cn",
    "iso-2022-jp",
    "csiso2022jp",
    "iso-2022-kr",
    "csiso2022kr",
    "replacement",
    "windows-1250",
    "windows-1251",
    "windows-1252",
    "windows-1253",
    "windows-1254",
    "windows-1255",
    "windows-1256",
    "windows-1257",
    "windows-1258",
    "iso-8859-6-e",
    "iso-8859-8-e",
    "iso-8859-6-i",
    "iso-8859-8-i",
    "sun_eu_greek",
    "csksc56011987",
    "unicode20utf8",
    "unicode11utf8",
    "ks_c_5601-1987",
    "ansi_x3.4-1968",
    "ks_c_5601-1989",
    "x-mac-cyrillic",
    "x-user-defined",
    "csiso58gb231280",
    "iso-10646-ucs-2",
    "iso_8859-1:1987",
    "iso_8859-2:1987",
    "iso_8859-6:1987",
    "iso_8859-7:1987",
    "iso_8859-3:1988",
    "iso_8859-4:1988",
    "iso_8859-5:1988",
    "iso_8859-8:1988",
    "x-unicode20utf8",
    "iso_8859-9:1989",
    "csisolatingreek",
    "x-mac-ukrainian",
    "iso-2022-cn-ext",
    "csisolatinarabic",
    "csisolatinhebrew",
    "unicode-1-1-utf-8",
    "csisolatincyrillic",
    "cseucpkdfmtjapanese",
];
2384
2385	static ENCODINGS_IN_LABEL_SORT: [&'static Encoding; `228`] = [
2386	&WINDOWS_1252_INIT,
2387	&ISO_8859_2_INIT,
2388	&ISO_8859_3_INIT,
2389	&ISO_8859_4_INIT,
2390	&WINDOWS_1254_INIT,
2391	&ISO_8859_10_INIT,
2392	&ISO_8859_15_INIT,
2393	&IBM866_INIT,
2394	&MACINTOSH_INIT,
2395	&KOI8_R_INIT,
2396	&GBK_INIT,
2397	&BIG5_INIT,
2398	&UTF_8_INIT,
2399	&KOI8_R_INIT,
2400	&SHIFT_JIS_INIT,
2401	&UTF_16LE_INIT,
2402	&SHIFT_JIS_INIT,
2403	&IBM866_INIT,
2404	&UTF_8_INIT,
2405	&WINDOWS_1252_INIT,
2406	&WINDOWS_1252_INIT,
2407	&GBK_INIT,
2408	&ISO_8859_7_INIT,
2409	&WINDOWS_1250_INIT,
2410	&WINDOWS_1251_INIT,
2411	&WINDOWS_1252_INIT,
2412	&GBK_INIT,
2413	&WINDOWS_1252_INIT,
2414	&ISO_8859_2_INIT,
2415	&WINDOWS_1253_INIT,
2416	&ISO_8859_3_INIT,
2417	&WINDOWS_1254_INIT,
2418	&ISO_8859_4_INIT,
2419	&WINDOWS_1255_INIT,
2420	&BIG5_INIT,
2421	&WINDOWS_1254_INIT,
2422	&UTF_16LE_INIT,
2423	&WINDOWS_1256_INIT,
2424	&IBM866_INIT,
2425	&ISO_8859_10_INIT,
2426	&WINDOWS_1257_INIT,
2427	&WINDOWS_1258_INIT,
2428	&ISO_8859_7_INIT,
2429	&WINDOWS_1252_INIT,
2430	&ISO_8859_6_INIT,
2431	&ISO_8859_8_INIT,
2432	&EUC_KR_INIT,
2433	&EUC_JP_INIT,
2434	&KOI8_R_INIT,
2435	&KOI8_R_INIT,
2436	&EUC_KR_INIT,
2437	&SHIFT_JIS_INIT,
2438	&KOI8_U_INIT,
2439	&ISO_8859_8_INIT,
2440	&WINDOWS_874_INIT,
2441	&GB18030_INIT,
2442	&EUC_KR_INIT,
2443	&GBK_INIT,
2444	&WINDOWS_874_INIT,
2445	&BIG5_INIT,
2446	&UTF_16LE_INIT,
2447	&GBK_INIT,
2448	&ISO_8859_8_I_INIT,
2449	&KOI8_R_INIT,
2450	&EUC_KR_INIT,
2451	&KOI8_U_INIT,
2452	&WINDOWS_1250_INIT,
2453	&EUC_KR_INIT,
2454	&WINDOWS_1251_INIT,
2455	&WINDOWS_1252_INIT,
2456	&GBK_INIT,
2457	&WINDOWS_1252_INIT,
2458	&ISO_8859_2_INIT,
2459	&WINDOWS_1253_INIT,
2460	&ISO_8859_3_INIT,
2461	&ISO_8859_6_INIT,
2462	&WINDOWS_1254_INIT,
2463	&ISO_8859_4_INIT,
2464	&WINDOWS_1255_INIT,
2465	&ISO_8859_5_INIT,
2466	&BIG5_INIT,
2467	&WINDOWS_1256_INIT,
2468	&IBM866_INIT,
2469	&ISO_8859_6_INIT,
2470	&WINDOWS_1257_INIT,
2471	&ISO_8859_7_INIT,
2472	&ISO_8859_6_INIT,
2473	&ISO_8859_7_INIT,
2474	&ISO_8859_7_INIT,
2475	&WINDOWS_1258_INIT,
2476	&ISO_8859_8_INIT,
2477	&WINDOWS_1254_INIT,
2478	&ISO_8859_5_INIT,
2479	&UTF_16BE_INIT,
2480	&UTF_16LE_INIT,
2481	&WINDOWS_1252_INIT,
2482	&SHIFT_JIS_INIT,
2483	&EUC_JP_INIT,
2484	&ISO_8859_10_INIT,
2485	&WINDOWS_1252_INIT,
2486	&WINDOWS_874_INIT,
2487	&ISO_8859_2_INIT,
2488	&ISO_8859_3_INIT,
2489	&ISO_8859_13_INIT,
2490	&ISO_8859_4_INIT,
2491	&ISO_8859_14_INIT,
2492	&ISO_8859_5_INIT,
2493	&ISO_8859_15_INIT,
2494	&ISO_8859_6_INIT,
2495	&ISO_8859_7_INIT,
2496	&ISO_8859_8_INIT,
2497	&GBK_INIT,
2498	&WINDOWS_1254_INIT,
2499	&UTF_16LE_INIT,
2500	&MACINTOSH_INIT,
2501	&SHIFT_JIS_INIT,
2502	&SHIFT_JIS_INIT,
2503	&WINDOWS_1252_INIT,
2504	&ISO_8859_10_INIT,
2505	&ISO_8859_4_INIT,
2506	&GBK_INIT,
2507	&WINDOWS_1252_INIT,
2508	&WINDOWS_1252_INIT,
2509	&ISO_8859_2_INIT,
2510	&WINDOWS_874_INIT,
2511	&ISO_8859_2_INIT,
2512	&ISO_8859_2_INIT,
2513	&REPLACEMENT_INIT,
2514	&ISO_8859_3_INIT,
2515	&ISO_8859_3_INIT,
2516	&ISO_8859_13_INIT,
2517	&ISO_8859_4_INIT,
2518	&ISO_8859_4_INIT,
2519	&ISO_8859_14_INIT,
2520	&ISO_8859_5_INIT,
2521	&ISO_8859_5_INIT,
2522	&ISO_8859_5_INIT,
2523	&ISO_8859_15_INIT,
2524	&ISO_8859_6_INIT,
2525	&ISO_8859_6_INIT,
2526	&ISO_8859_7_INIT,
2527	&ISO_8859_7_INIT,
2528	&ISO_8859_7_INIT,
2529	&ISO_8859_6_INIT,
2530	&ISO_8859_10_INIT,
2531	&ISO_8859_8_INIT,
2532	&ISO_8859_8_INIT,
2533	&ISO_8859_8_INIT,
2534	&WINDOWS_1254_INIT,
2535	&WINDOWS_1254_INIT,
2536	&WINDOWS_1254_INIT,
2537	&ISO_8859_3_INIT,
2538	&EUC_KR_INIT,
2539	&BIG5_INIT,
2540	&SHIFT_JIS_INIT,
2541	&ISO_8859_10_INIT,
2542	&WINDOWS_874_INIT,
2543	&WINDOWS_1252_INIT,
2544	&ISO_8859_2_INIT,
2545	&ISO_8859_13_INIT,
2546	&ISO_8859_3_INIT,
2547	&ISO_8859_14_INIT,
2548	&WINDOWS_874_INIT,
2549	&ISO_8859_4_INIT,
2550	&ISO_8859_15_INIT,
2551	&ISO_8859_15_INIT,
2552	&WINDOWS_1254_INIT,
2553	&ISO_8859_16_INIT,
2554	&ISO_8859_10_INIT,
2555	&EUC_KR_INIT,
2556	&ISO_8859_15_INIT,
2557	&ISO_8859_6_INIT,
2558	&ISO_8859_8_INIT,
2559	&UTF_16BE_INIT,
2560	&UTF_16LE_INIT,
2561	&MACINTOSH_INIT,
2562	&ISO_8859_6_INIT,
2563	&ISO_8859_8_I_INIT,
2564	&SHIFT_JIS_INIT,
2565	&MACINTOSH_INIT,
2566	&REPLACEMENT_INIT,
2567	&ISO_2022_JP_INIT,
2568	&ISO_2022_JP_INIT,
2569	&REPLACEMENT_INIT,
2570	&REPLACEMENT_INIT,
2571	&REPLACEMENT_INIT,
2572	&WINDOWS_1250_INIT,
2573	&WINDOWS_1251_INIT,
2574	&WINDOWS_1252_INIT,
2575	&WINDOWS_1253_INIT,
2576	&WINDOWS_1254_INIT,
2577	&WINDOWS_1255_INIT,
2578	&WINDOWS_1256_INIT,
2579	&WINDOWS_1257_INIT,
2580	&WINDOWS_1258_INIT,
2581	&ISO_8859_6_INIT,
2582	&ISO_8859_8_INIT,
2583	&ISO_8859_6_INIT,
2584	&ISO_8859_8_I_INIT,
2585	&ISO_8859_7_INIT,
2586	&EUC_KR_INIT,
2587	&UTF_8_INIT,
2588	&UTF_8_INIT,
2589	&EUC_KR_INIT,
2590	&WINDOWS_1252_INIT,
2591	&EUC_KR_INIT,
2592	&X_MAC_CYRILLIC_INIT,
2593	&X_USER_DEFINED_INIT,
2594	&GBK_INIT,
2595	&UTF_16LE_INIT,
2596	&WINDOWS_1252_INIT,
2597	&ISO_8859_2_INIT,
2598	&ISO_8859_6_INIT,
2599	&ISO_8859_7_INIT,
2600	&ISO_8859_3_INIT,
2601	&ISO_8859_4_INIT,
2602	&ISO_8859_5_INIT,
2603	&ISO_8859_8_INIT,
2604	&UTF_8_INIT,
2605	&WINDOWS_1254_INIT,
2606	&ISO_8859_7_INIT,
2607	&X_MAC_CYRILLIC_INIT,
2608	&REPLACEMENT_INIT,
2609	&ISO_8859_6_INIT,
2610	&ISO_8859_8_INIT,
2611	&UTF_8_INIT,
2612	&ISO_8859_5_INIT,
2613	&EUC_JP_INIT,
2614	];
2615
2616	// END GENERATED CODE
2617
2618	/// An encoding as defined in the [Encoding Standard][1].
2619	///
2620	/// An _encoding_ defines a mapping from a `u8` sequence to a `char` sequence
2621	/// and, in most cases, vice versa. Each encoding has a name, an output
2622	/// encoding, and one or more labels.
2623	///
2624	/// _Labels_ are ASCII-case-insensitive strings that are used to identify an
2625	/// encoding in formats and protocols. The _name_ of the encoding is the
2626	/// preferred label in the case appropriate for returning from the
2627	/// [`characterSet`][2] property of the `Document` DOM interface.
2628	///
2629	/// The _output encoding_ is the encoding used for form submission and URL
2630	/// parsing on Web pages in the encoding. This is UTF-8 for the replacement,
2631	/// UTF-16LE and UTF-16BE encodings and the encoding itself for other
2632	/// encodings.
2633	///
2634	/// [1]: https://encoding.spec.whatwg.org/
2635	/// [2]: https://dom.spec.whatwg.org/#dom-document-characterset
2636	///
2637	/// # Streaming vs. Non-Streaming
2638	///
2639	/// When you have the entire input in a single buffer, you can use the
2640	/// methods [`decode()`][3], [`decode_with_bom_removal()`][3],
2641	/// [`decode_without_bom_handling()`][5],
2642	/// [`decode_without_bom_handling_and_without_replacement()`][6] and
2643	/// [`encode()`][7]. (These methods are available to Rust callers only and are
2644	/// not available in the C API.) Unlike the rest of the API available to Rust,
2645	/// these methods perform heap allocations. You should the `Decoder` and
2646	/// `Encoder` objects when your input is split into multiple buffers or when
2647	/// you want to control the allocation of the output buffers.
2648	///
2649	/// [3]: #method.decode
2650	/// [4]: #method.decode_with_bom_removal
2651	/// [5]: #method.decode_without_bom_handling
2652	/// [6]: #method.decode_without_bom_handling_and_without_replacement
2653	/// [7]: #method.encode
2654	///
2655	/// # Instances
2656	///
2657	/// All instances of `Encoding` are statically allocated and have the `'static`
2658	/// lifetime. There is precisely one unique `Encoding` instance for each
2659	/// encoding defined in the Encoding Standard.
2660	///
2661	/// To obtain a reference to a particular encoding whose identity you know at
2662	/// compile time, use a `static` that refers to encoding. There is a `static`
2663	/// for each encoding. The `static`s are named in all caps with hyphens
2664	/// replaced with underscores (and in C/C++ have `_ENCODING` appended to the
2665	/// name). For example, if you know at compile time that you will want to
2666	/// decode using the UTF-8 encoding, use the `UTF_8` `static` (`UTF_8_ENCODING`
2667	/// in C/C++).
2668	///
2669	/// Additionally, there are non-reference-typed forms ending with `_INIT` to
2670	/// work around the problem that `static`s of the type `&'static Encoding`
2671	/// cannot be used to initialize items of an array whose type is
2672	/// `[&'static Encoding; N]`.
2673	///
2674	/// If you don't know what encoding you need at compile time and need to
2675	/// dynamically get an encoding by label, use
2676	/// <code>Encoding::<a href="#method.for_label">for_label</a>(<var>label</var>)</code>.
2677	///
2678	/// Instances of `Encoding` can be compared with `==` (in both Rust and in
2679	/// C/C++).
2680	pub struct Encoding {
2681	name: &'static str,
2682	variant: VariantEncoding,
2683	}
2684
2685	impl Encoding {
2686	/// Implements the
2687	/// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
2688	/// algorithm.
2689	///
2690	/// If, after ASCII-lowercasing and removing leading and trailing
2691	/// whitespace, the argument matches a label defined in the Encoding
2692	/// Standard, `Some(&'static Encoding)` representing the corresponding
2693	/// encoding is returned. If there is no match, `None` is returned.
2694	///
2695	/// This is the right method to use if the action upon the method returning
2696	/// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`) instead.
2697	/// When the action upon the method returning `None` is not to proceed with
2698	/// a fallback but to refuse processing, `for_label_no_replacement()` is more
2699	/// appropriate.
2700	///
2701	/// The argument is of type `&[u8]` instead of `&str` to save callers
2702	/// that are extracting the label from a non-UTF-8 protocol the trouble
2703	/// of conversion to UTF-8. (If you have a `&str`, just call `.as_bytes()`
2704	/// on it.)
2705	///
2706	/// Available via the C wrapper.
2707	///
2708	/// # Example
2709	/// ```
2710	/// use encoding_rs::Encoding;
2711	///
2712	/// assert_eq!(Some(encoding_rs::UTF_8), Encoding::for_label(b"utf-8"));
2713	/// assert_eq!(Some(encoding_rs::UTF_8), Encoding::for_label(b"unicode11utf8"));
2714	///
2715	/// assert_eq!(Some(encoding_rs::ISO_8859_2), Encoding::for_label(b"latin2"));
2716	///
2717	/// assert_eq!(Some(encoding_rs::UTF_16BE), Encoding::for_label(b"utf-16be"));
2718	///
2719	/// assert_eq!(None, Encoding::for_label(b"unrecognized label"));
2720	/// ```
2721	pub fn for_label(label: &[u8]) -> Option<&'static Encoding> {
2722	let mut trimmed = [`0u8`; LONGEST_LABEL_LENGTH];
2723	let mut trimmed_pos = `0usize`;
2724	let mut iter = label.into_iter();
2725	// before
2726	loop {
2727	match iter.next() {
2728	None => {
2729	return None;
2730	}
2731	Some(byte) => {
2732	// The characters used in labels are:
2733	// a-z (except q, but excluding it below seems excessive)
2734	// 0-9
2735	// . _ - :
2736	match *byte {
2737	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2738	continue;
2739	}
2740	b'A'..=b'Z' => {
2741	trimmed[trimmed_pos] = *byte + `0x20u8`;
2742	trimmed_pos = `1usize`;
2743	break;
2744	}
2745	b'a'..=b'z' \| b'0'..=b'9' \| b'-' \| b'_' \| b':' \| b'.' => {
2746	trimmed[trimmed_pos] = *byte;
2747	trimmed_pos = `1usize`;
2748	break;
2749	}
2750	_ => {
2751	return None;
2752	}
2753	}
2754	}
2755	}
2756	}
2757	// inside
2758	loop {
2759	match iter.next() {
2760	None => {
2761	break;
2762	}
2763	Some(byte) => {
2764	match *byte {
2765	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2766	break;
2767	}
2768	b'A'..=b'Z' => {
2769	if trimmed_pos == LONGEST_LABEL_LENGTH {
2770	// There's no encoding with a label this long
2771	return None;
2772	}
2773	trimmed[trimmed_pos] = *byte + `0x20u8`;
2774	trimmed_pos += `1usize`;
2775	continue;
2776	}
2777	b'a'..=b'z' \| b'0'..=b'9' \| b'-' \| b'_' \| b':' \| b'.' => {
2778	if trimmed_pos == LONGEST_LABEL_LENGTH {
2779	// There's no encoding with a label this long
2780	return None;
2781	}
2782	trimmed[trimmed_pos] = *byte;
2783	trimmed_pos += `1usize`;
2784	continue;
2785	}
2786	_ => {
2787	return None;
2788	}
2789	}
2790	}
2791	}
2792	}
2793	// after
2794	loop {
2795	match iter.next() {
2796	None => {
2797	break;
2798	}
2799	Some(byte) => {
2800	match *byte {
2801	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2802	continue;
2803	}
2804	_ => {
2805	// There's no label with space in the middle
2806	return None;
2807	}
2808	}
2809	}
2810	}
2811	}
2812	let candidate = &trimmed[..trimmed_pos];
2813	match LABELS_SORTED.binary_search_by(\|probe\| {
2814	let bytes = probe.as_bytes();
2815	let c = bytes.len().cmp(&candidate.len());
2816	if c != Ordering::Equal {
2817	return c;
2818	}
2819	let probe_iter = bytes.iter().rev();
2820	let candidate_iter = candidate.iter().rev();
2821	probe_iter.cmp(candidate_iter)
2822	}) {
2823	Ok(i) => Some(ENCODINGS_IN_LABEL_SORT[i]),
2824	Err(_) => None,
2825	}
2826	}
2827
2828	/// This method behaves the same as `for_label()`, except when `for_label()`
2829	/// would return `Some(REPLACEMENT)`, this method returns `None` instead.
2830	///
2831	/// This method is useful in scenarios where a fatal error is required
2832	/// upon invalid label, because in those cases the caller typically wishes
2833	/// to treat the labels that map to the replacement encoding as fatal
2834	/// errors, too.
2835	///
2836	/// It is not OK to use this method when the action upon the method returning
2837	/// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`). In such a
2838	/// case, the `for_label()` method should be used instead in order to avoid
2839	/// unsafe fallback for labels that `for_label()` maps to `Some(REPLACEMENT)`.
2840	///
2841	/// Available via the C wrapper.
2842	#[inline]
2843	pub fn for_label_no_replacement(label: &[u8]) -> Option<&'static Encoding> {
2844	match Encoding::for_label(label) {
2845	None => None,
2846	Some(encoding) => {
2847	if encoding == REPLACEMENT {
2848	None
2849	} else {
2850	Some(encoding)
2851	}
2852	}
2853	}
2854	}
2855
2856	/// Performs non-incremental BOM sniffing.
2857	///
2858	/// The argument must either be a buffer representing the entire input
2859	/// stream (non-streaming case) or a buffer representing at least the first
2860	/// three bytes of the input stream (streaming case).
2861	///
2862	/// Returns `Some((UTF_8, 3))`, `Some((UTF_16LE, 2))` or
2863	/// `Some((UTF_16BE, 2))` if the argument starts with the UTF-8, UTF-16LE
2864	/// or UTF-16BE BOM or `None` otherwise.
2865	///
2866	/// Available via the C wrapper.
2867	#[inline]
2868	pub fn for_bom(buffer: &[u8]) -> Option<(&'static Encoding, usize)> {
2869	if buffer.starts_with(b"`\xEF\xBB\xBF`") {
2870	Some((UTF_8, `3`))
2871	} else if buffer.starts_with(b"`\xFF\xFE`") {
2872	Some((UTF_16LE, `2`))
2873	} else if buffer.starts_with(b"`\xFE\xFF`") {
2874	Some((UTF_16BE, `2`))
2875	} else {
2876	None
2877	}
2878	}
2879
2880	/// Returns the name of this encoding.
2881	///
2882	/// This name is appropriate to return as-is from the DOM
2883	/// `document.characterSet` property.
2884	///
2885	/// Available via the C wrapper.
2886	#[inline]
2887	pub fn name(&'static self) -> &'static str {
2888	self.name
2889	}
2890
2891	/// Checks whether the _output encoding_ of this encoding can encode every
2892	/// `char`. (Only true if the output encoding is UTF-8.)
2893	///
2894	/// Available via the C wrapper.
2895	#[inline]
2896	pub fn can_encode_everything(&'static self) -> bool {
2897	self.output_encoding() == UTF_8
2898	}
2899
2900	/// Checks whether the bytes 0x00...0x7F map exclusively to the characters
2901	/// U+0000...U+007F and vice versa.
2902	///
2903	/// Available via the C wrapper.
2904	#[inline]
2905	pub fn is_ascii_compatible(&'static self) -> bool {
2906	!(self == REPLACEMENT \|\| self == UTF_16BE \|\| self == UTF_16LE \|\| self == ISO_2022_JP)
2907	}
2908
2909	/// Checks whether this encoding maps one byte to one Basic Multilingual
2910	/// Plane code point (i.e. byte length equals decoded UTF-16 length) and
2911	/// vice versa (for mappable characters).
2912	///
2913	/// `true` iff this encoding is on the list of [Legacy single-byte
2914	/// encodings](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings)
2915	/// in the spec or x-user-defined.
2916	///
2917	/// Available via the C wrapper.
2918	#[inline]
2919	pub fn is_single_byte(&'static self) -> bool {
2920	self.variant.is_single_byte()
2921	}
2922
/// Checks whether the bytes 0x00...0x7F map mostly to the characters
/// U+0000...U+007F and vice versa.
///
/// This is `false` for replacement, UTF-16BE and UTF-16LE and `true` for
/// every other encoding (including ISO-2022-JP).
#[cfg(feature = "alloc")]
#[inline]
fn is_potentially_borrowable(&'static self) -> bool {
    !(self == REPLACEMENT || self == UTF_16BE || self == UTF_16LE)
}
2930
2931	/// Returns the _output encoding_ of this encoding. This is UTF-8 for
2932	/// UTF-16BE, UTF-16LE, and replacement and the encoding itself otherwise.
2933	///
2934	/// _Note:_ The _output encoding_ concept is needed for form submission and
2935	/// error handling in the query strings of URLs in the Web Platform.
2936	///
2937	/// Available via the C wrapper.
2938	#[inline]
2939	pub fn output_encoding(&'static self) -> &'static Encoding {
2940	if self == REPLACEMENT \|\| self == UTF_16BE \|\| self == UTF_16LE {
2941	UTF_8
2942	} else {
2943	self
2944	}
2945	}
2946
/// Decodes a complete input buffer to `Cow<'a, str>` _with BOM sniffing_,
/// replacing malformed sequences with the REPLACEMENT CHARACTER. The whole
/// input must be available as a single buffer (i.e. the end of the buffer
/// marks the end of the stream).
///
/// A BOM, if present, is not part of the output.
///
/// This method implements the (non-streaming version of) the
/// [_decode_](https://encoding.spec.whatwg.org/#decode) spec concept.
///
/// The returned tuple consists of:
///
/// 1. the decoded text;
/// 2. the encoding that was actually used (BOM sniffing may make it differ
///    from this encoding);
/// 3. whether there were malformed sequences (that were replaced with the
///    REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use `new_decoder()`
/// when decoding segmented input.
///
/// When unable to borrow, this method performs one or two heap allocations
/// for the backing buffer of the `String`: one if there are no errors and
/// potentially another one in the presence of errors. The first allocation
/// assumes jemalloc and may not be optimal with allocators that do not use
/// power-of-two buckets. A borrow is performed if decoding UTF-8 and the
/// input is valid UTF-8, if decoding an ASCII-compatible encoding and the
/// input is ASCII-only, or when decoding ISO-2022-JP and the input is
/// entirely in the ASCII state without state transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
#[inline]
pub fn decode<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, &'static Encoding, bool) {
    // Default to this encoding and the untouched input; a sniffed BOM
    // overrides both.
    let mut actual: &'static Encoding = self;
    let mut payload = bytes;
    if let Some((sniffed, bom_length)) = Encoding::for_bom(bytes) {
        actual = sniffed;
        payload = &bytes[bom_length..];
    }
    let (text, malformed) = actual.decode_without_bom_handling(payload);
    (text, actual, malformed)
}
2994
/// Decode complete input to `Cow<'a, str>` _with BOM removal_ and with
/// malformed sequences replaced with the REPLACEMENT CHARACTER when the
/// entire input is available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
/// Only an initial byte sequence that is a BOM for _this_ encoding is
/// removed: `EF BB BF` when invoked on `UTF_8`, `FF FE` when invoked on
/// `UTF_16LE` and `FE FF` when invoked on `UTF_16BE`. For every other
/// encoding the input is decoded as-is.
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode_](https://encoding.spec.whatwg.org/#utf-8-decode) spec
/// concept.
///
/// The second item in the returned pair indicates whether there were
/// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_with_bom_removal()` when decoding segmented input.
///
/// This method performs one or two heap allocations for the backing
/// buffer of the `String` when unable to borrow. (One allocation if there
/// are no errors and potentially another one in the presence of errors.)
/// The first allocation assumes jemalloc and may not be optimal with
/// allocators that do not use power-of-two buckets. A borrow is performed
/// if decoding UTF-8 and the input is valid UTF-8, if decoding an
/// ASCII-compatible encoding and the input is ASCII-only, or when decoding
/// ISO-2022-JP and the input is entirely in the ASCII state without state
/// transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
#[inline]
pub fn decode_with_bom_removal<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
    let without_bom = if self == UTF_8 && bytes.starts_with(b"\xEF\xBB\xBF") {
        &bytes[3..]
    } else if (self == UTF_16LE && bytes.starts_with(b"\xFF\xFE"))
        || (self == UTF_16BE && bytes.starts_with(b"\xFE\xFF"))
    {
        &bytes[2..]
    } else {
        bytes
    };
    self.decode_without_bom_handling(without_bom)
}
3045
/// Decode complete input to `Cow<'a, str>` _without BOM handling_ and
/// with malformed sequences replaced with the REPLACEMENT CHARACTER when
/// the entire input is available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode without BOM_](https://encoding.spec.whatwg.org/#utf-8-decode-without-bom)
/// spec concept.
///
/// The second item in the returned pair indicates whether there were
/// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_without_bom_handling()` when decoding segmented input.
///
/// This method performs one or two heap allocations for the backing
/// buffer of the `String` when unable to borrow. (One allocation if there
/// are no errors and potentially another one in the presence of errors.)
/// The first allocation assumes jemalloc and may not be optimal with
/// allocators that do not use power-of-two buckets. A borrow is performed
/// if decoding UTF-8 and the input is valid UTF-8, if decoding an
/// ASCII-compatible encoding and the input is ASCII-only, or when decoding
/// ISO-2022-JP and the input is entirely in the ASCII state without state
/// transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_without_bom_handling<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
    let (mut decoder, mut string, mut total_read) = if self.is_potentially_borrowable() {
        // Find the longest prefix that can be taken over verbatim.
        let valid_up_to = if self == UTF_8 {
            utf8_valid_up_to(bytes)
        } else if self == ISO_2022_JP {
            iso_2022_jp_ascii_valid_up_to(bytes)
        } else {
            ascii_valid_up_to(bytes)
        };
        if valid_up_to == bytes.len() {
            // SAFETY: The validator selected above accepted every byte of
            // `bytes`, i.e. the slice is valid UTF-8 or consists solely of
            // ASCII, which is a subset of UTF-8.
            let borrowed: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return (Cow::Borrowed(borrowed), false);
        }
        let decoder = self.new_decoder_without_bom_handling();

        // Prefer the power-of-two rounded error-free size, but never
        // allocate more than the worst case with replacements.
        let rounded_without_replacement = checked_next_power_of_two(checked_add(
            valid_up_to,
            decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to),
        ));
        let with_replacement = checked_add(
            valid_up_to,
            decoder.max_utf8_buffer_length(bytes.len() - valid_up_to),
        );
        let mut string = String::with_capacity(
            checked_min(rounded_without_replacement, with_replacement).unwrap(),
        );
        // SAFETY: `valid_up_to < bytes.len()`, so the source range is
        // readable, and the destination is a fresh allocation that cannot
        // overlap `bytes`. Both candidate capacities are `valid_up_to` plus
        // a buffer length (NOTE(review): relies on `checked_add` /
        // `checked_min` behaving as their names suggest -- confirm), so the
        // buffer holds at least `valid_up_to` bytes. The copied prefix was
        // validated above, which keeps the `String` valid UTF-8. The length
        // is set only after the bytes have been initialized.
        unsafe {
            let vec = string.as_mut_vec();
            core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to);
            vec.set_len(valid_up_to);
        }
        (decoder, string, valid_up_to)
    } else {
        // Nothing can be borrowed or copied verbatim: decode from the start.
        let decoder = self.new_decoder_without_bom_handling();
        let rounded_without_replacement = checked_next_power_of_two(
            decoder.max_utf8_buffer_length_without_replacement(bytes.len()),
        );
        let with_replacement = decoder.max_utf8_buffer_length(bytes.len());
        let string = String::with_capacity(
            checked_min(rounded_without_replacement, with_replacement).unwrap(),
        );
        (decoder, string, 0)
    };

    let mut total_had_errors = false;
    loop {
        // `true`: the remaining input is the end of the stream.
        let (result, read, had_errors) =
            decoder.decode_to_string(&bytes[total_read..], &mut string, true);
        total_read += read;
        total_had_errors |= had_errors;
        match result {
            CoderResult::InputEmpty => {
                debug_assert_eq!(total_read, bytes.len());
                return (Cow::Owned(string), total_had_errors);
            }
            CoderResult::OutputFull => {
                // Allocate for the worst case. That is, we should come
                // here at most once per invocation of this method.
                let needed = decoder.max_utf8_buffer_length(bytes.len() - total_read);
                string.reserve(needed.unwrap());
            }
        }
    }
}
3145
/// Decode complete input to `Cow<'a, str>` _without BOM handling_ and
/// _with malformed sequences treated as fatal_ when the entire input is
/// available as a single buffer (i.e. the end of the buffer marks the end
/// of the stream).
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode without BOM or fail_](https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
/// spec concept.
///
/// Returns `None` if a malformed sequence was encountered and the result
/// of the decode as `Some(Cow<'a, str>)` otherwise.
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_without_bom_handling()` when decoding segmented input.
///
/// This method performs a single heap allocation for the backing
/// buffer of the `String` when unable to borrow. A borrow is performed if
/// decoding UTF-8 and the input is valid UTF-8, if decoding an
/// ASCII-compatible encoding and the input is ASCII-only, or when decoding
/// ISO-2022-JP and the input is entirely in the ASCII state without state
/// transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_without_bom_handling_and_without_replacement<'a>(
    &'static self,
    bytes: &'a [u8],
) -> Option<Cow<'a, str>> {
    if self == UTF_8 {
        // For UTF-8 the outcome is decided by validation alone: either the
        // whole input can be borrowed or it is malformed.
        let valid_up_to = utf8_valid_up_to(bytes);
        if valid_up_to == bytes.len() {
            // SAFETY: `utf8_valid_up_to` reported the whole slice as valid
            // UTF-8.
            let borrowed: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return Some(Cow::Borrowed(borrowed));
        }
        return None;
    }
    let (mut decoder, mut string, input) = if self.is_potentially_borrowable() {
        let valid_up_to = if self == ISO_2022_JP {
            iso_2022_jp_ascii_valid_up_to(bytes)
        } else {
            ascii_valid_up_to(bytes)
        };
        if valid_up_to == bytes.len() {
            // SAFETY: The validator selected above accepted every byte of
            // `bytes` as ASCII, which is a subset of UTF-8.
            let borrowed: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return Some(Cow::Borrowed(borrowed));
        }
        let decoder = self.new_decoder_without_bom_handling();
        let mut string = String::with_capacity(
            checked_add(
                valid_up_to,
                decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to),
            )
            .unwrap(),
        );
        // SAFETY: `valid_up_to < bytes.len()`, so the source range is
        // readable, and the destination is a fresh allocation that cannot
        // overlap `bytes`. The capacity is `valid_up_to` plus a buffer
        // length (NOTE(review): relies on `checked_add` being a checked
        // sum -- confirm), so the buffer holds at least `valid_up_to`
        // bytes. The copied prefix was validated as ASCII above, which
        // keeps the `String` valid UTF-8. The length is set only after the
        // bytes have been initialized.
        unsafe {
            let vec = string.as_mut_vec();
            core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to);
            vec.set_len(valid_up_to);
        }
        (decoder, string, &bytes[valid_up_to..])
    } else {
        let decoder = self.new_decoder_without_bom_handling();
        let string = String::with_capacity(
            decoder
                .max_utf8_buffer_length_without_replacement(bytes.len())
                .unwrap(),
        );
        (decoder, string, bytes)
    };
    // `true`: `input` is the end of the stream.
    let (result, read) = decoder.decode_to_string_without_replacement(input, &mut string, true);
    match result {
        DecoderResult::InputEmpty => {
            debug_assert_eq!(read, input.len());
            Some(Cow::Owned(string))
        }
        DecoderResult::Malformed(_, _) => None,
        // The buffer was sized from
        // `max_utf8_buffer_length_without_replacement()` for all of
        // `input`, which presumably rules out running out of space.
        DecoderResult::OutputFull => unreachable!(),
    }
}
3233
/// Encode complete input to `Cow<'a, [u8]>` using the
/// [_output encoding_](Encoding::output_encoding) of this encoding with
/// unmappable characters replaced with decimal numeric character references
/// when the entire input is available as a single buffer (i.e. the end of
/// the buffer marks the end of the stream).
///
/// This method implements the (non-streaming version of) the
/// [_encode_](https://encoding.spec.whatwg.org/#encode) spec concept. For
/// the [_UTF-8 encode_](https://encoding.spec.whatwg.org/#utf-8-encode)
/// spec concept, it is slightly more efficient to use
/// <code><var>string</var>.as_bytes()</code> instead of invoking this
/// method on `UTF_8`.
///
/// The second item in the returned tuple is the encoding that was actually
/// used (which may differ from this encoding thanks to some encodings
/// having UTF-8 as their output encoding).
///
/// The third item in the returned tuple indicates whether there were
/// unmappable characters (that were replaced with HTML numeric character
/// references).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use `new_encoder()`
/// when encoding segmented output.
///
/// When encoding to UTF-8 or when encoding an ASCII-only input to an
/// ASCII-compatible encoding, this method returns a borrow of the input
/// without a heap allocation. Otherwise, this method performs a single
/// heap allocation for the backing buffer of the `Vec<u8>` if there are no
/// unmappable characters and potentially multiple heap allocations if
/// there are. These allocations are tuned for jemalloc and may not be
/// optimal when using a different allocator that doesn't use power-of-two
/// buckets.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn encode<'a>(&'static self, string: &'a str) -> (Cow<'a, [u8]>, &'static Encoding, bool) {
    let output_encoding = self.output_encoding();
    if output_encoding == UTF_8 {
        // A `str` already is UTF-8.
        return (Cow::Borrowed(string.as_bytes()), output_encoding, false);
    }
    debug_assert!(output_encoding.is_potentially_borrowable());
    let bytes = string.as_bytes();
    // Find the longest prefix that can be taken over verbatim.
    let valid_up_to = if output_encoding == ISO_2022_JP {
        iso_2022_jp_ascii_valid_up_to(bytes)
    } else {
        ascii_valid_up_to(bytes)
    };
    if valid_up_to == bytes.len() {
        return (Cow::Borrowed(bytes), output_encoding, false);
    }
    let mut encoder = output_encoding.new_encoder();
    // `checked_next_power_of_two()` instead of `next_power_of_two()`: the
    // latter wraps to zero on overflow in release builds, which would
    // leave the buffer smaller than the prefix copied below. Overflow
    // panics instead, as documented above.
    let mut vec: Vec<u8> = Vec::with_capacity(
        checked_add(
            valid_up_to,
            encoder.max_buffer_length_from_utf8_if_no_unmappables(string.len() - valid_up_to),
        )
        .unwrap()
        .checked_next_power_of_two()
        .unwrap(),
    );
    // SAFETY: `valid_up_to < bytes.len()`, so the source range is readable,
    // and the destination is a fresh allocation that cannot overlap
    // `bytes`. The capacity is a power of two no smaller than `valid_up_to`
    // plus a buffer length (NOTE(review): relies on `checked_add` being a
    // checked sum -- confirm), so the buffer holds at least `valid_up_to`
    // bytes. The length is set only after the bytes have been initialized.
    unsafe {
        core::ptr::copy_nonoverlapping(bytes.as_ptr(), vec.as_mut_ptr(), valid_up_to);
        vec.set_len(valid_up_to);
    }
    let mut total_read = valid_up_to;
    let mut total_had_errors = false;
    loop {
        // `true`: the remaining input is the end of the stream.
        let (result, read, had_errors) =
            encoder.encode_from_utf8_to_vec(&string[total_read..], &mut vec, true);
        total_read += read;
        total_had_errors |= had_errors;
        match result {
            CoderResult::InputEmpty => {
                debug_assert_eq!(total_read, string.len());
                return (Cow::Owned(vec), output_encoding, total_had_errors);
            }
            CoderResult::OutputFull => {
                // reserve_exact wants to know how much more on top of current
                // length--not current capacity.
                let needed = encoder
                    .max_buffer_length_from_utf8_if_no_unmappables(string.len() - total_read);
                let rounded = checked_add(vec.capacity(), needed)
                    .unwrap()
                    .checked_next_power_of_two()
                    .unwrap();
                let additional = rounded - vec.len();
                vec.reserve_exact(additional);
            }
        }
    }
}
3330
3331	fn new_variant_decoder(&'static self) -> VariantDecoder {
3332	self.variant.new_variant_decoder()
3333	}
3334
3335	/// Instantiates a new decoder for this encoding with BOM sniffing enabled.
3336	///
3337	/// BOM sniffing may cause the returned decoder to morph into a decoder
3338	/// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding. The BOM
3339	/// does not appear in the output.
3340	///
3341	/// Available via the C wrapper.
3342	#[inline]
3343	pub fn new_decoder(&'static self) -> Decoder {
3344	Decoder::new(self, self.new_variant_decoder(), BomHandling::Sniff)
3345	}
3346
3347	/// Instantiates a new decoder for this encoding with BOM removal.
3348	///
3349	/// If the input starts with bytes that are the BOM for this encoding,
3350	/// those bytes are removed. However, the decoder never morphs into a
3351	/// decoder for another encoding: A BOM for another encoding is treated as
3352	/// (potentially malformed) input to the decoding algorithm for this
3353	/// encoding.
3354	///
3355	/// Available via the C wrapper.
3356	#[inline]
3357	pub fn new_decoder_with_bom_removal(&'static self) -> Decoder {
3358	Decoder::new(self, self.new_variant_decoder(), BomHandling::Remove)
3359	}
3360
3361	/// Instantiates a new decoder for this encoding with BOM handling disabled.
3362	///
3363	/// If the input starts with bytes that look like a BOM, those bytes are
3364	/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
3365	/// for another encoding.)
3366	///
3367	/// _Note:_ If the caller has performed BOM sniffing on its own but has not
3368	/// removed the BOM, the caller should use `new_decoder_with_bom_removal()`
3369	/// instead of this method to cause the BOM to be removed.
3370	///
3371	/// Available via the C wrapper.
3372	#[inline]
3373	pub fn new_decoder_without_bom_handling(&'static self) -> Decoder {
3374	Decoder::new(self, self.new_variant_decoder(), BomHandling::Off)
3375	}
3376
3377	/// Instantiates a new encoder for the [_output encoding_](Encoding::output_encoding)
3378	/// of this encoding.
3379	///
3380	/// _Note:_ The output encoding of UTF-16BE, UTF-16LE, and replacement is UTF-8. There
3381	/// is no encoder for UTF-16BE, UTF-16LE, and replacement themselves.
3382	///
3383	/// Available via the C wrapper.
3384	#[inline]
3385	pub fn new_encoder(&'static self) -> Encoder {
3386	let enc = self.output_encoding();
3387	enc.variant.new_encoder(enc)
3388	}
3389
/// Validates UTF-8.
///
/// Returns the length of the slice when the whole slice is valid UTF-8 and
/// otherwise the index of the first byte that makes the input malformed as
/// UTF-8.
///
/// At present this is faster than the corresponding standard library
/// functionality. This method may be removed in the future if this
/// implementation gets upstreamed to the standard library.
///
/// Available via the C wrapper.
pub fn utf8_valid_up_to(bytes: &[u8]) -> usize {
    // Resolves to the free function of the same name, not to this
    // associated function.
    let valid_prefix_len = utf8_valid_up_to(bytes);
    valid_prefix_len
}
3403
/// Validates ASCII.
///
/// Returns the length of the slice when the whole slice is valid ASCII and
/// otherwise the index of the first byte that makes the input malformed as
/// ASCII.
///
/// Available via the C wrapper.
pub fn ascii_valid_up_to(bytes: &[u8]) -> usize {
    // Resolves to the free function of the same name, not to this
    // associated function.
    let valid_prefix_len = ascii_valid_up_to(bytes);
    valid_prefix_len
}
3413
/// Validates ISO-2022-JP ASCII-state data.
///
/// Returns the length of the slice when the whole slice is representable
/// in the ASCII state of ISO-2022-JP and otherwise the index of the first
/// byte that makes the input not representable in the ASCII state of
/// ISO-2022-JP.
///
/// Available via the C wrapper.
pub fn iso_2022_jp_ascii_valid_up_to(bytes: &[u8]) -> usize {
    // Resolves to the free function of the same name, not to this
    // associated function.
    let valid_prefix_len = iso_2022_jp_ascii_valid_up_to(bytes);
    valid_prefix_len
}
3425	}
3426
3427	impl PartialEq for Encoding {
3428	#[inline]
3429	fn eq(&self, other: &Encoding) -> bool {
3430	(self as *const Encoding) == (other as *const Encoding)
3431	}
3432	}
3433
3434	impl Eq for Encoding {}
3435
/// Test-only ordering of encodings by the numeric value of their address.
#[cfg(test)]
impl PartialOrd for Encoding {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let lhs = self as *const Encoding as usize;
        let rhs = other as *const Encoding as usize;
        // `usize` is totally ordered, so the comparison always succeeds.
        Some(lhs.cmp(&rhs))
    }
}
3442
/// Test-only total ordering of encodings by the numeric value of their
/// address.
#[cfg(test)]
impl Ord for Encoding {
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs = self as *const Encoding as usize;
        let rhs = other as *const Encoding as usize;
        lhs.cmp(&rhs)
    }
}
3449
3450	impl Hash for Encoding {
3451	#[inline]
3452	fn hash<H: Hasher>(&self, state: &mut H) {
3453	(self as *const Encoding).hash(state);
3454	}
3455	}
3456
3457	impl core::fmt::Debug for Encoding {
3458	#[inline]
3459	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
3460	write!(f, "Encoding `{{` {} `}}`", self.name)
3461	}
3462	}
3463
/// Serializes an encoding as a string holding its name. Only available
/// with the `serde` feature.
#[cfg(feature = "serde")]
impl Serialize for Encoding {
    #[inline]
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let name: &str = self.name;
        serializer.serialize_str(name)
    }
}
3474
/// Serde visitor that turns a string holding an encoding label into a
/// `&'static Encoding`.
#[cfg(feature = "serde")]
struct EncodingVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for EncodingVisitor {
    type Value = &'static Encoding;

    /// Describes the expected input for serde's error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("a valid encoding label")
    }

    /// Looks `value` up with `Encoding::for_label()` and reports a custom
    /// error when the label is not recognized.
    fn visit_str<E: serde::de::Error>(self, value: &str) -> Result<&'static Encoding, E> {
        Encoding::for_label(value.as_bytes())
            .ok_or_else(|| E::custom(alloc::format!("invalid encoding label: {}", value)))
    }
}
3500
/// Deserializes a `&'static Encoding` from a string by handing the string
/// to `EncodingVisitor`. Only available with the `serde` feature.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for &'static Encoding {
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<&'static Encoding, D::Error> {
        let visitor = EncodingVisitor;
        deserializer.deserialize_str(visitor)
    }
}
3510
/// Life cycle of a decoder: from BOM sniffing through conversion to the
/// end of the stream.
#[derive(PartialEq, Debug, Copy, Clone)]
enum DecoderLifeCycle {
    /// No input has been seen yet.
    AtStart,
    /// No input has been seen yet, but UTF-8 is expected.
    AtUtf8Start,
    /// No input has been seen yet, but UTF-16BE is expected.
    AtUtf16BeStart,
    /// No input has been seen yet, but UTF-16LE is expected.
    AtUtf16LeStart,
    /// The byte EF has been seen.
    SeenUtf8First,
    /// The bytes EF, BB have been seen.
    SeenUtf8Second,
    /// The byte FE has been seen.
    SeenUtf16BeFirst,
    /// The byte FF has been seen.
    SeenUtf16LeFirst,
    /// EF, BB were seen but BF was not, a buffer boundary followed BB and
    /// the underlying decoder reported EF as an error. BB therefore still
    /// has to be pushed before the next buffer.
    ConvertingWithPendingBB,
    /// BOM detection is over and EOF has not been seen yet.
    Converting,
    /// EOF has been seen.
    Finished,
}
3539
/// Selects how a decoder deals with a BOM at the start of the input.
#[derive(Debug, Copy, Clone)]
enum BomHandling {
    /// No BOM handling at all.
    Off,
    /// Sniff for a UTF-8, UTF-16BE or UTF-16LE BOM.
    Sniff,
    /// Remove the BOM, but only if it is the BOM for this encoding.
    Remove,
}
3550
/// Outcome of a (potentially partial) decode or encode operation that
/// performs replacement.
#[must_use]
#[derive(Debug, PartialEq, Eq)]
pub enum CoderResult {
    /// All of the input was consumed.
    ///
    /// When returned from a call where `last` was `true`, the conversion
    /// process is complete. Otherwise, the caller should call a decode or
    /// encode method again with more input.
    InputEmpty,

    /// The output buffer does not have enough space left for the converter
    /// to produce another unit of output.
    ///
    /// On the next call, the caller must provide more output space and
    /// re-push the remaining input to the converter.
    OutputFull,
}
3570
/// Outcome of a (potentially partial) decode operation that does not
/// perform replacement.
#[must_use]
#[derive(Debug, PartialEq, Eq)]
pub enum DecoderResult {
    /// All of the input was consumed.
    ///
    /// When returned from a call where `last` was `true`, the decoding
    /// process is complete. Otherwise, the caller should call a decode
    /// method again with more input.
    InputEmpty,

    /// The output buffer does not have enough space left for the decoder
    /// to produce another unit of output.
    ///
    /// On the next call, the caller must provide more output space and
    /// re-push the remaining input to the decoder.
    OutputFull,

    /// A malformed byte sequence was encountered.
    ///
    /// The caller must either treat this as a fatal error or append one
    /// REPLACEMENT CHARACTER (U+FFFD) to the output and then re-push the
    /// remaining input to the decoder.
    ///
    /// The first wrapped integer is the length of the malformed byte
    /// sequence. The second wrapped integer is the number of bytes that
    /// were consumed after the malformed sequence; if it is zero, the last
    /// byte that was consumed is the last byte of the malformed sequence.
    /// Note that the malformed bytes may have been part of an earlier
    /// input buffer.
    ///
    /// Possible values are 1, 2, 3 or 4 for the first wrapped integer and
    /// 0, 1, 2 or 3 for the second one. The worst-case sum of the two is
    /// 6, which happens with ISO-2022-JP.
    Malformed(u8, u8), // u8 instead of usize to avoid useless bloat
}
3607
3608	/// A converter that decodes a byte stream into Unicode according to a
3609	/// character encoding in a streaming (incremental) manner.
3610	///
3611	/// The various `decode_*` methods take an input buffer (`src`) and an output
3612	/// buffer `dst` both of which are caller-allocated. There are variants for
3613	/// both UTF-8 and UTF-16 output buffers.
3614	///
3615	/// A `decode_*` method decodes bytes from `src` into Unicode characters stored
3616	/// into `dst` until one of the following three things happens:
3617	///
3618	/// 1. A malformed byte sequence is encountered (`*_without_replacement`
3619	/// variants only).
3620	///
3621	/// 2. The output buffer has been filled so near capacity that the decoder
3622	/// cannot be sure that processing an additional byte of input wouldn't
3623	/// cause so much output that the output buffer would overflow.
3624	///
3625	/// 3. All the input bytes have been processed.
3626	///
3627	/// The `decode_` method then returns tuple of a status indicating which one*
3628	/// of the three reasons to return happened, how many input bytes were read,
3629	/// how many output code units (`u8` when decoding into UTF-8 and `u16`
3630	/// when decoding to UTF-16) were written (except when decoding into `String`,
3631	/// whose length change indicates this), and in the case of the
3632	/// variants performing replacement, a boolean indicating whether an error was
3633	/// replaced with the REPLACEMENT CHARACTER during the call.
3634	///
3635	/// The number of bytes "written" is what's logically written. Garbage may be
3636	/// written in the output buffer beyond the point logically written to.
3637	/// Therefore, if you wish to decode into an `&mut str`, you should use the
3638	/// methods that take an `&mut str` argument instead of the ones that take an
3639	/// `&mut [u8]` argument. The former take care of overwriting the trailing
3640	/// garbage to ensure the UTF-8 validity of the `&mut str` as a whole, but the
3641	/// latter don't.
3642	///
3643	/// In the case of the `_without_replacement` variants, the status is a*
3644	/// [`DecoderResult`][1] enumeration (possibilities `Malformed`, `OutputFull` and
3645	/// `InputEmpty` corresponding to the three cases listed above).
3646	///
3647	/// In the case of methods whose name does not end with
3648	/// `_without_replacement`, malformed sequences are automatically replaced*
3649	/// with the REPLACEMENT CHARACTER and errors do not cause the methods to
3650	/// return early.
3651	///
3652	/// When decoding to UTF-8, the output buffer must have at least 4 bytes of
3653	/// space. When decoding to UTF-16, the output buffer must have at least two
3654	/// UTF-16 code units (`u16`) of space.
3655	///
3656	/// When decoding to UTF-8 without replacement, the methods are guaranteed
3657	/// not to return indicating that more output space is needed if the length
3658	/// of the output buffer is at least the length returned by
3659	/// [`max_utf8_buffer_length_without_replacement()`][2]. When decoding to UTF-8
3660	/// with replacement, the length of the output buffer that guarantees the
3661	/// methods not to return indicating that more output space is needed is given
3662	/// by [`max_utf8_buffer_length()`][3]. When decoding to UTF-16 with
3663	/// or without replacement, the length of the output buffer that guarantees
3664	/// the methods not to return indicating that more output space is needed is
3665	/// given by [`max_utf16_buffer_length()`][4].
3666	///
3667	/// The output written into `dst` is guaranteed to be valid UTF-8 or UTF-16,
3668	/// and the output after each `decode_` call is guaranteed to consist of*
3669	/// complete characters. (I.e. the code unit sequence for the last character is
3670	/// guaranteed not to be split across output buffers.)
3671	///
3672	/// The boolean argument `last` indicates that the end of the stream is reached
3673	/// when all the bytes in `src` have been consumed.
3674	///
3675	/// A `Decoder` object can be used to incrementally decode a byte stream.
3676	///
3677	/// During the processing of a single stream, the caller must call `decode_`*
3678	/// zero or more times with `last` set to `false` and then call `decode_` at*
3679	/// least once with `last` set to `true`. If `decode_` returns `InputEmpty`,*
3680	/// the processing of the stream has ended. Otherwise, the caller must call
3681	/// `decode_` again with `last` set to `true` (or treat a `Malformed` result as*
3682	/// a fatal error).
3683	///
3684	/// Once the stream has ended, the `Decoder` object must not be used anymore.
3685	/// That is, you need to create another one to process another stream.
3686	///
3687	/// When the decoder returns `OutputFull` or the decoder returns `Malformed` and
3688	/// the caller does not wish to treat it as a fatal error, the input buffer
3689	/// `src` may not have been completely consumed. In that case, the caller must
3690	/// pass the unconsumed contents of `src` to `decode_` again upon the next*
3691	/// call.
3692	///
3693	/// [1]: enum.DecoderResult.html
3694	/// [2]: #method.max_utf8_buffer_length_without_replacement
3695	/// [3]: #method.max_utf8_buffer_length
3696	/// [4]: #method.max_utf16_buffer_length
3697	///
3698	/// # Infinite loops
3699	///
3700	/// When converting with a fixed-size output buffer whose size is too small to
3701	/// accommodate one character or (when applicable) one numeric character
3702	/// reference of output, an infinite loop ensues. When converting with a
3703	/// fixed-size output buffer, it generally makes sense to make the buffer
3704	/// fairly large (e.g. couple of kilobytes).
3705	pub struct Decoder {
3706	encoding: &'static Encoding,
3707	variant: VariantDecoder,
3708	life_cycle: DecoderLifeCycle,
3709	}
3710
3711	impl Decoder {
3712	fn new(enc: &'static Encoding, decoder: VariantDecoder, sniffing: BomHandling) -> Decoder {
3713	Decoder {
3714	encoding: enc,
3715	variant: decoder,
3716	life_cycle: match sniffing {
3717	BomHandling::Off => DecoderLifeCycle::Converting,
3718	BomHandling::Sniff => DecoderLifeCycle::AtStart,
3719	BomHandling::Remove => {
3720	if enc == UTF_8 {
3721	DecoderLifeCycle::AtUtf8Start
3722	} else if enc == UTF_16BE {
3723	DecoderLifeCycle::AtUtf16BeStart
3724	} else if enc == UTF_16LE {
3725	DecoderLifeCycle::AtUtf16LeStart
3726	} else {
3727	DecoderLifeCycle::Converting
3728	}
3729	}
3730	},
3731	}
3732	}
3733
3734	/// The `Encoding` this `Decoder` is for.
3735	///
3736	/// BOM sniffing can change the return value of this method during the life
3737	/// of the decoder.
3738	///
3739	/// Available via the C wrapper.
3740	#[inline]
3741	pub fn encoding(&self) -> &'static Encoding {
3742	self.encoding
3743	}
3744
3745	/// Query the worst-case UTF-8 output size _with replacement_.
3746	///
3747	/// Returns the size of the output buffer in UTF-8 code units (`u8`)
3748	/// that will not overflow given the current state of the decoder and
3749	/// `byte_length` number of additional input bytes when decoding with
3750	/// errors handled by outputting a REPLACEMENT CHARACTER for each malformed
3751	/// sequence or `None` if `usize` would overflow.
3752	///
3753	/// Available via the C wrapper.
3754	pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
3755	// Need to consider a) the decoder morphing due to the BOM and b) a partial
3756	// BOM getting pushed to the underlying decoder.
3757	match self.life_cycle {
3758	DecoderLifeCycle::Converting
3759	\| DecoderLifeCycle::AtUtf8Start
3760	\| DecoderLifeCycle::AtUtf16LeStart
3761	\| DecoderLifeCycle::AtUtf16BeStart => {
3762	return self.variant.max_utf8_buffer_length(byte_length);
3763	}
3764	DecoderLifeCycle::AtStart => {
3765	if let Some(utf8_bom) = checked_add(`3`, byte_length.checked_mul(`3`)) {
3766	if let Some(utf16_bom) = checked_add(
3767	`1`,
3768	checked_mul(`3`, checked_div(byte_length.checked_add(`1`), `2`)),
3769	) {
3770	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
3771	let encoding = self.encoding();
3772	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
3773	// No need to consider the internal state of the underlying decoder,
3774	// because it is at start, because no data has reached it yet.
3775	return Some(utf_bom);
3776	} else if let Some(non_bom) =
3777	self.variant.max_utf8_buffer_length(byte_length)
3778	{
3779	return Some(core::cmp::max(utf_bom, non_bom));
3780	}
3781	}
3782	}
3783	}
3784	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
3785	// Add two bytes even when only one byte has been seen,
3786	// because the one byte can become a lead byte in multibyte
3787	// decoders, but only after the decoder has been queried
3788	// for max length, so the decoder's own logic for adding
3789	// one for a pending lead cannot work.
3790	if let Some(sum) = byte_length.checked_add(`2`) {
3791	if let Some(utf8_bom) = checked_add(`3`, sum.checked_mul(`3`)) {
3792	if self.encoding() == UTF_8 {
3793	// No need to consider the internal state of the underlying decoder,
3794	// because it is at start, because no data has reached it yet.
3795	return Some(utf8_bom);
3796	} else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) {
3797	return Some(core::cmp::max(utf8_bom, non_bom));
3798	}
3799	}
3800	}
3801	}
3802	DecoderLifeCycle::ConvertingWithPendingBB => {
3803	if let Some(sum) = byte_length.checked_add(`2`) {
3804	return self.variant.max_utf8_buffer_length(sum);
3805	}
3806	}
3807	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
3808	// Add two bytes even when only one byte has been seen,
3809	// because the one byte can become a lead byte in multibyte
3810	// decoders, but only after the decoder has been queried
3811	// for max length, so the decoder's own logic for adding
3812	// one for a pending lead cannot work.
3813	if let Some(sum) = byte_length.checked_add(`2`) {
3814	if let Some(utf16_bom) =
3815	checked_add(`1`, checked_mul(`3`, checked_div(sum.checked_add(`1`), `2`)))
3816	{
3817	let encoding = self.encoding();
3818	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
3819	// No need to consider the internal state of the underlying decoder,
3820	// because it is at start, because no data has reached it yet.
3821	return Some(utf16_bom);
3822	} else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) {
3823	return Some(core::cmp::max(utf16_bom, non_bom));
3824	}
3825	}
3826	}
3827	}
3828	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
3829	}
3830	None
3831	}
3832
3833	/// Query the worst-case UTF-8 output size _without replacement_.
3834	///
3835	/// Returns the size of the output buffer in UTF-8 code units (`u8`)
3836	/// that will not overflow given the current state of the decoder and
3837	/// `byte_length` number of additional input bytes when decoding without
3838	/// replacement error handling or `None` if `usize` would overflow.
3839	///
3840	/// Note that this value may be too small for the `_with_replacement` case.
3841	/// Use `max_utf8_buffer_length()` for that case.
3842	///
3843	/// Available via the C wrapper.
3844	pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
3845	// Need to consider a) the decoder morphing due to the BOM and b) a partial
3846	// BOM getting pushed to the underlying decoder.
3847	match self.life_cycle {
3848	DecoderLifeCycle::Converting
3849	\| DecoderLifeCycle::AtUtf8Start
3850	\| DecoderLifeCycle::AtUtf16LeStart
3851	\| DecoderLifeCycle::AtUtf16BeStart => {
3852	return self
3853	.variant
3854	.max_utf8_buffer_length_without_replacement(byte_length);
3855	}
3856	DecoderLifeCycle::AtStart => {
3857	if let Some(utf8_bom) = byte_length.checked_add(`3`) {
3858	if let Some(utf16_bom) = checked_add(
3859	`1`,
3860	checked_mul(`3`, checked_div(byte_length.checked_add(`1`), `2`)),
3861	) {
3862	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
3863	let encoding = self.encoding();
3864	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
3865	// No need to consider the internal state of the underlying decoder,
3866	// because it is at start, because no data has reached it yet.
3867	return Some(utf_bom);
3868	} else if let Some(non_bom) = self
3869	.variant
3870	.max_utf8_buffer_length_without_replacement(byte_length)
3871	{
3872	return Some(core::cmp::max(utf_bom, non_bom));
3873	}
3874	}
3875	}
3876	}
3877	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
3878	// Add two bytes even when only one byte has been seen,
3879	// because the one byte can become a lead byte in multibyte
3880	// decoders, but only after the decoder has been queried
3881	// for max length, so the decoder's own logic for adding
3882	// one for a pending lead cannot work.
3883	if let Some(sum) = byte_length.checked_add(`2`) {
3884	if let Some(utf8_bom) = sum.checked_add(`3`) {
3885	if self.encoding() == UTF_8 {
3886	// No need to consider the internal state of the underlying decoder,
3887	// because it is at start, because no data has reached it yet.
3888	return Some(utf8_bom);
3889	} else if let Some(non_bom) =
3890	self.variant.max_utf8_buffer_length_without_replacement(sum)
3891	{
3892	return Some(core::cmp::max(utf8_bom, non_bom));
3893	}
3894	}
3895	}
3896	}
3897	DecoderLifeCycle::ConvertingWithPendingBB => {
3898	if let Some(sum) = byte_length.checked_add(`2`) {
3899	return self.variant.max_utf8_buffer_length_without_replacement(sum);
3900	}
3901	}
3902	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
3903	// Add two bytes even when only one byte has been seen,
3904	// because the one byte can become a lead byte in multibyte
3905	// decoders, but only after the decoder has been queried
3906	// for max length, so the decoder's own logic for adding
3907	// one for a pending lead cannot work.
3908	if let Some(sum) = byte_length.checked_add(`2`) {
3909	if let Some(utf16_bom) =
3910	checked_add(`1`, checked_mul(`3`, checked_div(sum.checked_add(`1`), `2`)))
3911	{
3912	let encoding = self.encoding();
3913	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
3914	// No need to consider the internal state of the underlying decoder,
3915	// because it is at start, because no data has reached it yet.
3916	return Some(utf16_bom);
3917	} else if let Some(non_bom) =
3918	self.variant.max_utf8_buffer_length_without_replacement(sum)
3919	{
3920	return Some(core::cmp::max(utf16_bom, non_bom));
3921	}
3922	}
3923	}
3924	}
3925	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
3926	}
3927	None
3928	}
3929
3930	/// Incrementally decode a byte stream into UTF-8 with malformed sequences
3931	/// replaced with the REPLACEMENT CHARACTER.
3932	///
3933	/// See the documentation of the struct for documentation for `decode_`*
3934	/// methods collectively.
3935	///
3936	/// Available via the C wrapper.
3937	pub fn decode_to_utf8(
3938	&mut self,
3939	src: &[u8],
3940	dst: &mut [u8],
3941	last: bool,
3942	) -> (CoderResult, usize, usize, bool) {
3943	let mut had_errors = `false`;
3944	let mut total_read = `0usize`;
3945	let mut total_written = `0usize`;
3946	loop {
3947	let (result, read, written) = self.decode_to_utf8_without_replacement(
3948	&src[total_read..],
3949	&mut dst[total_written..],
3950	last,
3951	);
3952	total_read += read;
3953	total_written += written;
3954	match result {
3955	DecoderResult::InputEmpty => {
3956	return (
3957	CoderResult::InputEmpty,
3958	total_read,
3959	total_written,
3960	had_errors,
3961	);
3962	}
3963	DecoderResult::OutputFull => {
3964	return (
3965	CoderResult::OutputFull,
3966	total_read,
3967	total_written,
3968	had_errors,
3969	);
3970	}
3971	DecoderResult::Malformed(_, _) => {
3972	had_errors = `true`;
3973	// There should always be space for the U+FFFD, because
3974	// otherwise we'd have gotten OutputFull already.
3975	// XXX: is the above comment actually true for UTF-8 itself?
3976	// TODO: Consider having fewer bound checks here.
3977	dst[total_written] = `0xEFu8`;
3978	total_written += `1`;
3979	dst[total_written] = `0xBFu8`;
3980	total_written += `1`;
3981	dst[total_written] = `0xBDu8`;
3982	total_written += `1`;
3983	}
3984	}
3985	}
3986	}
3987
3988	/// Incrementally decode a byte stream into UTF-8 with malformed sequences
3989	/// replaced with the REPLACEMENT CHARACTER with type system signaling
3990	/// of UTF-8 validity.
3991	///
3992	/// This methods calls `decode_to_utf8` and then zeroes
3993	/// out up to three bytes that aren't logically part of the write in order
3994	/// to retain the UTF-8 validity even for the unwritten part of the buffer.
3995	///
3996	/// See the documentation of the struct for documentation for `decode_`*
3997	/// methods collectively.
3998	///
3999	/// Available to Rust only.
4000	pub fn decode_to_str(
4001	&mut self,
4002	src: &[u8],
4003	dst: &mut str,
4004	last: bool,
4005	) -> (CoderResult, usize, usize, bool) {
4006	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
4007	let (result, read, written, replaced) = self.decode_to_utf8(src, bytes, last);
4008	let len = bytes.len();
4009	let mut trail = written;
4010	// Non-UTF-8 ASCII-compatible decoders may write up to `MAX_STRIDE_SIZE`
4011	// bytes of trailing garbage. No need to optimize non-ASCII-compatible
4012	// encodings to avoid overwriting here.
4013	if self.encoding != UTF_8 {
4014	let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE);
4015	while trail < max {
4016	bytes[trail] = `0`;
4017	trail += `1`;
4018	}
4019	}
4020	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
4021	bytes[trail] = `0`;
4022	trail += `1`;
4023	}
4024	(result, read, written, replaced)
4025	}
4026
/// Incrementally decode a byte stream into UTF-8 with malformed sequences
/// replaced with the REPLACEMENT CHARACTER using a `String` receiver.
///
/// Like the others, this method follows the logic that the output buffer is
/// caller-allocated. This method treats the capacity of the `String` as
/// the output limit. That is, this method guarantees not to cause a
/// reallocation of the backing buffer of `String`.
///
/// The return value is a tuple that contains the `CoderResult`, the
/// number of bytes read and a boolean indicating whether replacements
/// were done. The number of bytes written is signaled via the length of
/// the `String` changing.
///
/// See the documentation of the struct for documentation for `decode_*`
/// methods collectively.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_to_string(
    &mut self,
    src: &[u8],
    dst: &mut String,
    last: bool,
) -> (CoderResult, usize, bool) {
    unsafe {
        let vec = dst.as_mut_vec();
        let old_len = vec.len();
        let spare_len = vec.capacity() - old_len;
        // Hand the spare capacity to the decoder without touching the
        // length of the `String` first. If decoding panics, `dst` then
        // still has its old length and old contents instead of a length
        // that covers bytes that were never written.
        //
        // SAFETY: `old_len <= capacity`, so the pointer and `spare_len`
        // stay inside the allocation owned by `vec`, and `vec` is not
        // accessed again until the decoder is done with `spare`.
        let spare = core::slice::from_raw_parts_mut(vec.as_mut_ptr().add(old_len), spare_len);
        let (result, read, written, replaced) = self.decode_to_utf8(src, spare, last);
        // SAFETY: the decoder reports `written <= spare_len` bytes of valid
        // UTF-8 at the start of `spare`, so the new length is within
        // capacity and the contents stay valid UTF-8.
        vec.set_len(old_len + written);
        (result, read, replaced)
    }
}
4063
4064	public_decode_function!(/// Incrementally decode a byte stream into UTF-8
4065	/// _without replacement_.
4066	///
4067	/// See the documentation of the struct for
4068	/// documentation for `decode_` methods*
4069	/// collectively.
4070	///
4071	/// Available via the C wrapper.
4072	,
4073	decode_to_utf8_without_replacement,
4074	decode_to_utf8_raw,
4075	decode_to_utf8_checking_end,
4076	decode_to_utf8_after_one_potential_bom_byte,
4077	decode_to_utf8_after_two_potential_bom_bytes,
4078	decode_to_utf8_checking_end_with_offset,
4079	u8);
4080
4081	/// Incrementally decode a byte stream into UTF-8 with type system signaling
4082	/// of UTF-8 validity.
4083	///
4084	/// This methods calls `decode_to_utf8` and then zeroes out up to three
4085	/// bytes that aren't logically part of the write in order to retain the
4086	/// UTF-8 validity even for the unwritten part of the buffer.
4087	///
4088	/// See the documentation of the struct for documentation for `decode_`*
4089	/// methods collectively.
4090	///
4091	/// Available to Rust only.
4092	pub fn decode_to_str_without_replacement(
4093	&mut self,
4094	src: &[u8],
4095	dst: &mut str,
4096	last: bool,
4097	) -> (DecoderResult, usize, usize) {
4098	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
4099	let (result, read, written) = self.decode_to_utf8_without_replacement(src, bytes, last);
4100	let len = bytes.len();
4101	let mut trail = written;
4102	// Non-UTF-8 ASCII-compatible decoders may write up to `MAX_STRIDE_SIZE`
4103	// bytes of trailing garbage. No need to optimize non-ASCII-compatible
4104	// encodings to avoid overwriting here.
4105	if self.encoding != UTF_8 {
4106	let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE);
4107	while trail < max {
4108	bytes[trail] = `0`;
4109	trail += `1`;
4110	}
4111	}
4112	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
4113	bytes[trail] = `0`;
4114	trail += `1`;
4115	}
4116	(result, read, written)
4117	}
4118
/// Incrementally decode a byte stream into UTF-8 using a `String` receiver.
///
/// Like the others, this method follows the logic that the output buffer is
/// caller-allocated. This method treats the capacity of the `String` as
/// the output limit. That is, this method guarantees not to cause a
/// reallocation of the backing buffer of `String`.
///
/// The return value is a pair that contains the `DecoderResult` and the
/// number of bytes read. The number of bytes written is signaled via
/// the length of the `String` changing.
///
/// See the documentation of the struct for documentation for `decode_*`
/// methods collectively.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_to_string_without_replacement(
    &mut self,
    src: &[u8],
    dst: &mut String,
    last: bool,
) -> (DecoderResult, usize) {
    unsafe {
        let vec = dst.as_mut_vec();
        let old_len = vec.len();
        let spare_len = vec.capacity() - old_len;
        // Hand the spare capacity to the decoder without touching the
        // length of the `String` first. If decoding panics, `dst` then
        // still has its old length and old contents instead of a length
        // that covers bytes that were never written.
        //
        // SAFETY: `old_len <= capacity`, so the pointer and `spare_len`
        // stay inside the allocation owned by `vec`, and `vec` is not
        // accessed again until the decoder is done with `spare`.
        let spare = core::slice::from_raw_parts_mut(vec.as_mut_ptr().add(old_len), spare_len);
        let (result, read, written) =
            self.decode_to_utf8_without_replacement(src, spare, last);
        // SAFETY: the decoder reports `written <= spare_len` bytes of valid
        // UTF-8 at the start of `spare`, so the new length is within
        // capacity and the contents stay valid UTF-8.
        vec.set_len(old_len + written);
        (result, read)
    }
}
4153
4154	/// Query the worst-case UTF-16 output size (with or without replacement).
4155	///
4156	/// Returns the size of the output buffer in UTF-16 code units (`u16`)
4157	/// that will not overflow given the current state of the decoder and
4158	/// `byte_length` number of additional input bytes or `None` if `usize`
4159	/// would overflow.
4160	///
4161	/// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
4162	/// return value of this method applies also in the
4163	/// `_without_replacement` case.
4164	///
4165	/// Available via the C wrapper.
4166	pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
4167	// Need to consider a) the decoder morphing due to the BOM and b) a partial
4168	// BOM getting pushed to the underlying decoder.
4169	match self.life_cycle {
4170	DecoderLifeCycle::Converting
4171	\| DecoderLifeCycle::AtUtf8Start
4172	\| DecoderLifeCycle::AtUtf16LeStart
4173	\| DecoderLifeCycle::AtUtf16BeStart => {
4174	return self.variant.max_utf16_buffer_length(byte_length);
4175	}
4176	DecoderLifeCycle::AtStart => {
4177	if let Some(utf8_bom) = byte_length.checked_add(`1`) {
4178	if let Some(utf16_bom) =
4179	checked_add(`1`, checked_div(byte_length.checked_add(`1`), `2`))
4180	{
4181	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
4182	let encoding = self.encoding();
4183	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
4184	// No need to consider the internal state of the underlying decoder,
4185	// because it is at start, because no data has reached it yet.
4186	return Some(utf_bom);
4187	} else if let Some(non_bom) =
4188	self.variant.max_utf16_buffer_length(byte_length)
4189	{
4190	return Some(core::cmp::max(utf_bom, non_bom));
4191	}
4192	}
4193	}
4194	}
4195	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
4196	// Add two bytes even when only one byte has been seen,
4197	// because the one byte can become a lead byte in multibyte
4198	// decoders, but only after the decoder has been queried
4199	// for max length, so the decoder's own logic for adding
4200	// one for a pending lead cannot work.
4201	if let Some(sum) = byte_length.checked_add(`2`) {
4202	if let Some(utf8_bom) = sum.checked_add(`1`) {
4203	if self.encoding() == UTF_8 {
4204	// No need to consider the internal state of the underlying decoder,
4205	// because it is at start, because no data has reached it yet.
4206	return Some(utf8_bom);
4207	} else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) {
4208	return Some(core::cmp::max(utf8_bom, non_bom));
4209	}
4210	}
4211	}
4212	}
4213	DecoderLifeCycle::ConvertingWithPendingBB => {
4214	if let Some(sum) = byte_length.checked_add(`2`) {
4215	return self.variant.max_utf16_buffer_length(sum);
4216	}
4217	}
4218	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
4219	// Add two bytes even when only one byte has been seen,
4220	// because the one byte can become a lead byte in multibyte
4221	// decoders, but only after the decoder has been queried
4222	// for max length, so the decoder's own logic for adding
4223	// one for a pending lead cannot work.
4224	if let Some(sum) = byte_length.checked_add(`2`) {
4225	if let Some(utf16_bom) = checked_add(`1`, checked_div(sum.checked_add(`1`), `2`)) {
4226	let encoding = self.encoding();
4227	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
4228	// No need to consider the internal state of the underlying decoder,
4229	// because it is at start, because no data has reached it yet.
4230	return Some(utf16_bom);
4231	} else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) {
4232	return Some(core::cmp::max(utf16_bom, non_bom));
4233	}
4234	}
4235	}
4236	}
4237	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
4238	}
4239	None
4240	}
4241
4242	/// Incrementally decode a byte stream into UTF-16 with malformed sequences
4243	/// replaced with the REPLACEMENT CHARACTER.
4244	///
4245	/// See the documentation of the struct for documentation for `decode_`*
4246	/// methods collectively.
4247	///
4248	/// Available via the C wrapper.
4249	pub fn decode_to_utf16(
4250	&mut self,
4251	src: &[u8],
4252	dst: &mut [u16],
4253	last: bool,
4254	) -> (CoderResult, usize, usize, bool) {
4255	let mut had_errors = `false`;
4256	let mut total_read = `0usize`;
4257	let mut total_written = `0usize`;
4258	loop {
4259	let (result, read, written) = self.decode_to_utf16_without_replacement(
4260	&src[total_read..],
4261	&mut dst[total_written..],
4262	last,
4263	);
4264	total_read += read;
4265	total_written += written;
4266	match result {
4267	DecoderResult::InputEmpty => {
4268	return (
4269	CoderResult::InputEmpty,
4270	total_read,
4271	total_written,
4272	had_errors,
4273	);
4274	}
4275	DecoderResult::OutputFull => {
4276	return (
4277	CoderResult::OutputFull,
4278	total_read,
4279	total_written,
4280	had_errors,
4281	);
4282	}
4283	DecoderResult::Malformed(_, _) => {
4284	had_errors = `true`;
4285	// There should always be space for the U+FFFD, because
4286	// otherwise we'd have gotten OutputFull already.
4287	dst[total_written] = `0xFFFD`;
4288	total_written += `1`;
4289	}
4290	}
4291	}
4292	}
4293
4294	public_decode_function!(/// Incrementally decode a byte stream into UTF-16
4295	/// _without replacement_.
4296	///
4297	/// See the documentation of the struct for
4298	/// documentation for `decode_` methods*
4299	/// collectively.
4300	///
4301	/// Available via the C wrapper.
4302	,
4303	decode_to_utf16_without_replacement,
4304	decode_to_utf16_raw,
4305	decode_to_utf16_checking_end,
4306	decode_to_utf16_after_one_potential_bom_byte,
4307	decode_to_utf16_after_two_potential_bom_bytes,
4308	decode_to_utf16_checking_end_with_offset,
4309	u16);
4310
4311	/// Checks for compatibility with storing Unicode scalar values as unsigned
4312	/// bytes taking into account the state of the decoder.
4313	///
4314	/// Returns `None` if the decoder is not in a neutral state, including waiting
4315	/// for the BOM, or if the encoding is never Latin1-byte-compatible.
4316	///
4317	/// Otherwise returns the index of the first byte whose unsigned value doesn't
4318	/// directly correspond to the decoded Unicode scalar value, or the length
4319	/// of the input if all bytes in the input decode directly to scalar values
4320	/// corresponding to the unsigned byte values.
4321	///
4322	/// Does not change the state of the decoder.
4323	///
4324	/// Do not use this unless you are supporting SpiderMonkey/V8-style string
4325	/// storage optimizations.
4326	///
4327	/// Available via the C wrapper.
4328	pub fn latin1_byte_compatible_up_to(&self, bytes: &[u8]) -> Option<usize> {
4329	match self.life_cycle {
4330	DecoderLifeCycle::Converting => {
4331	return self.variant.latin1_byte_compatible_up_to(bytes);
4332	}
4333	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
4334	_ => None,
4335	}
4336	}
4337	}
4338
/// Result of a (potentially partial) encode operation without replacement.
#[must_use]
#[derive(Debug, PartialEq, Eq)]
pub enum EncoderResult {
    /// The input was exhausted.
    ///
    /// If this result was returned from a call where `last` was `true`, the
    /// encoding process has completed. Otherwise, the caller should call an
    /// encode method again with more input.
    InputEmpty,

    /// The encoder cannot produce another unit of output, because the output
    /// buffer does not have enough space left.
    ///
    /// The caller must provide more output space upon the next call and re-push
    /// the remaining input to the encoder.
    OutputFull,

    /// The encoder encountered an unmappable character.
    ///
    /// The caller must either treat this as a fatal error or must append
    /// a placeholder to the output and then re-push the remaining input to the
    /// encoder.
    Unmappable(char),
}

impl EncoderResult {
    /// Wraps the scalar value denoted by the UTF-16 code unit `bmp` in
    /// `EncoderResult::Unmappable`.
    ///
    /// Panics if `bmp` is a surrogate, because a lone surrogate is not a
    /// Unicode scalar value and therefore has no `char` representation.
    fn unmappable_from_bmp(bmp: u16) -> EncoderResult {
        let unmappable = core::char::from_u32(u32::from(bmp))
            .expect("a code unit reported as unmappable must not be a surrogate");
        EncoderResult::Unmappable(unmappable)
    }
}
4370
4371	/// A converter that encodes a Unicode stream into bytes according to a
4372	/// character encoding in a streaming (incremental) manner.
4373	///
4374	/// The various `encode_*` methods take an input buffer (`src`) and an output
4375	/// buffer `dst` both of which are caller-allocated. There are variants for
4376	/// both UTF-8 and UTF-16 input buffers.
4377	///
4378	/// An `encode_*` method encodes characters from `src` into bytes
4379	/// stored into `dst` until one of the following three things happens:
4380	///
4381	/// 1. An unmappable character is encountered (`*_without_replacement` variants
4382	/// only).
4383	///
4384	/// 2. The output buffer has been filled so near capacity that the decoder
4385	/// cannot be sure that processing an additional character of input wouldn't
4386	/// cause so much output that the output buffer would overflow.
4387	///
4388	/// 3. All the input characters have been processed.
4389	///
4390	/// The `encode_*` method then returns a tuple of a status indicating which one
4391	/// of the three reasons to return happened, how many input code units (`u8`
4392	/// when encoding from UTF-8 and `u16` when encoding from UTF-16) were read,
4393	/// how many output bytes were written (except when encoding into `Vec<u8>`,
4394	/// whose length change indicates this), and in the case of the variants that
4395	/// perform replacement, a boolean indicating whether an unmappable
4396	/// character was replaced with a numeric character reference during the call.
4397	///
4398	/// The number of bytes "written" is what's logically written. Garbage may be
4399	/// written in the output buffer beyond the point logically written to.
4400	///
4401	/// In the case of the methods whose name ends with
4402	/// `*_without_replacement`, the status is an [`EncoderResult`][1] enumeration
4403	/// (possibilities `Unmappable`, `OutputFull` and `InputEmpty` corresponding to
4404	/// the three cases listed above).
4405	///
4406	/// In the case of methods whose name does not end with
4407	/// `*_without_replacement`, unmappable characters are automatically replaced
4408	/// with the corresponding numeric character references and unmappable
4409	/// characters do not cause the methods to return early.
4410	///
4411	/// When encoding from UTF-8 without replacement, the methods are guaranteed
4412	/// not to return indicating that more output space is needed if the length
4413	/// of the output buffer is at least the length returned by
4414	/// [`max_buffer_length_from_utf8_without_replacement()`][2]. When encoding from
4415	/// UTF-8 with replacement, the length of the output buffer that guarantees the
4416	/// methods not to return indicating that more output space is needed in the
4417	/// absence of unmappable characters is given by
4418	/// [`max_buffer_length_from_utf8_if_no_unmappables()`][3]. When encoding from
4419	/// UTF-16 without replacement, the methods are guaranteed not to return
4420	/// indicating that more output space is needed if the length of the output
4421	/// buffer is at least the length returned by
4422	/// [`max_buffer_length_from_utf16_without_replacement()`][4]. When encoding
4423	/// from UTF-16 with replacement, the length of the output buffer that
4424	/// guarantees the methods not to return indicating that more output space is
4425	/// needed in the absence of unmappable characters is given by
4426	/// [`max_buffer_length_from_utf16_if_no_unmappables()`][5].
4427	/// When encoding with replacement, applications are not expected to size the
4428	/// buffer for the worst case ahead of time but to resize the buffer if there
4429	/// are unmappable characters. This is why max length queries are only available
4430	/// for the case where there are no unmappable characters.
4431	///
4432	/// When encoding from UTF-8, each `src` buffer _must_ be valid UTF-8. (When
4433	/// calling from Rust, the type system takes care of this.) When encoding from
4434	/// UTF-16, unpaired surrogates in the input are treated as U+FFFD REPLACEMENT
4435	/// CHARACTERS. Therefore, in order for astral characters not to turn into a
4436	/// pair of REPLACEMENT CHARACTERS, the caller must ensure that surrogate pairs
4437	/// are not split across input buffer boundaries.
4438	///
4439	/// After an `encode_*` call returns, the output produced so far, taken as a
4440	/// whole from the start of the stream, is guaranteed to consist of a valid
4441	/// byte sequence in the target encoding. (I.e. the code unit sequence for a
4442	/// character is guaranteed not to be split across output buffers. However, due
4443	/// to the stateful nature of ISO-2022-JP, the stream needs to be considered
4444	/// from the start for it to be valid. For other encodings, the validity holds
4445	/// on a per-output buffer basis.)
4446	///
4447	/// The boolean argument `last` indicates that the end of the stream is reached
4448	/// when all the characters in `src` have been consumed. This argument is needed
4449	/// for ISO-2022-JP and is ignored for other encodings.
4450	///
4451	/// An `Encoder` object can be used to incrementally encode a stream of characters into bytes.
4452	///
4453	/// During the processing of a single stream, the caller must call `encode_*`
4454	/// zero or more times with `last` set to `false` and then call `encode_*` at
4455	/// least once with `last` set to `true`. If `encode_*` returns `InputEmpty`,
4456	/// the processing of the stream has ended. Otherwise, the caller must call
4457	/// `encode_*` again with `last` set to `true` (or treat an `Unmappable` result
4458	/// as a fatal error).
4459	///
4460	/// Once the stream has ended, the `Encoder` object must not be used anymore.
4461	/// That is, you need to create another one to process another stream.
4462	///
4463	/// When the encoder returns `OutputFull` or the encoder returns `Unmappable`
4464	/// and the caller does not wish to treat it as a fatal error, the input buffer
4465	/// `src` may not have been completely consumed. In that case, the caller must
4466	/// pass the unconsumed contents of `src` to `encode_*` again upon the next
4467	/// call.
4468	///
4469	/// [1]: enum.EncoderResult.html
4470	/// [2]: #method.max_buffer_length_from_utf8_without_replacement
4471	/// [3]: #method.max_buffer_length_from_utf8_if_no_unmappables
4472	/// [4]: #method.max_buffer_length_from_utf16_without_replacement
4473	/// [5]: #method.max_buffer_length_from_utf16_if_no_unmappables
4474	///
4475	/// # Infinite loops
4476	///
4477	/// When converting with a fixed-size output buffer whose size is too small to
4478	/// accommodate one character of output, an infinite loop ensues. When
4479	/// converting with a fixed-size output buffer, it generally makes sense to
4480	/// make the buffer fairly large (e.g. a couple of kilobytes).
4481	pub struct Encoder {
4482	encoding: &'static Encoding,
4483	variant: VariantEncoder,
4484	}
4485
4486	impl Encoder {
4487	fn new(enc: &'static Encoding, encoder: VariantEncoder) -> Encoder {
4488	Encoder {
4489	encoding: enc,
4490	variant: encoder,
4491	}
4492	}
4493
4494	/// The `Encoding` this `Encoder` is for.
4495	#[inline]
4496	pub fn encoding(&self) -> &'static Encoding {
4497	self.encoding
4498	}
4499
4500	/// Returns `true` if this is an ISO-2022-JP encoder that's not in the
4501	/// ASCII state and `false` otherwise.
4502	#[inline]
4503	pub fn has_pending_state(&self) -> bool {
4504	self.variant.has_pending_state()
4505	}
4506
4507	/// Query the worst-case output size when encoding from UTF-8 with
4508	/// replacement.
4509	///
4510	/// Returns the size of the output buffer in bytes that will not overflow
4511	/// given the current state of the encoder and `byte_length` number of
4512	/// additional input code units if there are no unmappable characters in
4513	/// the input or `None` if `usize` would overflow.
4514	///
4515	/// Available via the C wrapper.
4516	pub fn max_buffer_length_from_utf8_if_no_unmappables(
4517	&self,
4518	byte_length: usize,
4519	) -> Option<usize> {
4520	checked_add(
4521	if self.encoding().can_encode_everything() {
4522	`0`
4523	} else {
4524	NCR_EXTRA
4525	},
4526	self.max_buffer_length_from_utf8_without_replacement(byte_length),
4527	)
4528	}
4529
4530	/// Query the worst-case output size when encoding from UTF-8 without
4531	/// replacement.
4532	///
4533	/// Returns the size of the output buffer in bytes that will not overflow
4534	/// given the current state of the encoder and `byte_length` number of
4535	/// additional input code units or `None` if `usize` would overflow.
4536	///
4537	/// Available via the C wrapper.
4538	pub fn max_buffer_length_from_utf8_without_replacement(
4539	&self,
4540	byte_length: usize,
4541	) -> Option<usize> {
4542	self.variant
4543	.max_buffer_length_from_utf8_without_replacement(byte_length)
4544	}
4545
4546	/// Incrementally encode into byte stream from UTF-8 with unmappable
4547	/// characters replaced with HTML (decimal) numeric character references.
4548	///
4549	/// See the documentation of the struct for documentation for `encode_`*
4550	/// methods collectively.
4551	///
4552	/// Available via the C wrapper.
4553	pub fn encode_from_utf8(
4554	&mut self,
4555	src: &str,
4556	dst: &mut [u8],
4557	last: bool,
4558	) -> (CoderResult, usize, usize, bool) {
4559	let dst_len = dst.len();
4560	let effective_dst_len = if self.encoding().can_encode_everything() {
4561	dst_len
4562	} else {
4563	if dst_len < NCR_EXTRA {
4564	if src.is_empty() && !(last && self.has_pending_state()) {
4565	return (CoderResult::InputEmpty, `0`, `0`, `false`);
4566	}
4567	return (CoderResult::OutputFull, `0`, `0`, `false`);
4568	}
4569	dst_len - NCR_EXTRA
4570	};
4571	let mut had_unmappables = `false`;
4572	let mut total_read = `0usize`;
4573	let mut total_written = `0usize`;
4574	loop {
4575	let (result, read, written) = self.encode_from_utf8_without_replacement(
4576	&src[total_read..],
4577	&mut dst[total_written..effective_dst_len],
4578	last,
4579	);
4580	total_read += read;
4581	total_written += written;
4582	match result {
4583	EncoderResult::InputEmpty => {
4584	return (
4585	CoderResult::InputEmpty,
4586	total_read,
4587	total_written,
4588	had_unmappables,
4589	);
4590	}
4591	EncoderResult::OutputFull => {
4592	return (
4593	CoderResult::OutputFull,
4594	total_read,
4595	total_written,
4596	had_unmappables,
4597	);
4598	}
4599	EncoderResult::Unmappable(unmappable) => {
4600	had_unmappables = `true`;
4601	debug_assert!(dst.len() - total_written >= NCR_EXTRA);
4602	debug_assert_ne!(self.encoding(), UTF_16BE);
4603	debug_assert_ne!(self.encoding(), UTF_16LE);
4604	// Additionally, Iso2022JpEncoder is responsible for
4605	// transitioning to ASCII when returning with Unmappable.
4606	total_written += write_ncr(unmappable, &mut dst[total_written..]);
4607	if total_written >= effective_dst_len {
4608	if total_read == src.len() && !(last && self.has_pending_state()) {
4609	return (
4610	CoderResult::InputEmpty,
4611	total_read,
4612	total_written,
4613	had_unmappables,
4614	);
4615	}
4616	return (
4617	CoderResult::OutputFull,
4618	total_read,
4619	total_written,
4620	had_unmappables,
4621	);
4622	}
4623	}
4624	}
4625	}
4626	}
4627
4628	/// Incrementally encode into byte stream from UTF-8 with unmappable
4629	/// characters replaced with HTML (decimal) numeric character references.
4630	///
4631	/// See the documentation of the struct for documentation for `encode_`*
4632	/// methods collectively.
4633	///
4634	/// Available to Rust only and only with the `alloc` feature enabled (enabled
4635	/// by default).
4636	#[cfg(feature = "alloc")]
4637	pub fn encode_from_utf8_to_vec(
4638	&mut self,
4639	src: &str,
4640	dst: &mut Vec<u8>,
4641	last: bool,
4642	) -> (CoderResult, usize, bool) {
4643	unsafe {
4644	let old_len = dst.len();
4645	let capacity = dst.capacity();
4646	dst.set_len(capacity);
4647	let (result, read, written, replaced) =
4648	self.encode_from_utf8(src, &mut dst[old_len..], last);
4649	dst.set_len(old_len + written);
4650	(result, read, replaced)
4651	}
4652	}
4653
4654	/// Incrementally encode into byte stream from UTF-8 _without replacement_.
4655	///
4656	/// See the documentation of the struct for documentation for `encode_`*
4657	/// methods collectively.
4658	///
4659	/// Available via the C wrapper.
4660	pub fn encode_from_utf8_without_replacement(
4661	&mut self,
4662	src: &str,
4663	dst: &mut [u8],
4664	last: bool,
4665	) -> (EncoderResult, usize, usize) {
4666	self.variant.encode_from_utf8_raw(src, dst, last)
4667	}
4668
4669	/// Incrementally encode into byte stream from UTF-8 _without replacement_.
4670	///
4671	/// See the documentation of the struct for documentation for `encode_`*
4672	/// methods collectively.
4673	///
4674	/// Available to Rust only and only with the `alloc` feature enabled (enabled
4675	/// by default).
4676	#[cfg(feature = "alloc")]
4677	pub fn encode_from_utf8_to_vec_without_replacement(
4678	&mut self,
4679	src: &str,
4680	dst: &mut Vec<u8>,
4681	last: bool,
4682	) -> (EncoderResult, usize) {
4683	unsafe {
4684	let old_len = dst.len();
4685	let capacity = dst.capacity();
4686	dst.set_len(capacity);
4687	let (result, read, written) =
4688	self.encode_from_utf8_without_replacement(src, &mut dst[old_len..], last);
4689	dst.set_len(old_len + written);
4690	(result, read)
4691	}
4692	}
4693
4694	/// Query the worst-case output size when encoding from UTF-16 with
4695	/// replacement.
4696	///
4697	/// Returns the size of the output buffer in bytes that will not overflow
4698	/// given the current state of the encoder and `u16_length` number of
4699	/// additional input code units if there are no unmappable characters in
4700	/// the input or `None` if `usize` would overflow.
4701	///
4702	/// Available via the C wrapper.
4703	pub fn max_buffer_length_from_utf16_if_no_unmappables(
4704	&self,
4705	u16_length: usize,
4706	) -> Option<usize> {
4707	checked_add(
4708	if self.encoding().can_encode_everything() {
4709	`0`
4710	} else {
4711	NCR_EXTRA
4712	},
4713	self.max_buffer_length_from_utf16_without_replacement(u16_length),
4714	)
4715	}
4716
4717	/// Query the worst-case output size when encoding from UTF-16 without
4718	/// replacement.
4719	///
4720	/// Returns the size of the output buffer in bytes that will not overflow
4721	/// given the current state of the encoder and `u16_length` number of
4722	/// additional input code units or `None` if `usize` would overflow.
4723	///
4724	/// Available via the C wrapper.
4725	pub fn max_buffer_length_from_utf16_without_replacement(
4726	&self,
4727	u16_length: usize,
4728	) -> Option<usize> {
4729	self.variant
4730	.max_buffer_length_from_utf16_without_replacement(u16_length)
4731	}
4732
4733	/// Incrementally encode into byte stream from UTF-16 with unmappable
4734	/// characters replaced with HTML (decimal) numeric character references.
4735	///
4736	/// See the documentation of the struct for documentation for `encode_`*
4737	/// methods collectively.
4738	///
4739	/// Available via the C wrapper.
4740	pub fn encode_from_utf16(
4741	&mut self,
4742	src: &[u16],
4743	dst: &mut [u8],
4744	last: bool,
4745	) -> (CoderResult, usize, usize, bool) {
4746	let dst_len = dst.len();
4747	let effective_dst_len = if self.encoding().can_encode_everything() {
4748	dst_len
4749	} else {
4750	if dst_len < NCR_EXTRA {
4751	if src.is_empty() && !(last && self.has_pending_state()) {
4752	return (CoderResult::InputEmpty, `0`, `0`, `false`);
4753	}
4754	return (CoderResult::OutputFull, `0`, `0`, `false`);
4755	}
4756	dst_len - NCR_EXTRA
4757	};
4758	let mut had_unmappables = `false`;
4759	let mut total_read = `0usize`;
4760	let mut total_written = `0usize`;
4761	loop {
4762	let (result, read, written) = self.encode_from_utf16_without_replacement(
4763	&src[total_read..],
4764	&mut dst[total_written..effective_dst_len],
4765	last,
4766	);
4767	total_read += read;
4768	total_written += written;
4769	match result {
4770	EncoderResult::InputEmpty => {
4771	return (
4772	CoderResult::InputEmpty,
4773	total_read,
4774	total_written,
4775	had_unmappables,
4776	);
4777	}
4778	EncoderResult::OutputFull => {
4779	return (
4780	CoderResult::OutputFull,
4781	total_read,
4782	total_written,
4783	had_unmappables,
4784	);
4785	}
4786	EncoderResult::Unmappable(unmappable) => {
4787	had_unmappables = `true`;
4788	debug_assert!(dst.len() - total_written >= NCR_EXTRA);
4789	// There are no UTF-16 encoders and even if there were,
4790	// they'd never have unmappables.
4791	debug_assert_ne!(self.encoding(), UTF_16BE);
4792	debug_assert_ne!(self.encoding(), UTF_16LE);
4793	// Additionally, Iso2022JpEncoder is responsible for
4794	// transitioning to ASCII when returning with Unmappable
4795	// from the jis0208 state. That is, when we encode
4796	// ISO-2022-JP and come here, the encoder is in either the
4797	// ASCII or the Roman state. We are allowed to generate any
4798	// printable ASCII excluding \ and ~.
4799	total_written += write_ncr(unmappable, &mut dst[total_written..]);
4800	if total_written >= effective_dst_len {
4801	if total_read == src.len() && !(last && self.has_pending_state()) {
4802	return (
4803	CoderResult::InputEmpty,
4804	total_read,
4805	total_written,
4806	had_unmappables,
4807	);
4808	}
4809	return (
4810	CoderResult::OutputFull,
4811	total_read,
4812	total_written,
4813	had_unmappables,
4814	);
4815	}
4816	}
4817	}
4818	}
4819	}
4820
4821	/// Incrementally encode into byte stream from UTF-16 _without replacement_.
4822	///
4823	/// See the documentation of the struct for documentation for `encode_`*
4824	/// methods collectively.
4825	///
4826	/// Available via the C wrapper.
4827	pub fn encode_from_utf16_without_replacement(
4828	&mut self,
4829	src: &[u16],
4830	dst: &mut [u8],
4831	last: bool,
4832	) -> (EncoderResult, usize, usize) {
4833	self.variant.encode_from_utf16_raw(src, dst, last)
4834	}
4835	}
4836
/// Format an unmappable as NCR without heap allocation.
///
/// Writes `&#`, the decimal scalar value of `unmappable` and `;` to the start
/// of `dst` and returns the number of bytes written.
///
/// Panics if `dst` is shorter than the reference. Scalar values below 10 are
/// not supported (checked with a debug assertion).
fn write_ncr(unmappable: char, dst: &mut [u8]) -> usize {
    // len is the number of decimal digits needed to represent unmappable plus
    // 3 (the length of "&#" and ";").
    let mut number = unmappable as u32;
    let len = if number >= 1_000_000u32 {
        10usize
    } else if number >= 100_000u32 {
        9usize
    } else if number >= 10_000u32 {
        8usize
    } else if number >= 1_000u32 {
        7usize
    } else if number >= 100u32 {
        6usize
    } else {
        // Review the outcome of https://github.com/whatwg/encoding/issues/15
        // to see if this case is possible
        5usize
    };
    debug_assert!(number >= 10u32);
    debug_assert!(len <= dst.len());
    // One up-front slice performs the bounds check for every write below.
    let out = &mut dst[..len];
    out[len - 1] = b';';
    // Fill the digits right to left, ending just after "&#".
    let mut pos = len - 2;
    loop {
        out[pos] = (number % 10) as u8 + b'0';
        if number < 10 {
            break;
        }
        number /= 10;
        pos -= 1;
    }
    out[1] = b'#';
    out[0] = b'&';
    len
}
4875
/// Returns `true` when `start <= i < end`, using a single comparison.
#[inline(always)]
fn in_range16(i: u16, start: u16, end: u16) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset < width
}
4880
/// Returns `true` when `start <= i < end`, using a single comparison.
#[inline(always)]
fn in_range32(i: u32, start: u32, end: u32) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset < width
}
4885
/// Returns `true` when `start <= i <= end`, using a single comparison.
#[inline(always)]
fn in_inclusive_range8(i: u8, start: u8, end: u8) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset <= width
}
4890
/// Returns `true` when `start <= i <= end`, using a single comparison.
#[inline(always)]
fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset <= width
}
4895
/// Returns `true` when `start <= i <= end`, using a single comparison.
#[inline(always)]
fn in_inclusive_range32(i: u32, start: u32, end: u32) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset <= width
}
4900
/// Returns `true` when `start <= i <= end`, using a single comparison.
#[inline(always)]
fn in_inclusive_range(i: usize, start: usize, end: usize) -> bool {
    let width = end - start;
    // Values below `start` wrap around to large offsets and fail the test.
    let offset = i.wrapping_sub(start);
    offset <= width
}
4905
/// Adds `num` to the value in `opt`.
///
/// Returns `None` if `opt` is `None` or if the sum overflows `usize`.
#[inline(always)]
fn checked_add(num: usize, opt: Option<usize>) -> Option<usize> {
    opt.and_then(|n| n.checked_add(num))
}
4914
/// Adds the values in `one` and `other`.
///
/// Returns `None` if either operand is `None` or if the sum overflows
/// `usize`.
#[inline(always)]
fn checked_add_opt(one: Option<usize>, other: Option<usize>) -> Option<usize> {
    match (one, other) {
        (Some(a), Some(b)) => a.checked_add(b),
        _ => None,
    }
}
4923
/// Multiplies the value in `opt` by `num`.
///
/// Returns `None` if `opt` is `None` or if the product overflows `usize`.
#[inline(always)]
fn checked_mul(num: usize, opt: Option<usize>) -> Option<usize> {
    opt.and_then(|n| n.checked_mul(num))
}
4932
/// Divides the value in `opt` by `num`.
///
/// Returns `None` if `opt` is `None` or if `num` is zero.
#[inline(always)]
fn checked_div(opt: Option<usize>, num: usize) -> Option<usize> {
    opt.and_then(|n| n.checked_div(num))
}
4941
4942	#[cfg(feature = "alloc")]
4943	#[inline(always)]
/// Rounds the value in `opt` up to the next power of two.
///
/// Returns `None` if `opt` is `None` or if the next power of two does not
/// fit in `usize` (instead of panicking or wrapping on overflow).
fn checked_next_power_of_two(opt: Option<usize>) -> Option<usize> {
    opt.and_then(usize::checked_next_power_of_two)
}
4947
4948	#[cfg(feature = "alloc")]
4949	#[inline(always)]
/// Returns the smaller of the two values when both are present.
///
/// If only one operand is `Some`, that operand is returned; if both are
/// `None`, the result is `None`.
fn checked_min(one: Option<usize>, other: Option<usize>) -> Option<usize> {
    match (one, other) {
        (Some(a), Some(b)) => Some(a.min(b)),
        (present, None) => present,
        (None, present) => present,
    }
}
4961
4962	// ############## TESTS ###############
4963
/// Test-only type, compiled only for tests with the `serde` feature, that
/// carries an `&'static Encoding` next to ordinary serializable fields.
#[cfg(all(test, feature = "serde"))]
#[derive(Serialize, Deserialize, Debug, PartialEq)]
struct Demo {
    /// Plain numeric field.
    num: u32,
    /// Plain string field.
    name: String,
    /// The encoding reference stored in the struct.
    enc: &'static Encoding,
}
4971
4972	#[cfg(test)]
4973	mod test_labels_names;
4974
4975	#[cfg(all(test, feature = "alloc"))]
4976	mod tests {
4977	use super::*;
4978	use alloc::borrow::Cow;
4979
4980	fn sniff_to_utf16(
4981	initial_encoding: &'static Encoding,
4982	expected_encoding: &'static Encoding,
4983	bytes: &[u8],
4984	expect: &[u16],
4985	breaks: &[usize],
4986	) {
4987	let mut decoder = initial_encoding.new_decoder();
4988
4989	let mut dest: Vec<u16> =
4990	Vec::with_capacity(decoder.max_utf16_buffer_length(bytes.len()).unwrap());
4991	let capacity = dest.capacity();
4992	dest.resize(capacity, `0u16`);
4993
4994	let mut total_written = `0usize`;
4995	let mut start = `0usize`;
4996	for br in breaks {
4997	let (result, read, written, _) =
4998	decoder.decode_to_utf16(&bytes[start..br], &mut* dest[total_written..], `false`);
4999	total_written += written;
5000	assert_eq!(read, *br - start);
5001	match result {
5002	CoderResult::InputEmpty => {}
5003	CoderResult::OutputFull => {
5004	unreachable!();
5005	}
5006	}
5007	start = *br;
5008	}
5009	let (result, read, written, _) =
5010	decoder.decode_to_utf16(&bytes[start..], &mut dest[total_written..], `true`);
5011	total_written += written;
5012	match result {
5013	CoderResult::InputEmpty => {}
5014	CoderResult::OutputFull => {
5015	unreachable!();
5016	}
5017	}
5018	assert_eq!(read, bytes.len() - start);
5019	assert_eq!(total_written, expect.len());
5020	assert_eq!(&dest[..total_written], expect);
5021	assert_eq!(decoder.encoding(), expected_encoding);
5022	}
5023
5024	// Any copyright to the test code below this comment is dedicated to the
5025	// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
5026
/// Checks which encoding a decoder started as windows-1252 ends up with for
/// inputs with and without a byte order mark, split at various offsets.
#[test]
fn test_bom_sniffing() {
    // ASCII
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[],
    );
    // UTF-8: the same BOM-prefixed input, fed with different split points.
    let utf8_break_sets: [&[usize]; 9] = [
        &[],
        &[1],
        &[2],
        &[3],
        &[4],
        &[2, 3],
        &[1, 2],
        &[1, 3],
        &[1, 2, 3, 4],
    ];
    for breaks in utf8_break_sets.iter() {
        sniff_to_utf16(
            WINDOWS_1252,
            UTF_8,
            b"\xEF\xBB\xBF\x61\x62",
            &[0x0061u16, 0x0062u16],
            breaks,
        );
    }
    sniff_to_utf16(WINDOWS_1252, UTF_8, b"\xEF\xBB\xBF", &[], &[]);
    // Not UTF-8
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB\x61\x62",
        &[0x00EFu16, 0x00BBu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB\x61\x62",
        &[0x00EFu16, 0x00BBu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\x61\x62",
        &[0x00EFu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\x61\x62",
        &[0x00EFu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB",
        &[0x00EFu16, 0x00BBu16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB",
        &[0x00EFu16, 0x00BBu16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xEF", &[0x00EFu16], &[]);
    // Not UTF-16
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFE\x61\x62",
        &[0x00FEu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFE\x61\x62",
        &[0x00FEu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xFE", &[0x00FEu16], &[]);
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFF\x61\x62",
        &[0x00FFu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFF\x61\x62",
        &[0x00FFu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xFF", &[0x00FFu16], &[]);
    // UTF-16
    sniff_to_utf16(WINDOWS_1252, UTF_16BE, b"\xFE\xFF", &[], &[]);
    sniff_to_utf16(WINDOWS_1252, UTF_16BE, b"\xFE\xFF", &[], &[1]);
    sniff_to_utf16(WINDOWS_1252, UTF_16LE, b"\xFF\xFE", &[], &[]);
    sniff_to_utf16(WINDOWS_1252, UTF_16LE, b"\xFF\xFE", &[], &[1]);
}
5183
/// The output encoding of REPLACEMENT, UTF-16BE, UTF-16LE and UTF-8 is UTF-8,
/// while windows-1252 encodes as itself; encoders report the same encoding.
#[test]
fn test_output_encoding() {
    let expectations = [
        (REPLACEMENT, UTF_8),
        (UTF_16BE, UTF_8),
        (UTF_16LE, UTF_8),
        (UTF_8, UTF_8),
        (WINDOWS_1252, WINDOWS_1252),
    ];
    for &(input, output) in expectations.iter() {
        assert_eq!(input.output_encoding(), output);
    }
    for &(input, output) in expectations.iter() {
        assert_eq!(input.new_encoder().encoding(), output);
    }
}
5197
/// Labels resolve case-insensitively and with surrounding whitespace
/// ignored; unknown or trailing-garbage labels resolve to `None`.
#[test]
fn test_label_resolution() {
    assert_eq!(Encoding::for_label(b"utf-8"), Some(UTF_8));
    assert_eq!(Encoding::for_label(b"UTF-8"), Some(UTF_8));
    assert_eq!(
        Encoding::for_label(b" \t \n \x0C \n utf-8 \r \n \t \x0C "),
        Some(UTF_8)
    );
    assert_eq!(Encoding::for_label(b"utf-8 _"), None);
    assert_eq!(Encoding::for_label(b"bogus"), None);
    assert_eq!(Encoding::for_label(b"bogusbogusbogusbogus"), None);
}
5210
/// Valid non-ASCII windows-1257 input decodes into an owned string without
/// errors and keeps the encoding.
#[test]
fn test_decode_valid_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{00E4}"),
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(!had_errors);
}
5223
/// A byte that windows-1257 cannot decode becomes U+FFFD in an owned string
/// and the error flag is set.
#[test]
fn test_decode_invalid_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(had_errors);
}
5236
/// ASCII-only input is returned as a borrow of the input, with the encoding
/// unchanged and no errors.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow() {
    let (decoded, detected, malformed) = WINDOWS_1257.decode(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        unreachable!();
    }
    assert_eq!(detected, WINDOWS_1257);
    assert!(!malformed);
}
5249
/// A UTF-8 BOM overrides the windows-1257 starting encoding: the BOM is
/// dropped, the valid remainder is borrowed and UTF-8 is reported.
#[test]
fn test_decode_bomful_valid_utf8_as_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => assert_eq!(s, "\u{20AC}\u{00E4}"),
        Cow::Owned(_) => unreachable!(),
    }
    assert_eq!(encoding, UTF_8);
    assert!(!had_errors);
}
5262
/// A UTF-8 BOM overrides the windows-1257 starting encoding even when the
/// rest is malformed: the bad byte becomes U+FFFD in an owned string.
#[test]
fn test_decode_bomful_invalid_utf8_as_windows_1257_to_cow() {
    let (cow, encoding, had_errors) =
        WINDOWS_1257.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert_eq!(encoding, UTF_8);
    assert!(had_errors);
}
5276
/// Valid UTF-8 with a BOM decodes as a borrow with the BOM removed.
#[test]
fn test_decode_bomful_valid_utf8_as_utf_8_to_cow() {
    let (cow, encoding, had_errors) = UTF_8.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => assert_eq!(s, "\u{20AC}\u{00E4}"),
        Cow::Owned(_) => unreachable!(),
    }
    assert_eq!(encoding, UTF_8);
    assert!(!had_errors);
}
5289
/// Malformed UTF-8 after a BOM decodes into an owned string with U+FFFD in
/// place of the bad byte, the BOM removed and the error flag set.
#[test]
fn test_decode_bomful_invalid_utf8_as_utf_8_to_cow() {
    let (cow, encoding, had_errors) = UTF_8.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert_eq!(encoding, UTF_8);
    assert!(had_errors);
}
5302
/// `decode_with_bom_removal` on UTF-8 strips the BOM and borrows the rest.
#[test]
fn test_decode_bomful_valid_utf8_as_utf_8_to_cow_with_bom_removal() {
    let (cow, had_errors) = UTF_8.decode_with_bom_removal(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => assert_eq!(s, "\u{20AC}\u{00E4}"),
        Cow::Owned(_) => unreachable!(),
    }
    assert!(!had_errors);
}
5314
/// `decode_with_bom_removal` on windows-1257 does not act on a UTF-8 BOM:
/// every input byte, BOM included, is decoded as windows-1257.
#[test]
fn test_decode_bomful_valid_utf8_as_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) =
        WINDOWS_1257.decode_with_bom_removal(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(
            s,
            "\u{013C}\u{00BB}\u{00E6}\u{0101}\u{201A}\u{00AC}\u{0106}\u{00A4}"
        ),
    }
    assert!(!had_errors);
}
5330
/// Valid non-ASCII windows-1257 input decodes into an owned string without
/// errors via `decode_with_bom_removal`.
#[test]
fn test_decode_valid_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) = WINDOWS_1257.decode_with_bom_removal(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{00E4}"),
    }
    assert!(!had_errors);
}
5342
/// A byte that windows-1257 cannot decode becomes U+FFFD and sets the error
/// flag via `decode_with_bom_removal`.
#[test]
fn test_decode_invalid_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) = WINDOWS_1257.decode_with_bom_removal(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert!(had_errors);
}
5354
/// ASCII-only input is returned as a borrow of the input without errors via
/// `decode_with_bom_removal`.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_with_bom_removal() {
    let (decoded, malformed) = WINDOWS_1257.decode_with_bom_removal(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        unreachable!();
    }
    assert!(!malformed);
}
5366
/// Without BOM handling, the BOM stays in the borrowed output as U+FEFF.
#[test]
fn test_decode_bomful_valid_utf8_to_cow_without_bom_handling() {
    let (cow, had_errors) =
        UTF_8.decode_without_bom_handling(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => assert_eq!(s, "\u{FEFF}\u{20AC}\u{00E4}"),
        Cow::Owned(_) => unreachable!(),
    }
    assert!(!had_errors);
}
5379
/// Without BOM handling, malformed UTF-8 yields an owned string that keeps
/// U+FEFF and has U+FFFD in place of the bad byte.
#[test]
fn test_decode_bomful_invalid_utf8_to_cow_without_bom_handling() {
    let (cow, had_errors) =
        UTF_8.decode_without_bom_handling(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "\u{FEFF}\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert!(had_errors);
}
5392
/// Valid non-ASCII windows-1257 input decodes into an owned string without
/// errors via `decode_without_bom_handling`.
#[test]
fn test_decode_valid_windows_1257_to_cow_without_bom_handling() {
    let (cow, had_errors) = WINDOWS_1257.decode_without_bom_handling(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{00E4}"),
    }
    assert!(!had_errors);
}
5404
/// A byte that windows-1257 cannot decode becomes U+FFFD and sets the error
/// flag via `decode_without_bom_handling`.
#[test]
fn test_decode_invalid_windows_1257_to_cow_without_bom_handling() {
    let (cow, had_errors) = WINDOWS_1257.decode_without_bom_handling(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}"),
    }
    assert!(had_errors);
}
5416
/// ASCII-only input is returned as a borrow of the input without errors via
/// `decode_without_bom_handling`.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_without_bom_handling() {
    let (decoded, malformed) = WINDOWS_1257.decode_without_bom_handling(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        unreachable!();
    }
    assert!(!malformed);
}
5428
/// Valid UTF-8 decodes to `Some` borrowed string that keeps the BOM as
/// U+FEFF when neither BOM handling nor replacement is performed.
#[test]
fn test_decode_bomful_valid_utf8_to_cow_without_bom_handling_and_without_replacement() {
    match UTF_8.decode_without_bom_handling_and_without_replacement(
        b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4",
    ) {
        Some(Cow::Borrowed(s)) => assert_eq!(s, "\u{FEFF}\u{20AC}\u{00E4}"),
        Some(Cow::Owned(_)) => unreachable!(),
        None => unreachable!(),
    }
}
5443
// Without replacement, malformed UTF-8 (stray 0x80) must yield `None`.
#[test]
fn test_decode_bomful_invalid_utf8_to_cow_without_bom_handling_and_without_replacement() {
    let decoded = UTF_8.decode_without_bom_handling_and_without_replacement(
        b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4",
    );
    assert!(decoded.is_none());
}
5452
// Without replacement, valid non-ASCII windows-1257 input must yield `Some`
// owned text.
#[test]
fn test_decode_valid_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    let decoded = WINDOWS_1257.decode_without_bom_handling_and_without_replacement(b"abc\x80\xE4");
    match decoded {
        Some(Cow::Owned(text)) => assert_eq!(text, "abc\u{20AC}\u{00E4}"),
        // Both a borrowed result and `None` are failures here.
        Some(Cow::Borrowed(_)) | None => unreachable!(),
    }
}
5465
// Without replacement, windows-1257 input containing 0xA1 must yield `None`.
#[test]
fn test_decode_invalid_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    let decoded =
        WINDOWS_1257.decode_without_bom_handling_and_without_replacement(b"abc\x80\xA1\xE4");
    assert!(decoded.is_none());
}
5472
// Without replacement, pure-ASCII windows-1257 input must yield `Some`
// borrowed text.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    let decoded = WINDOWS_1257.decode_without_bom_handling_and_without_replacement(b"abc");
    match decoded {
        Some(Cow::Borrowed(text)) => assert_eq!(text, "abc"),
        // Both an owned result and `None` are failures here.
        Some(Cow::Owned(_)) | None => unreachable!(),
    }
}
5485
// Encoding pure ASCII to windows-1257 must return the bytes borrowed, report
// windows-1257 as the output encoding, and flag no errors.
#[test]
fn test_encode_ascii_only_windows_1257_to_cow() {
    let (encoded, encoding, had_errors) = WINDOWS_1257.encode("abc");
    if let Cow::Borrowed(bytes) = encoded {
        assert_eq!(bytes, b"abc");
    } else {
        unreachable!();
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(!had_errors);
}
5498
// Encoding representable non-ASCII text to windows-1257 must return owned
// bytes, report windows-1257 as the output encoding, and flag no errors.
#[test]
fn test_encode_valid_windows_1257_to_cow() {
    let (encoded, encoding, had_errors) = WINDOWS_1257.encode("abc\u{20AC}\u{00E4}");
    if let Cow::Owned(bytes) = encoded {
        assert_eq!(bytes, b"abc\x80\xE4");
    } else {
        unreachable!();
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(!had_errors);
}
5511
// Feeds a lone 0xFF byte to a UTF-16LE decoder twice (the second time as the
// final chunk), each time into a buffer sized by `max_utf16_buffer_length(1)`;
// both calls must report `InputEmpty`.
#[test]
fn test_utf16_space_with_one_bom_byte() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    for &last in [false, true].iter() {
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let outcome = decoder.decode_to_utf16(b"\xFF", &mut dst[..needed], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5527
// Feeds a lone 0xFF byte to a UTF-8 decoder twice (the second time as the
// final chunk), each time into a buffer sized by `max_utf16_buffer_length(1)`;
// both calls must report `InputEmpty`.
#[test]
fn test_utf8_space_with_one_bom_byte() {
    let mut decoder = UTF_8.new_decoder();
    let mut dst = [0u16; 12];
    for &last in [false, true].iter() {
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let outcome = decoder.decode_to_utf16(b"\xFF", &mut dst[..needed], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5543
// Feeds 0xEF, 0xBB and then a final 0xFF one byte at a time to a UTF-16LE
// decoder. The buffer for each call is sized by `max_utf16_buffer_length(1)`
// and every call must report `InputEmpty`.
#[test]
fn test_utf16_space_with_two_bom_bytes() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    // (input chunk, is this the last chunk?)
    let steps = [
        (&b"\xEF"[..], false),
        (&b"\xBB"[..], false),
        (&b"\xFF"[..], true),
    ];
    for &(chunk, last) in steps.iter() {
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let outcome = decoder.decode_to_utf16(chunk, &mut dst[..needed], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5564
// Feeds 0xEF, 0xBB and then a final 0xFF one byte at a time to a UTF-8
// decoder. The buffer for each call is sized by `max_utf16_buffer_length(1)`
// and every call must report `InputEmpty`.
#[test]
fn test_utf8_space_with_two_bom_bytes() {
    let mut decoder = UTF_8.new_decoder();
    let mut dst = [0u16; 12];
    // (input chunk, is this the last chunk?)
    let steps = [
        (&b"\xEF"[..], false),
        (&b"\xBB"[..], false),
        (&b"\xFF"[..], true),
    ];
    for &(chunk, last) in steps.iter() {
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let outcome = decoder.decode_to_utf16(chunk, &mut dst[..needed], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5585
// Decodes the two bytes 0xFF 0xFF as a single, final UTF-16LE chunk into a
// buffer sized by `max_utf16_buffer_length(2)`; the call must report
// `InputEmpty`.
#[test]
fn test_utf16_space_with_one_bom_byte_and_a_second_byte_in_same_call() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    let needed = decoder.max_utf16_buffer_length(2).unwrap();
    let outcome = decoder
        .decode_to_utf16(b"\xFF\xFF", &mut dst[..needed], true)
        .0;
    assert_eq!(outcome, CoderResult::InputEmpty);
}
5596
// Encoding empty UTF-8 input with a fresh ISO-2022-JP encoder into an 8-byte
// buffer must report `InputEmpty`, both mid-stream and at end of stream.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_ascii_from_utf8() {
    let mut dst = [0u8; 8];
    let mut encoder = ISO_2022_JP.new_encoder();
    for &last in [false, true].iter() {
        let outcome = encoder.encode_from_utf8("", &mut dst[..], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5610
// After U+00A5 has been encoded, finishing the stream with only an 8-byte
// output window must report `OutputFull`; a non-final empty call with the
// same window must still report `InputEmpty`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_roman_from_utf8() {
    let mut dst = [0u8; 16];
    let mut encoder = ISO_2022_JP.new_encoder();

    // Step 1: encode the yen sign with the whole 16-byte buffer available.
    let first = encoder.encode_from_utf8("\u{A5}", &mut dst[..], false).0;
    assert_eq!(first, CoderResult::InputEmpty);

    // Step 2: empty, non-final input into an 8-byte window.
    let second = encoder.encode_from_utf8("", &mut dst[..8], false).0;
    assert_eq!(second, CoderResult::InputEmpty);

    // Step 3: empty, final input into the same 8-byte window.
    let third = encoder.encode_from_utf8("", &mut dst[..8], true).0;
    assert_eq!(third, CoderResult::OutputFull);
}
5628
// Checks ISO-2022-JP encoder results at the edge of the output buffer when
// encoding from UTF-8. Each step uses a brand-new encoder.
#[test]
fn test_buffer_end_iso_2022_jp_from_utf8() {
    let mut dst = [0u8; 18];

    // U+00A5 followed by U+1F4A9 into 18 bytes: fits mid-stream ...
    let yen_then_astral_open = ISO_2022_JP
        .new_encoder()
        .encode_from_utf8("\u{A5}\u{1F4A9}", &mut dst[..], false)
        .0;
    assert_eq!(yen_then_astral_open, CoderResult::InputEmpty);

    // ... but reports `OutputFull` when the same call also ends the stream.
    let yen_then_astral_final = ISO_2022_JP
        .new_encoder()
        .encode_from_utf8("\u{A5}\u{1F4A9}", &mut dst[..], true)
        .0;
    assert_eq!(yen_then_astral_final, CoderResult::OutputFull);

    // U+1F4A9 alone into 13 bytes fits both mid-stream and at end of stream.
    let astral_open = ISO_2022_JP
        .new_encoder()
        .encode_from_utf8("\u{1F4A9}", &mut dst[..13], false)
        .0;
    assert_eq!(astral_open, CoderResult::InputEmpty);

    let astral_final = ISO_2022_JP
        .new_encoder()
        .encode_from_utf8("\u{1F4A9}", &mut dst[..13], true)
        .0;
    assert_eq!(astral_final, CoderResult::InputEmpty);
}
5654
// Encoding empty UTF-16 input with a fresh ISO-2022-JP encoder into an 8-byte
// buffer must report `InputEmpty`, both mid-stream and at end of stream.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_ascii_from_utf16() {
    let mut dst = [0u8; 8];
    let mut encoder = ISO_2022_JP.new_encoder();
    for &last in [false, true].iter() {
        let outcome = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..], last).0;
        assert_eq!(outcome, CoderResult::InputEmpty);
    }
}
5668
// After U+00A5 has been encoded from UTF-16, finishing the stream with only
// an 8-byte output window must report `OutputFull`; a non-final empty call
// with the same window must still report `InputEmpty`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_roman_from_utf16() {
    let mut dst = [0u8; 16];
    let mut encoder = ISO_2022_JP.new_encoder();

    // Step 1: encode the yen sign with the whole 16-byte buffer available.
    let first = encoder.encode_from_utf16(&[0xA5u16], &mut dst[..], false).0;
    assert_eq!(first, CoderResult::InputEmpty);

    // Step 2: empty, non-final input into an 8-byte window.
    let second = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..8], false).0;
    assert_eq!(second, CoderResult::InputEmpty);

    // Step 3: empty, final input into the same 8-byte window.
    let third = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..8], true).0;
    assert_eq!(third, CoderResult::OutputFull);
}
5686
// Checks ISO-2022-JP encoder results at the edge of the output buffer when
// encoding from UTF-16. Each step uses a brand-new encoder.
#[test]
fn test_buffer_end_iso_2022_jp_from_utf16() {
    let mut dst = [0u8; 18];

    // U+00A5 followed by the surrogate pair D83D DCA9 into 18 bytes: fits
    // mid-stream ...
    let yen_then_astral_open = ISO_2022_JP
        .new_encoder()
        .encode_from_utf16(&[0xA5u16, 0xD83Du16, 0xDCA9u16], &mut dst[..], false)
        .0;
    assert_eq!(yen_then_astral_open, CoderResult::InputEmpty);

    // ... but reports `OutputFull` when the same call also ends the stream.
    let yen_then_astral_final = ISO_2022_JP
        .new_encoder()
        .encode_from_utf16(&[0xA5u16, 0xD83Du16, 0xDCA9u16], &mut dst[..], true)
        .0;
    assert_eq!(yen_then_astral_final, CoderResult::OutputFull);

    // The surrogate pair alone into 13 bytes fits both mid-stream and at end
    // of stream.
    let astral_open = ISO_2022_JP
        .new_encoder()
        .encode_from_utf16(&[0xD83Du16, 0xDCA9u16], &mut dst[..13], false)
        .0;
    assert_eq!(astral_open, CoderResult::InputEmpty);

    let astral_final = ISO_2022_JP
        .new_encoder()
        .encode_from_utf16(&[0xD83Du16, 0xDCA9u16], &mut dst[..13], true)
        .0;
    assert_eq!(astral_final, CoderResult::InputEmpty);
}
5715
// A UTF-16BE decoder given the two bytes D8 00 mid-stream must consume both
// and write nothing. The same two bytes are then fed again as the final
// chunk with the result deliberately ignored, so that call only has to
// return without panicking.
#[test]
fn test_buffer_end_utf16be() {
    let mut decoder = UTF_16BE.new_decoder_without_bom_handling();
    let mut dest = [0u8; 4];

    let first = decoder.decode_to_utf8(&[0xD8, 0x00], &mut dest, false);
    assert_eq!(first, (CoderResult::InputEmpty, 2, 0, false));

    let _ = decoder.decode_to_utf8(&[0xD8, 0x00], &mut dest, true);
}
5728
// Encodings must be usable as set members: inserted ones are found, others
// are not, and a removed one is no longer found.
#[test]
fn test_hash() {
    let mut seen = ::alloc::collections::btree_set::BTreeSet::new();
    for &encoding in [UTF_8, ISO_2022_JP].iter() {
        seen.insert(encoding);
    }

    assert!(seen.contains(UTF_8));
    assert!(seen.contains(ISO_2022_JP));
    assert!(!seen.contains(WINDOWS_1252));

    seen.remove(ISO_2022_JP);
    assert!(!seen.contains(ISO_2022_JP));
}
5740
// Encoding U+3041 followed by U+FFFF from UTF-16 as the final chunk into a
// 17-byte buffer must report `OutputFull`.
#[test]
fn test_iso_2022_jp_ncr_extra_from_utf16() {
    let mut dst = [0u8; 17];
    let outcome = ISO_2022_JP
        .new_encoder()
        .encode_from_utf16(&[0x3041u16, 0xFFFFu16], &mut dst[..], true)
        .0;
    assert_eq!(outcome, CoderResult::OutputFull);
}
5751
// Encoding U+3041 followed by U+FFFF from UTF-8 as the final chunk into a
// 17-byte buffer must report `OutputFull`.
#[test]
fn test_iso_2022_jp_ncr_extra_from_utf8() {
    let mut dst = [0u8; 17];
    let outcome = ISO_2022_JP
        .new_encoder()
        .encode_from_utf8("\u{3041}\u{FFFF}", &mut dst[..], true)
        .0;
    assert_eq!(outcome, CoderResult::OutputFull);
}
5762
// A decoder created from the replacement encoding is fed a UTF-8 BOM
// followed by 'A'. With an output buffer sized by
// `max_utf8_buffer_length_without_replacement`, the whole input must be
// consumed and exactly one byte (0x41) written.
#[test]
fn test_max_length_with_bom_to_utf8() {
    let mut output = [0u8; 20];
    let mut decoder = REPLACEMENT.new_decoder();
    let input = b"\xEF\xBB\xBFA";

    let capacity = decoder
        .max_utf8_buffer_length_without_replacement(input.len())
        .unwrap();
    let (result, read, written) =
        decoder.decode_to_utf8_without_replacement(input, &mut output[..capacity], true);

    assert_eq!(result, DecoderResult::InputEmpty);
    assert_eq!(read, input.len());
    assert_eq!(written, 1);
    assert_eq!(output[0], 0x41);
}
5780
// A `Demo` value holding an encoding must survive a round trip through both
// serde_json and bincode unchanged.
#[cfg(feature = "serde")]
#[test]
fn test_serde() {
    let original = Demo {
        num: 42,
        name: "foo".into(),
        enc: UTF_8,
    };

    // JSON round trip.
    let json = serde_json::to_string(&original).unwrap();
    let from_json: Demo = serde_json::from_str(&json).unwrap();
    assert_eq!(from_json, original);

    // bincode round trip.
    let bytes = bincode::serialize(&original).unwrap();
    let from_bincode: Demo = bincode::deserialize(&bytes[..]).unwrap();
    assert_eq!(from_bincode, original);
}
5799
// Pins the value of `is_single_byte()` for every encoding constant listed
// below. The label is printed on failure so the offending encoding is
// identifiable.
#[test]
fn test_is_single_byte() {
    // (label, encoding, expected `is_single_byte()` result)
    let cases = [
        ("BIG5", BIG5, false),
        ("EUC_JP", EUC_JP, false),
        ("EUC_KR", EUC_KR, false),
        ("GB18030", GB18030, false),
        ("GBK", GBK, false),
        ("REPLACEMENT", REPLACEMENT, false),
        ("SHIFT_JIS", SHIFT_JIS, false),
        ("UTF_8", UTF_8, false),
        ("UTF_16BE", UTF_16BE, false),
        ("UTF_16LE", UTF_16LE, false),
        ("ISO_2022_JP", ISO_2022_JP, false),
        ("IBM866", IBM866, true),
        ("ISO_8859_2", ISO_8859_2, true),
        ("ISO_8859_3", ISO_8859_3, true),
        ("ISO_8859_4", ISO_8859_4, true),
        ("ISO_8859_5", ISO_8859_5, true),
        ("ISO_8859_6", ISO_8859_6, true),
        ("ISO_8859_7", ISO_8859_7, true),
        ("ISO_8859_8", ISO_8859_8, true),
        ("ISO_8859_10", ISO_8859_10, true),
        ("ISO_8859_13", ISO_8859_13, true),
        ("ISO_8859_14", ISO_8859_14, true),
        ("ISO_8859_15", ISO_8859_15, true),
        ("ISO_8859_16", ISO_8859_16, true),
        ("ISO_8859_8_I", ISO_8859_8_I, true),
        ("KOI8_R", KOI8_R, true),
        ("KOI8_U", KOI8_U, true),
        ("MACINTOSH", MACINTOSH, true),
        ("WINDOWS_874", WINDOWS_874, true),
        ("WINDOWS_1250", WINDOWS_1250, true),
        ("WINDOWS_1251", WINDOWS_1251, true),
        ("WINDOWS_1252", WINDOWS_1252, true),
        ("WINDOWS_1253", WINDOWS_1253, true),
        ("WINDOWS_1254", WINDOWS_1254, true),
        ("WINDOWS_1255", WINDOWS_1255, true),
        ("WINDOWS_1256", WINDOWS_1256, true),
        ("WINDOWS_1257", WINDOWS_1257, true),
        ("WINDOWS_1258", WINDOWS_1258, true),
        ("X_MAC_CYRILLIC", X_MAC_CYRILLIC, true),
        ("X_USER_DEFINED", X_USER_DEFINED, true),
    ];
    for &(label, encoding, expected) in cases.iter() {
        assert_eq!(encoding.is_single_byte(), expected, "{}", label);
    }
}
5844
// Pins the result of `Decoder::latin1_byte_compatible_up_to` for one fixed
// buffer across every encoding constant, using decoders without BOM
// handling, and then checks how the answer changes for a BOM-sniffing UTF-8
// decoder as it consumes input.
#[test]
fn test_latin1_byte_compatible_up_to() {
    let buffer = b"a\x81\xB6\xF6\xF0\x82\xB4";

    // (label, encoding, expected result); `None` is expected for the
    // replacement encoding and both UTF-16 variants. The label is printed on
    // failure so the offending encoding is identifiable.
    let expectations = [
        ("BIG5", BIG5, Some(1)),
        ("EUC_JP", EUC_JP, Some(1)),
        ("EUC_KR", EUC_KR, Some(1)),
        ("GB18030", GB18030, Some(1)),
        ("GBK", GBK, Some(1)),
        ("REPLACEMENT", REPLACEMENT, None),
        ("SHIFT_JIS", SHIFT_JIS, Some(1)),
        ("UTF_8", UTF_8, Some(1)),
        ("UTF_16BE", UTF_16BE, None),
        ("UTF_16LE", UTF_16LE, None),
        ("ISO_2022_JP", ISO_2022_JP, Some(1)),
        ("IBM866", IBM866, Some(1)),
        ("ISO_8859_2", ISO_8859_2, Some(2)),
        ("ISO_8859_3", ISO_8859_3, Some(2)),
        ("ISO_8859_4", ISO_8859_4, Some(2)),
        ("ISO_8859_5", ISO_8859_5, Some(2)),
        ("ISO_8859_6", ISO_8859_6, Some(2)),
        ("ISO_8859_7", ISO_8859_7, Some(2)),
        ("ISO_8859_8", ISO_8859_8, Some(3)),
        ("ISO_8859_10", ISO_8859_10, Some(2)),
        ("ISO_8859_13", ISO_8859_13, Some(4)),
        ("ISO_8859_14", ISO_8859_14, Some(4)),
        ("ISO_8859_15", ISO_8859_15, Some(6)),
        ("ISO_8859_16", ISO_8859_16, Some(4)),
        ("ISO_8859_8_I", ISO_8859_8_I, Some(3)),
        ("KOI8_R", KOI8_R, Some(1)),
        ("KOI8_U", KOI8_U, Some(1)),
        ("MACINTOSH", MACINTOSH, Some(1)),
        ("WINDOWS_874", WINDOWS_874, Some(2)),
        ("WINDOWS_1250", WINDOWS_1250, Some(4)),
        ("WINDOWS_1251", WINDOWS_1251, Some(1)),
        ("WINDOWS_1252", WINDOWS_1252, Some(5)),
        ("WINDOWS_1253", WINDOWS_1253, Some(3)),
        ("WINDOWS_1254", WINDOWS_1254, Some(4)),
        ("WINDOWS_1255", WINDOWS_1255, Some(3)),
        ("WINDOWS_1256", WINDOWS_1256, Some(1)),
        ("WINDOWS_1257", WINDOWS_1257, Some(4)),
        ("WINDOWS_1258", WINDOWS_1258, Some(4)),
        ("X_MAC_CYRILLIC", X_MAC_CYRILLIC, Some(1)),
        ("X_USER_DEFINED", X_USER_DEFINED, Some(1)),
    ];
    for &(label, encoding, expected) in expectations.iter() {
        let actual = encoding
            .new_decoder_without_bom_handling()
            .latin1_byte_compatible_up_to(buffer);
        assert_eq!(actual, expected, "{}", label);
    }

    // A fresh BOM-sniffing UTF-8 decoder gives no answer.
    let fresh = UTF_8.new_decoder();
    assert!(fresh.latin1_byte_compatible_up_to(buffer).is_none());

    let mut decoder = UTF_8.new_decoder();
    let mut output = [0u16; 4];

    // After only the first BOM byte there is still no answer.
    let _ = decoder.decode_to_utf16(b"\xEF", &mut output, false);
    assert!(decoder.latin1_byte_compatible_up_to(buffer).is_none());

    // Once the remaining two BOM bytes have been consumed an answer is given.
    let _ = decoder.decode_to_utf16(b"\xBB\xBF", &mut output, false);
    assert_eq!(decoder.latin1_byte_compatible_up_to(buffer), Some(1));

    // After a further lone 0xEF has been fed, the answer is `None` again.
    let _ = decoder.decode_to_utf16(b"\xEF", &mut output, false);
    assert_eq!(decoder.latin1_byte_compatible_up_to(buffer), None);
}
6133	}
6134