lib.rs source code [crates/encoding_rs/src/lib.rs]

1	// Copyright Mozilla Foundation. See the COPYRIGHT
2	// file at the top-level directory of this distribution.
3	//
4	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5	// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6	// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
7	// option. This file may not be copied, modified, or distributed
8	// except according to those terms.
9
10	// The above license applies to code in this file. The label data in
11	// this file is generated from WHATWG's encodings.json, which came under
12	// the following license:
13
14	// Copyright © WHATWG (Apple, Google, Mozilla, Microsoft).
15	//
16	// Redistribution and use in source and binary forms, with or without
17	// modification, are permitted provided that the following conditions are met:
18	//
19	// 1. Redistributions of source code must retain the above copyright notice, this
20	// list of conditions and the following disclaimer.
21	//
22	// 2. Redistributions in binary form must reproduce the above copyright notice,
23	// this list of conditions and the following disclaimer in the documentation
24	// and/or other materials provided with the distribution.
25	//
26	// 3. Neither the name of the copyright holder nor the names of its
27	// contributors may be used to endorse or promote products derived from
28	// this software without specific prior written permission.
29	//
30	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
33	// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
34	// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36	// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
37	// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
38	// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
41	#![cfg_attr(
42	feature = "cargo-clippy",
43	allow(doc_markdown, inline_always, new_ret_no_self)
44	)]
45
46	//! encoding_rs is a Gecko-oriented Free Software / Open Source implementation
47	//! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust.
48	//! Gecko-oriented means that converting to and from UTF-16 is supported in
49	//! addition to converting to and from UTF-8, that the performance and
50	//! streamability goals are browser-oriented, and that FFI-friendliness is a
51	//! goal.
52	//!
53	//! Additionally, the `mem` module provides functions that are useful for
54	//! applications that need to be able to deal with legacy in-memory
55	//! representations of Unicode.
56	//!
57	//! For expectation setting, please be sure to read the sections
58	//! [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes),
59	//! [_ISO-8859-1_](#iso-8859-1) and [_Web / Browser Focus_](#web--browser-focus) below.
60	//!
61	//! There is a [long-form write-up](https://hsivonen.fi/encoding_rs/) about the
62	//! design and internals of the crate.
63	//!
64	//! # Availability
65	//!
66	//! The code is available under the
67	//! [Apache license, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)
68	//! or the [MIT license](https://opensource.org/licenses/MIT), at your option.
69	//! See the
70	//! [`COPYRIGHT`](https://github.com/hsivonen/encoding_rs/blob/master/COPYRIGHT)
71	//! file for details.
72	//! The [repository is on GitHub](https://github.com/hsivonen/encoding_rs). The
73	//! [crate is available on crates.io](https://crates.io/crates/encoding_rs).
74	//!
75	//! # Integration with `std::io`
76	//!
77	//! This crate doesn't implement traits from `std::io`. However, the case of
78	//! wrapping a `std::io::Read` in a decoder that implements `std::io::Read` and
79	//! presents the data from the wrapped `std::io::Read` as UTF-8 is addressed by
80	//! the [`encoding_rs_io`](https://docs.rs/encoding_rs_io/) crate.
81	//!
82	//! # Examples
83	//!
84	//! Example programs:
85	//!
86	//! * [Rust](https://github.com/hsivonen/recode_rs)
87	//! * [C](https://github.com/hsivonen/recode_c)
88	//! * [C++](https://github.com/hsivonen/recode_cpp)
89	//!
90	//! Decode using the non-streaming API:
91	//!
92	//! ```
93	//! #[cfg(feature = "alloc")] {
94	//! use encoding_rs::*;
95	//!
96	//! let expectation = "\u{30CF}\u{30ED}\u{30FC}\u{30FB}\u{30EF}\u{30FC}\u{30EB}\u{30C9}";
97	//! let bytes = b"\x83n\x83\x8D\x81[\x81E\x83\x8F\x81[\x83\x8B\x83h";
98	//!
99	//! let (cow, encoding_used, had_errors) = SHIFT_JIS.decode(bytes);
100	//! assert_eq!(&cow[..], expectation);
101	//! assert_eq!(encoding_used, SHIFT_JIS);
102	//! assert!(!had_errors);
103	//! }
104	//! ```
105	//!
106	//! Decode using the streaming API with minimal `unsafe`:
107	//!
108	//! ```
109	//! use encoding_rs::*;
110	//!
111	//! let expectation = "\u{30CF}\u{30ED}\u{30FC}\u{30FB}\u{30EF}\u{30FC}\u{30EB}\u{30C9}";
112	//!
113	//! // Use an array of byte slices to demonstrate content arriving piece by
114	//! // piece from the network.
115	//! let bytes: [&'static [u8]; 4] = [b"\x83",
116	//! b"n\x83\x8D\x81",
117	//! b"[\x81E\x83\x8F\x81[\x83",
118	//! b"\x8B\x83h"];
119	//!
120	//! // Very short output buffer to demonstrate the output buffer getting full.
121	//! // Normally, you'd use something like `[0u8; 2048]`.
122	//! let mut buffer_bytes = [0u8; 8];
123	//! let mut buffer: &mut str = std::str::from_utf8_mut(&mut buffer_bytes[..]).unwrap();
124	//!
125	//! // How many bytes in the buffer currently hold significant data.
126	//! let mut bytes_in_buffer = 0usize;
127	//!
128	//! // Collect the output to a string for demonstration purposes.
129	//! let mut output = String::new();
130	//!
131	//! // The `Decoder`
132	//! let mut decoder = SHIFT_JIS.new_decoder();
133	//!
134	//! // Track whether we see errors.
135	//! let mut total_had_errors = false;
136	//!
137	//! // Decode using a fixed-size intermediate buffer (for demonstrating the
138	//! // use of a fixed-size buffer; normally when the output of an incremental
139	//! // decode goes to a `String` one would use `Decoder.decode_to_string()` to
140	//! // avoid the intermediate buffer).
141	//! for input in &bytes[..] {
142	//! // The number of bytes already read from current `input` in total.
143	//! let mut total_read_from_current_input = 0usize;
144	//!
145	//! loop {
146	//! let (result, read, written, had_errors) =
147	//! decoder.decode_to_str(&input[total_read_from_current_input..],
148	//! &mut buffer[bytes_in_buffer..],
149	//! false);
150	//! total_read_from_current_input += read;
151	//! bytes_in_buffer += written;
152	//! total_had_errors |= had_errors;
153	//! match result {
154	//! CoderResult::InputEmpty => {
155	//! // We have consumed the current input buffer. Break out of
156	//! // the inner loop to get the next input buffer from the
157	//! // outer loop.
158	//! break;
159	//! },
160	//! CoderResult::OutputFull => {
161	//! // Write the current buffer out and consider the buffer
162	//! // empty.
163	//! output.push_str(&buffer[..bytes_in_buffer]);
164	//! bytes_in_buffer = 0usize;
165	//! continue;
166	//! }
167	//! }
168	//! }
169	//! }
170	//!
171	//! // Process EOF
172	//! loop {
173	//! let (result, _, written, had_errors) =
174	//! decoder.decode_to_str(b"",
175	//! &mut buffer[bytes_in_buffer..],
176	//! true);
177	//! bytes_in_buffer += written;
178	//! total_had_errors |= had_errors;
179	//! // Write the current buffer out and consider the buffer empty.
180	//! // Need to do this here for both `match` arms, because we exit the
181	//! // loop on `CoderResult::InputEmpty`.
182	//! output.push_str(&buffer[..bytes_in_buffer]);
183	//! bytes_in_buffer = 0usize;
184	//! match result {
185	//! CoderResult::InputEmpty => {
186	//! // Done!
187	//! break;
188	//! },
189	//! CoderResult::OutputFull => {
190	//! continue;
191	//! }
192	//! }
193	//! }
194	//!
195	//! assert_eq!(&output[..], expectation);
196	//! assert!(!total_had_errors);
197	//! ```
198	//!
199	//! ## UTF-16LE, UTF-16BE and Unicode Encoding Schemes
200	//!
201	//! The Encoding Standard doesn't specify encoders for UTF-16LE and UTF-16BE,
202	//! __so this crate does not provide encoders for those encodings__!
203	//! Along with the replacement encoding, their _output encoding_ (i.e. the
204	//! encoding used for form submission and error handling in the query string
205	//! of URLs) is UTF-8, so you get a UTF-8 encoder if you request an encoder
206	//! for them.
207	//!
208	//! Additionally, the Encoding Standard factors BOM handling into wrapper
209	//! algorithms so that BOM handling isn't part of the definition of the
210	//! encodings themselves. The Unicode _encoding schemes_ in the Unicode
211	//! Standard define BOM handling or lack thereof as part of the encoding
212	//! scheme.
213	//!
214	//! When used with the `_without_bom_handling` entry points, the UTF-16LE
215	//! and UTF-16BE _encodings_ match the same-named _encoding schemes_ from
216	//! the Unicode Standard.
217	//!
218	//! When used with the `_with_bom_removal` entry points, the UTF-8
219	//! _encoding_ matches the UTF-8 _encoding scheme_ from the Unicode
220	//! Standard.
221	//!
222	//! This crate does not provide a mode that matches the UTF-16 _encoding
223	//! scheme_ from the Unicode Standard. The UTF-16BE encoding used with
224	//! the entry points without `_bom_` qualifiers is the closest match,
225	//! but in that case, the UTF-8 BOM triggers UTF-8 decoding, which is
226	//! not part of the behavior of the UTF-16 _encoding scheme_ per the
227	//! Unicode Standard.
228	//!
229	//! The UTF-32 family of Unicode encoding schemes is not supported
230	//! by this crate. The Encoding Standard doesn't define any UTF-32
231	//! family encodings, since they aren't necessary for consuming Web
232	//! content.
233	//!
234	//! While gb18030 is capable of representing U+FEFF, the Encoding
235	//! Standard does not treat the gb18030 byte representation of U+FEFF
236	//! as a BOM, so neither does this crate.
237	//!
238	//! ## ISO-8859-1
239	//!
240	//! ISO-8859-1 does not exist as a distinct encoding from windows-1252 in
241	//! the Encoding Standard. Therefore, an encoding that maps the unsigned
242	//! byte value to the same Unicode scalar value is not available via
243	//! `Encoding` in this crate.
244	//!
245	//! However, the functions whose name starts with `convert` and contains
246	//! `latin1` in the `mem` module support such conversions, which are known as
247	//! [_isomorphic decode_](https://infra.spec.whatwg.org/#isomorphic-decode)
248	//! and [_isomorphic encode_](https://infra.spec.whatwg.org/#isomorphic-encode)
249	//! in the [Infra Standard](https://infra.spec.whatwg.org/).
250	//!
251	//! ## Web / Browser Focus
252	//!
253	//! Both in terms of scope and performance, the focus is on the Web. For scope,
254	//! this means that encoding_rs implements the Encoding Standard fully and
255	//! doesn't implement encodings that are not specified in the Encoding
256	//! Standard. For performance, this means that decoding performance is
257	//! important as well as performance for encoding into UTF-8 or encoding the
258	//! Basic Latin range (ASCII) into legacy encodings. Non-Basic Latin needs to
259	//! be encoded into legacy encodings in only two places in the Web platform: in
260	//! the query part of URLs, in which case it's a matter of relatively rare
261	//! error handling, and in form submission, in which case the user action and
262	//! networking tend to hide the performance of the encoder.
263	//!
264	//! Deemphasizing performance of encoding non-Basic Latin text into legacy
265	//! encodings enables smaller code size thanks to the encoder side using the
266	//! decode-optimized data tables without having encode-optimized data tables at
267	//! all. Even in decoders, smaller lookup table size is preferred over avoiding
268	//! multiplication operations.
269	//!
270	//! Additionally, performance is a non-goal for the ASCII-incompatible
271	//! ISO-2022-JP encoding, which is rarely used on the Web. Instead of
272	//! performance, the decoder for ISO-2022-JP optimizes for ease/clarity
273	//! of implementation.
274	//!
275	//! Despite the browser focus, the hope is that non-browser applications
276	//! that wish to consume Web content or submit Web forms in a Web-compatible
277	//! way will find encoding_rs useful. While encoding_rs does not try to match
278	//! Windows behavior, many of the encodings are close enough to legacy
279	//! encodings implemented by Windows that applications that need to consume
280	//! data in legacy Windows encodings may find encoding_rs useful. The
281	//! [codepage](https://crates.io/crates/codepage) crate maps from Windows
282	//! code page identifiers onto encoding_rs `Encoding`s and vice versa.
283	//!
284	//! For decoding email, UTF-7 support is needed (unfortunately) in addition
285	//! to the encodings defined in the Encoding Standard. The
286	//! [charset](https://crates.io/crates/charset) crate wraps encoding_rs and adds
287	//! UTF-7 decoding for email purposes.
288	//!
289	//! For single-byte DOS encodings beyond the ones supported by the Encoding
290	//! Standard, there is the [`oem_cp`](https://crates.io/crates/oem_cp) crate.
291	//!
292	//! # Preparing Text for the Encoders
293	//!
294	//! Normalizing text into Unicode Normalization Form C prior to encoding text
295	//! into a legacy encoding minimizes unmappable characters. Text can be
296	//! normalized to Unicode Normalization Form C using the
297	//! [`icu_normalizer`](https://crates.io/crates/icu_normalizer) crate, which
298	//! is part of [ICU4X](https://icu4x.unicode.org/).
299	//!
300	//! The exception is windows-1258, which after normalizing to Unicode
301	//! Normalization Form C requires tone marks to be decomposed in order to
302	//! minimize unmappable characters. Vietnamese tone marks can be decomposed
303	//! using the [`detone`](https://crates.io/crates/detone) crate.
304	//!
305	//! # Streaming & Non-Streaming; Rust & C/C++
306	//!
307	//! The API in Rust has two modes of operation: streaming and non-streaming.
308	//! The streaming API is the foundation of the implementation and should be
309	//! used when processing data that arrives piecemeal from an i/o stream. The
310	//! streaming API has an FFI wrapper (as a [separate crate][1]) that exposes it
311	//! to C callers. The non-streaming part of the API is for Rust callers only and
312	//! is smart about borrowing instead of copying when possible. When
313	//! streamability is not needed, the non-streaming API should be preferred in
314	//! order to avoid copying data when a borrow suffices.
315	//!
316	//! There is no analogous C API exposed via FFI, mainly because C doesn't have
317	//! standard types for growable byte buffers and Unicode strings that know
318	//! their length.
319	//!
320	//! The C API (header file generated at `target/include/encoding_rs.h` when
321	//! building encoding_rs) can, in turn, be wrapped for use from C++. Such a
322	//! C++ wrapper can re-create the non-streaming API in C++ for C++ callers.
323	//! The C binding comes with a [C++17 wrapper][2] that uses standard library +
324	//! [GSL][3] types and that recreates the non-streaming API in C++ on top of
325	//! the streaming API. A C++ wrapper with XPCOM/MFBT types is available as
326	//! [`mozilla::Encoding`][4].
327	//!
328	//! The `Encoding` type is common to both the streaming and non-streaming
329	//! modes. In the streaming mode, decoding operations are performed with a
330	//! `Decoder` and encoding operations with an `Encoder` object obtained via
331	//! `Encoding`. In the non-streaming mode, decoding and encoding operations are
332	//! performed using methods on `Encoding` objects themselves, so the `Decoder`
333	//! and `Encoder` objects are not used at all.
334	//!
335	//! [1]: https://github.com/hsivonen/encoding_c
336	//! [2]: https://github.com/hsivonen/encoding_c/blob/master/include/encoding_rs_cpp.h
337	//! [3]: https://github.com/Microsoft/GSL/
338	//! [4]: https://searchfox.org/mozilla-central/source/intl/Encoding.h
339	//!
340	//! # Memory management
341	//!
342	//! The streaming mode never performs heap allocations (even the methods
343	//! that write into a `Vec<u8>` or a `String` by taking them as arguments do
344	//! not reallocate the backing buffer of the `Vec<u8>` or the `String`). That
345	//! is, the streaming mode uses caller-allocated buffers exclusively.
346	//!
347	//! The methods of the non-streaming mode that return a `Vec<u8>` or a `String`
348	//! perform heap allocations but only to allocate the backing buffer of the
349	//! `Vec<u8>` or the `String`.
350	//!
351	//! `Encoding` is always statically allocated. `Decoder` and `Encoder` need no
352	//! `Drop` cleanup.
353	//!
354	//! # Buffer reading and writing behavior
355	//!
356	//! Based on experience gained with the `java.nio.charset` encoding converter
357	//! API and with the Gecko uconv encoding converter API, the buffer reading
358	//! and writing behaviors of encoding_rs are asymmetric: input buffers are
359	//! fully drained but output buffers are not always fully filled.
360	//!
361	//! When reading from an input buffer, encoding_rs always consumes all input
362	//! up to the next error or to the end of the buffer. In particular, when
363	//! decoding, even if the input buffer ends in the middle of a byte sequence
364	//! for a character, the decoder consumes all input. This has the benefit that
365	//! the caller of the API can always fill the next buffer from the start from
366	//! whatever source the bytes come from and never has to first copy the last
367	//! bytes of the previous buffer to the start of the next buffer. However, when
368	//! encoding, the UTF-8 input buffers have to end at a character boundary, which
369	//! is a requirement for the Rust `str` type anyway, and UTF-16 input buffer
370	//! boundaries falling in the middle of a surrogate pair result in both
371	//! surrogates being treated individually as unpaired surrogates.
372	//!
373	//! Additionally, decoders guarantee that they can be fed even one byte at a
374	//! time and encoders guarantee that they can be fed even one code point at a
375	//! time. This has the benefit of not placing restrictions on the size of
376	//! chunks in which the content arrives, e.g. from the network.
377	//!
378	//! When writing into an output buffer, encoding_rs makes sure that the code
379	//! unit sequence for a character is never split across output buffer
380	//! boundaries. This may result in wasted space at the end of an output buffer,
381	//! but the advantages are that the output side of both decoders and encoders
382	//! is greatly simplified compared to designs that attempt to fill output
383	//! buffers exactly even when that entails splitting a code unit sequence and
384	//! when encoding_rs methods return to the caller, the output produced thus
385	//! far is always valid taken as whole. (In the case of encoding to ISO-2022-JP,
386	//! the output needs to be considered as a whole, because the latest output
387	//! buffer taken alone might not be valid if the transition away
388	//! from the ASCII state occurred in an earlier output buffer. However, since
389	//! the ISO-2022-JP decoder doesn't treat streams that don't end in the ASCII
390	//! state as being in error despite the encoder generating a transition to the
391	//! ASCII state at the end, the claim about the partial output taken as a whole
392	//! being valid is true even for ISO-2022-JP.)
393	//!
394	//! # Error Reporting
395	//!
396	//! Based on experience gained with the `java.nio.charset` encoding converter
397	//! API and with the Gecko uconv encoding converter API, the error reporting
398	//! behaviors of encoding_rs are asymmetric: decoder errors include offsets
399	//! that leave it up to the caller to extract the erroneous bytes from the
400	//! input stream if the caller wishes to do so but encoder errors provide the
401	//! code point associated with the error without requiring the caller to
402	//! extract it from the input on its own.
403	//!
404	//! On the encoder side, an error is always triggered by the most recently
405	//! pushed Unicode scalar, which makes it simple to pass the `char` to the
406	//! caller. Also, it's very typical for the caller to wish to do something with
407	//! this data: generate a numeric escape for the character. Additionally, the
408	//! ISO-2022-JP encoder reports U+FFFD instead of the actual input character in
409	//! certain cases, so requiring the caller to extract the character from the
410	//! input buffer would require the caller to handle ISO-2022-JP details.
411	//! Furthermore, requiring the caller to extract the character from the input
412	//! buffer would require the caller to implement UTF-8 or UTF-16 math, which is
413	//! the job of an encoding conversion library.
414	//!
415	//! On the decoder side, errors are triggered in more complex ways. For
416	//! example, when decoding the sequence ESC, '$', _buffer boundary_, 'A' as
417	//! ISO-2022-JP, the ESC byte is in error, but this is discovered only after
418	//! the buffer boundary when processing 'A'. Thus, the bytes in error might not
419	//! be the ones most recently pushed to the decoder and the error might not even
420	//! be in the current buffer.
421	//!
422	//! Some encoding conversion APIs address the problem by not acknowledging
423	//! trailing bytes of an input buffer as consumed if it's still possible for
424	//! future bytes to cause the trailing bytes to be in error. This way, error
425	//! reporting can always refer to the most recently pushed buffer. This has the
426	//! problem that the caller of the API has to copy the unconsumed trailing
427	//! bytes to the start of the next buffer before being able to fill the rest
428	//! of the next buffer. This is annoying, error-prone and inefficient.
429	//!
430	//! A possible solution would be making the decoder remember recently consumed
431	//! bytes in order to be able to include a copy of the erroneous bytes when
432	//! reporting an error. This has two problems: First, callers are rarely
433	//! interested in the erroneous bytes, so attempts to identify them are most
434	//! often just overhead anyway. Second, the rare applications that are
435	//! interested typically care about the location of the error in the input
436	//! stream.
437	//!
438	//! To keep the API convenient for common uses and the overhead low while making
439	//! it possible to develop applications, such as HTML validators, that care
440	//! about which bytes were in error, encoding_rs reports the length of the
441	//! erroneous sequence and the number of bytes consumed after the erroneous
442	//! sequence. As long as the caller doesn't discard the 6 most recent bytes,
443	//! this makes it possible for callers that care about the erroneous bytes to
444	//! locate them.
445	//!
446	//! # No Convenience API for Custom Replacements
447	//!
448	//! The Web Platform and, therefore, the Encoding Standard supports only one
449	//! error recovery mode for decoders and only one error recovery mode for
450	//! encoders. The supported error recovery mode for decoders is emitting the
451	//! REPLACEMENT CHARACTER on error. The supported error recovery mode for
452	//! encoders is emitting an HTML decimal numeric character reference for
453	//! unmappable characters.
454	//!
455	//! Since encoding_rs is Web-focused, these are the only error recovery modes
456	//! for which convenient support is provided. Moreover, on the decoder side,
457	//! there aren't really good alternatives for emitting the REPLACEMENT CHARACTER
458	//! on error (other than treating errors as fatal). In particular, simply
459	//! ignoring errors is a
460	//! [security problem](http://www.unicode.org/reports/tr36/#Substituting_for_Ill_Formed_Subsequences),
461	//! so it would be a bad idea for encoding_rs to provide a mode that encouraged
462	//! callers to ignore errors.
463	//!
464	//! On the encoder side, there are plausible alternatives for HTML decimal
465	//! numeric character references. For example, when outputting CSS, CSS-style
466	//! escapes would seem to make sense. However, instead of facilitating the
467	//! output of CSS, JS, etc. in non-UTF-8 encodings, encoding_rs takes the design
468	//! position that you shouldn't generate output in encodings other than UTF-8,
469	//! except where backward compatibility with interacting with the legacy Web
470	//! requires it. The legacy Web requires it only when parsing the query strings
471	//! of URLs and when submitting forms, and those two both use HTML decimal
472	//! numeric character references.
473	//!
474	//! While encoding_rs doesn't make encoder replacements other than HTML decimal
475	//! numeric character references easy, it does make them _possible_.
476	//! `encode_from_utf8()`, which emits HTML decimal numeric character references
477	//! for unmappable characters, is implemented on top of
478	//! `encode_from_utf8_without_replacement()`. Applications that really, really
479	//! want other replacement schemes for unmappable characters can likewise
480	//! implement them on top of `encode_from_utf8_without_replacement()`.
481	//!
482	//! # No Extensibility by Design
483	//!
484	//! The set of encodings supported by encoding_rs is not extensible by design.
485	//! That is, `Encoding`, `Decoder` and `Encoder` are intentionally `struct`s
486	//! rather than `trait`s. encoding_rs takes the design position that all future
487	//! text interchange should be done using UTF-8, which can represent all of
488	//! Unicode. (It is, in fact, the only encoding supported by the Encoding
489	//! Standard and encoding_rs that can represent all of Unicode and that has
490	//! encoder support. UTF-16LE and UTF-16BE don't have encoder support, and
491	//! gb18030 cannot encode U+E5E5.) The other encodings are supported merely for
492	//! legacy compatibility and not due to non-UTF-8 encodings having benefits
493	//! other than being able to consume legacy content.
494	//!
495	//! Considering that UTF-8 can represent all of Unicode and is already supported
496	//! by all Web browsers, introducing a new encoding wouldn't add to the
497	//! expressiveness but would add to compatibility problems. In that sense,
498	//! adding new encodings to the Web Platform doesn't make sense, and, in fact,
499	//! post-UTF-8 attempts at encodings, such as BOCU-1, have been rejected from
500	//! the Web Platform. On the other hand, the set of legacy encodings that must
501	//! be supported for a Web browser to be able to be successful is not going to
502	//! expand. Empirically, the set of encodings specified in the Encoding Standard
503	//! is already sufficient and the set of legacy encodings won't grow
504	//! retroactively.
505	//!
506	//! Since extensibility doesn't make sense considering the Web focus of
507	//! encoding_rs and adding encodings to Web clients would be actively harmful,
508	//! it makes sense to make the set of encodings that encoding_rs supports
509	//! non-extensible and to take the (admittedly small) benefits arising from
510	//! that, such as the size of `Decoder` and `Encoder` objects being known ahead
511	//! of time, which enables stack allocation thereof.
512	//!
513	//! This does have downsides for applications that might want to put encoding_rs
514	//! to non-Web uses if those non-Web uses involve legacy encodings that aren't
515	//! needed for Web uses. The needs of such applications should not complicate
516	//! encoding_rs itself, though. It is up to those applications to provide a
517	//! framework that delegates the operations with encodings that encoding_rs
518	//! supports to encoding_rs and operations with other encodings to something
519	//! else (as opposed to encoding_rs itself providing an extensibility
520	//! framework).
521	//!
522	//! # Panics
523	//!
524	//! Methods in encoding_rs can panic if the API is used against the requirements
525	//! stated in the documentation, if a state that's supposed to be impossible
526	//! is reached due to an internal bug or on integer overflow. When used
527	//! according to documentation with buffer sizes that stay below integer
528	//! overflow, in the absence of internal bugs, encoding_rs does not panic.
529	//!
530	//! Panics arising from API misuse aren't documented beyond this on individual
531	//! methods.
532	//!
533	//! # At-Risk Parts of the API
534	//!
535	//! The foreseeable source of partially backward-incompatible API change is the
536	//! way the instances of `Encoding` are made available.
537	//!
538	//! If Rust changes to allow the entries of `[&'static Encoding; N]` to be
539	//! initialized with `static`s of type `&'static Encoding`, the non-reference
540	//! `FOO_INIT` public `Encoding` instances will be removed from the public API.
541	//!
542	//! If Rust changes to make the referent of `pub const FOO: &'static Encoding`
543	//! unique when the constant is used in different crates, the reference-typed
544	//! `static`s for the encoding instances will be changed from `static` to
545	//! `const` and the non-reference-typed `_INIT` instances will be removed.
546	//!
547	//! # Mapping Spec Concepts onto the API
548	//!
549	//! <table>
550	//! <thead>
551	//! <tr><th>Spec Concept</th><th>Streaming</th><th>Non-Streaming</th></tr>
552	//! </thead>
553	//! <tbody>
554	//! <tr><td><a href="https://encoding.spec.whatwg.org/#encoding">encoding</a></td><td><code>&'static Encoding</code></td><td><code>&'static Encoding</code></td></tr>
555	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8">UTF-8 encoding</a></td><td><code>UTF_8</code></td><td><code>UTF_8</code></td></tr>
556	//! <tr><td><a href="https://encoding.spec.whatwg.org/#concept-encoding-get">get an encoding</a></td><td><code>Encoding::for_label(<var>label</var>)</code></td><td><code>Encoding::for_label(<var>label</var>)</code></td></tr>
557	//! <tr><td><a href="https://encoding.spec.whatwg.org/#name">name</a></td><td><code><var>encoding</var>.name()</code></td><td><code><var>encoding</var>.name()</code></td></tr>
558	//! <tr><td><a href="https://encoding.spec.whatwg.org/#get-an-output-encoding">get an output encoding</a></td><td><code><var>encoding</var>.output_encoding()</code></td><td><code><var>encoding</var>.output_encoding()</code></td></tr>
559	//! <tr><td><a href="https://encoding.spec.whatwg.org/#decode">decode</a></td><td><code>let d = <var>encoding</var>.new_decoder();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code><var>encoding</var>.decode(<var>src</var>)</code></td></tr>
560	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode">UTF-8 decode</a></td><td><code>let d = UTF_8.new_decoder_with_bom_removal();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code>UTF_8.decode_with_bom_removal(<var>src</var>)</code></td></tr>
561	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode-without-bom">UTF-8 decode without BOM</a></td><td><code>let d = UTF_8.new_decoder_without_bom_handling();<br>let res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = d.decode_to_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code>UTF_8.decode_without_bom_handling(<var>src</var>)</code></td></tr>
562	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a></td><td><code>let d = UTF_8.new_decoder_without_bom_handling();<br>let res = d.decode_to_<var>*</var>_without_replacement(<var>src</var>, <var>dst</var>, false);<br>// … (fail if malformed)<br>let last_res = d.decode_to_<var>*</var>_without_replacement(<var>src</var>, <var>dst</var>, true);<br>// (fail if malformed)</code></td><td><code>UTF_8.decode_without_bom_handling_and_without_replacement(<var>src</var>)</code></td></tr>
563	//! <tr><td><a href="https://encoding.spec.whatwg.org/#encode">encode</a></td><td><code>let e = <var>encoding</var>.new_encoder();<br>let res = e.encode_from_<var>*</var>(<var>src</var>, <var>dst</var>, false);<br>// …<br>let last_res = e.encode_from_<var>*</var>(<var>src</var>, <var>dst</var>, true);</code></td><td><code><var>encoding</var>.encode(<var>src</var>)</code></td></tr>
564	//! <tr><td><a href="https://encoding.spec.whatwg.org/#utf-8-encode">UTF-8 encode</a></td><td>Use the UTF-8 nature of Rust strings directly:<br><code><var>write</var>(<var>src</var>.as_bytes());<br>// refill src<br><var>write</var>(<var>src</var>.as_bytes());<br>// refill src<br><var>write</var>(<var>src</var>.as_bytes());<br>// …</code></td><td>Use the UTF-8 nature of Rust strings directly:<br><code><var>src</var>.as_bytes()</code></td></tr>
565	//! </tbody>
566	//! </table>
567	//!
568	//! # Compatibility with the rust-encoding API
569	//!
570	//! The crate
571	//! [encoding_rs_compat](https://github.com/hsivonen/encoding_rs_compat/)
572	//! is a drop-in replacement for rust-encoding 0.2.32 that implements (most of)
573	//! the API of rust-encoding 0.2.32 on top of encoding_rs.
574	//!
575	//! # Mapping rust-encoding concepts to encoding_rs concepts
576	//!
577	//! The following table provides a mapping from rust-encoding constructs to
578	//! encoding_rs ones.
579	//!
580	//! <table>
581	//! <thead>
582	//! <tr><th>rust-encoding</th><th>encoding_rs</th></tr>
583	//! </thead>
584	//! <tbody>
585	//! <tr><td><code>encoding::EncodingRef</code></td><td><code>&'static encoding_rs::Encoding</code></td></tr>
586	//! <tr><td><code>encoding::all::<var>WINDOWS_31J</var></code> (not based on the WHATWG name for some encodings)</td><td><code>encoding_rs::<var>SHIFT_JIS</var></code> (always the WHATWG name uppercased and hyphens replaced with underscores)</td></tr>
587	//! <tr><td><code>encoding::all::ERROR</code></td><td>Not available because not in the Encoding Standard</td></tr>
588	//! <tr><td><code>encoding::all::ASCII</code></td><td>Not available because not in the Encoding Standard</td></tr>
589	//! <tr><td><code>encoding::all::ISO_8859_1</code></td><td>Not available because not in the Encoding Standard</td></tr>
590	//! <tr><td><code>encoding::all::HZ</code></td><td>Not available because not in the Encoding Standard</td></tr>
591	//! <tr><td><code>encoding::label::encoding_from_whatwg_label(<var>string</var>)</code></td><td><code>encoding_rs::Encoding::for_label(<var>string</var>)</code></td></tr>
592	//! <tr><td><code><var>enc</var>.whatwg_name()</code> (always lower case)</td><td><code><var>enc</var>.name()</code> (potentially mixed case)</td></tr>
593	//! <tr><td><code><var>enc</var>.name()</code></td><td>Not available because not in the Encoding Standard</td></tr>
594	//! <tr><td><code>encoding::decode(<var>bytes</var>, encoding::DecoderTrap::Replace, <var>enc</var>)</code></td><td><code><var>enc</var>.decode(<var>bytes</var>)</code></td></tr>
595	//! <tr><td><code><var>enc</var>.decode(<var>bytes</var>, encoding::DecoderTrap::Replace)</code></td><td><code><var>enc</var>.decode_without_bom_handling(<var>bytes</var>)</code></td></tr>
596	//! <tr><td><code><var>enc</var>.encode(<var>string</var>, encoding::EncoderTrap::NcrEscape)</code></td><td><code><var>enc</var>.encode(<var>string</var>)</code></td></tr>
597	//! <tr><td><code><var>enc</var>.raw_decoder()</code></td><td><code><var>enc</var>.new_decoder_without_bom_handling()</code></td></tr>
598	//! <tr><td><code><var>enc</var>.raw_encoder()</code></td><td><code><var>enc</var>.new_encoder()</code></td></tr>
599	//! <tr><td><code>encoding::RawDecoder</code></td><td><code>encoding_rs::Decoder</code></td></tr>
600	//! <tr><td><code>encoding::RawEncoder</code></td><td><code>encoding_rs::Encoder</code></td></tr>
601	//! <tr><td><code><var>raw_decoder</var>.raw_feed(<var>src</var>, <var>dst_string</var>)</code></td><td><code><var>dst_string</var>.reserve(<var>decoder</var>.max_utf8_buffer_length_without_replacement(<var>src</var>.len()));<br><var>decoder</var>.decode_to_string_without_replacement(<var>src</var>, <var>dst_string</var>, false)</code></td></tr>
602	//! <tr><td><code><var>raw_encoder</var>.raw_feed(<var>src</var>, <var>dst_vec</var>)</code></td><td><code><var>dst_vec</var>.reserve(<var>encoder</var>.max_buffer_length_from_utf8_without_replacement(<var>src</var>.len()));<br><var>encoder</var>.encode_from_utf8_to_vec_without_replacement(<var>src</var>, <var>dst_vec</var>, false)</code></td></tr>
603	//! <tr><td><code><var>raw_decoder</var>.raw_finish(<var>dst</var>)</code></td><td><code><var>dst_string</var>.reserve(<var>decoder</var>.max_utf8_buffer_length_without_replacement(0));<br><var>decoder</var>.decode_to_string_without_replacement(b"", <var>dst</var>, true)</code></td></tr>
604	//! <tr><td><code><var>raw_encoder</var>.raw_finish(<var>dst</var>)</code></td><td><code><var>dst_vec</var>.reserve(<var>encoder</var>.max_buffer_length_from_utf8_without_replacement(0));<br><var>encoder</var>.encode_from_utf8_to_vec_without_replacement("", <var>dst</var>, true)</code></td></tr>
605	//! <tr><td><code>encoding::DecoderTrap::Strict</code></td><td><code>decode*</code> methods that have <code>_without_replacement</code> in their name (and treating the <code>Malformed</code> result as fatal).</td></tr>
606	//! <tr><td><code>encoding::DecoderTrap::Replace</code></td><td><code>decode*</code> methods that <i>do not</i> have <code>_without_replacement</code> in their name.</td></tr>
607	//! <tr><td><code>encoding::DecoderTrap::Ignore</code></td><td>It is a bad idea to ignore errors due to security issues, but this could be implemented using <code>decode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
608	//! <tr><td><code>encoding::DecoderTrap::Call(DecoderTrapFunc)</code></td><td>Can be implemented using <code>decode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
609	//! <tr><td><code>encoding::EncoderTrap::Strict</code></td><td><code>encode*</code> methods that have <code>_without_replacement</code> in their name (and treating the <code>Unmappable</code> result as fatal).</td></tr>
610	//! <tr><td><code>encoding::EncoderTrap::Replace</code></td><td>Can be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
611	//! <tr><td><code>encoding::EncoderTrap::Ignore</code></td><td>It is a bad idea to ignore errors due to security issues, but this could be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
612	//! <tr><td><code>encoding::EncoderTrap::NcrEscape</code></td><td><code>encode*</code> methods that <i>do not</i> have <code>_without_replacement</code> in their name.</td></tr>
613	//! <tr><td><code>encoding::EncoderTrap::Call(EncoderTrapFunc)</code></td><td>Can be implemented using <code>encode*</code> methods that have <code>_without_replacement</code> in their name.</td></tr>
614	//! </tbody>
615	//! </table>
616	//!
617	//! # Relationship with Windows Code Pages
618	//!
619	//! Despite the Web and browser focus, the encodings defined by the Encoding
620	//! Standard and implemented by this crate may be useful for decoding legacy
621	//! data that uses Windows code pages. The following table names the
622	//! encodings (both single-byte and multibyte)
623	//! that have a closely related Windows code page, the number of the closest
624	//! code page, a column indicating whether Windows maps unassigned code points
625	//! to the Unicode Private Use Area instead of U+FFFD, and a remark number
626	//! referring to the numbered remarks in the list after the table.
627	//!
628	//! <table>
629	//! <thead>
630	//! <tr><th>Encoding</th><th>Code Page</th><th>PUA</th><th>Remarks</th></tr>
631	//! </thead>
632	//! <tbody>
633	//! <tr><td>Shift_JIS</td><td>932</td><td></td><td></td></tr>
634	//! <tr><td>GBK</td><td>936</td><td></td><td></td></tr>
635	//! <tr><td>EUC-KR</td><td>949</td><td></td><td></td></tr>
636	//! <tr><td>Big5</td><td>950</td><td></td><td></td></tr>
637	//! <tr><td>IBM866</td><td>866</td><td></td><td></td></tr>
638	//! <tr><td>windows-874</td><td>874</td><td>&bullet;</td><td></td></tr>
639	//! <tr><td>UTF-16LE</td><td>1200</td><td></td><td></td></tr>
640	//! <tr><td>UTF-16BE</td><td>1201</td><td></td><td></td></tr>
641	//! <tr><td>windows-1250</td><td>1250</td><td></td><td></td></tr>
642	//! <tr><td>windows-1251</td><td>1251</td><td></td><td></td></tr>
643	//! <tr><td>windows-1252</td><td>1252</td><td></td><td></td></tr>
644	//! <tr><td>windows-1253</td><td>1253</td><td>&bullet;</td><td></td></tr>
645	//! <tr><td>windows-1254</td><td>1254</td><td></td><td></td></tr>
646	//! <tr><td>windows-1255</td><td>1255</td><td>&bullet;</td><td></td></tr>
647	//! <tr><td>windows-1256</td><td>1256</td><td></td><td></td></tr>
648	//! <tr><td>windows-1257</td><td>1257</td><td>&bullet;</td><td></td></tr>
649	//! <tr><td>windows-1258</td><td>1258</td><td></td><td></td></tr>
650	//! <tr><td>macintosh</td><td>10000</td><td></td><td>1</td></tr>
651	//! <tr><td>x-mac-cyrillic</td><td>10017</td><td></td><td>2</td></tr>
652	//! <tr><td>KOI8-R</td><td>20866</td><td></td><td></td></tr>
653	//! <tr><td>EUC-JP</td><td>20932</td><td></td><td></td></tr>
654	//! <tr><td>KOI8-U</td><td>21866</td><td></td><td></td></tr>
655	//! <tr><td>ISO-8859-2</td><td>28592</td><td></td><td></td></tr>
656	//! <tr><td>ISO-8859-3</td><td>28593</td><td></td><td></td></tr>
657	//! <tr><td>ISO-8859-4</td><td>28594</td><td></td><td></td></tr>
658	//! <tr><td>ISO-8859-5</td><td>28595</td><td></td><td></td></tr>
659	//! <tr><td>ISO-8859-6</td><td>28596</td><td>&bullet;</td><td></td></tr>
660	//! <tr><td>ISO-8859-7</td><td>28597</td><td>&bullet;</td><td>3</td></tr>
661	//! <tr><td>ISO-8859-8</td><td>28598</td><td>&bullet;</td><td>4</td></tr>
662	//! <tr><td>ISO-8859-13</td><td>28603</td><td>&bullet;</td><td></td></tr>
663	//! <tr><td>ISO-8859-15</td><td>28605</td><td></td><td></td></tr>
664	//! <tr><td>ISO-8859-8-I</td><td>38598</td><td></td><td>5</td></tr>
665	//! <tr><td>ISO-2022-JP</td><td>50220</td><td></td><td></td></tr>
666	//! <tr><td>gb18030</td><td>54936</td><td></td><td></td></tr>
667	//! <tr><td>UTF-8</td><td>65001</td><td></td><td></td></tr>
668	//! </tbody>
669	//! </table>
670	//!
671	//! 1. Windows decodes 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
672	//! 2. Windows decodes 0xFF to U+00A4 CURRENCY SIGN instead of U+20AC EURO SIGN.
673	//! 3. Windows decodes the currency signs at 0xA4 and 0xA5 as well as 0xAA,
674	//! which should be U+037A GREEK YPOGEGRAMMENI, to PUA code points. Windows
675	//! decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA instead of U+2018
676	//! LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER LETTER APOSTROPHE
677	//! instead of U+2019 RIGHT SINGLE QUOTATION MARK.
678	//! 4. Windows decodes 0xAF to OVERLINE instead of MACRON and 0xFE and 0xFD to PUA instead
679	//! of LRM and RLM.
680	//! 5. Remarks from the previous item apply.
681	//!
682	//! The differences between this crate and Windows in the case of multibyte encodings
683	//! are not yet fully documented here. The lack of remarks above should not be taken
684	//! as indication of lack of differences.
685	//!
686	//! # Notable Differences from IANA Naming
687	//!
688	//! In some cases, the Encoding Standard specifies the popular unextended encoding
689	//! name where in IANA terms one of the other labels would be more precise considering
690	//! the extensions that the Encoding Standard has unified into the encoding.
691	//!
692	//! <table>
693	//! <thead>
694	//! <tr><th>Encoding</th><th>IANA</th></tr>
695	//! </thead>
696	//! <tbody>
697	//! <tr><td>Big5</td><td>Big5-HKSCS</td></tr>
698	//! <tr><td>EUC-KR</td><td>windows-949</td></tr>
699	//! <tr><td>Shift_JIS</td><td>windows-31j</td></tr>
700	//! <tr><td>x-mac-cyrillic</td><td>x-mac-ukrainian</td></tr>
701	//! </tbody>
702	//! </table>
703	//!
704	//! In other cases where the Encoding Standard unifies unextended and extended
705	//! variants of an encoding, the encoding gets the name of the extended
706	//! variant.
707	//!
708	//! <table>
709	//! <thead>
710	//! <tr><th>IANA</th><th>Unified into Encoding</th></tr>
711	//! </thead>
712	//! <tbody>
713	//! <tr><td>ISO-8859-1</td><td>windows-1252</td></tr>
714	//! <tr><td>ISO-8859-9</td><td>windows-1254</td></tr>
715	//! <tr><td>TIS-620</td><td>windows-874</td></tr>
716	//! </tbody>
717	//! </table>
718	//!
719	//! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes)
720	//! for discussion about the UTF-16 family.
721
722	#![no_std]
723	#![cfg_attr(feature = "simd-accel", feature(core_intrinsics, portable_simd))]
724
725	#[cfg(feature = "alloc")]
726	#[cfg_attr(test, macro_use)]
727	extern crate alloc;
728
729	extern crate core;
730	#[macro_use]
731	extern crate cfg_if;
732
733	#[cfg(feature = "serde")]
734	extern crate serde;
735
736	#[cfg(all(test, feature = "serde"))]
737	extern crate bincode;
738	#[cfg(all(test, feature = "serde"))]
739	#[macro_use]
740	extern crate serde_derive;
741	#[cfg(all(test, feature = "serde"))]
742	extern crate serde_json;
743
744	#[macro_use]
745	mod macros;
746
747	#[cfg(all(
748	feature = "simd-accel",
749	any(
750	target_feature = "sse2",
751	all(target_endian = "little", target_arch = "aarch64"),
752	all(target_endian = "little", target_feature = "neon")
753	)
754	))]
755	mod simd_funcs;
756
757	#[cfg(all(test, feature = "alloc"))]
758	mod testing;
759
760	mod big5;
761	mod euc_jp;
762	mod euc_kr;
763	mod gb18030;
764	mod gb18030_2022;
765	mod iso_2022_jp;
766	mod replacement;
767	mod shift_jis;
768	mod single_byte;
769	mod utf_16;
770	mod utf_8;
771	mod x_user_defined;
772
773	mod ascii;
774	mod data;
775	mod handles;
776	mod variant;
777
778	pub mod mem;
779
780	use crate::ascii::ascii_valid_up_to;
781	use crate::ascii::iso_2022_jp_ascii_valid_up_to;
782	use crate::utf_8::utf8_valid_up_to;
783	use crate::variant::*;
784
785	#[cfg(feature = "alloc")]
786	use alloc::borrow::Cow;
787	#[cfg(feature = "alloc")]
788	use alloc::string::String;
789	#[cfg(feature = "alloc")]
790	use alloc::vec::Vec;
791	use core::cmp::Ordering;
792	use core::hash::Hash;
793	use core::hash::Hasher;
794
795	#[cfg(feature = "serde")]
796	use serde::de::Visitor;
797	#[cfg(feature = "serde")]
798	use serde::{Deserialize, Deserializer, Serialize, Serializer};
799
/// Length of a numeric character reference (NCR) at its longest.
///
/// This has to be the maximum length of an NCR rather than the maximum
/// minus one: the "minus one" cannot be taken from the space reserved for
/// the current unmappable, because the ISO-2022-JP encoder can fill up
/// that space with a state transition escape.
const NCR_EXTRA: usize = 10; // &#1114111;
806
807	// BEGIN GENERATED CODE. PLEASE DO NOT EDIT.
808	// Instead, please regenerate using generate-encoding-data.py
809
// Byte length of the longest label; the trailing comment spells out a
// label of exactly that length so the value can be checked by eye.
const LONGEST_LABEL_LENGTH: usize = 19; // cseucpkdfmtjapanese
811
812	/// The initializer for the [Big5](static.BIG5.html) encoding.
813	///
814	/// For use only for taking the address of this form when
815	/// Rust prohibits the use of the non-`_INIT` form directly,
816	/// such as in initializers of other `static`s. If in doubt,
817	/// use the corresponding non-`_INIT` reference-typed `static`.
818	///
819	/// This part of the public API will go away if Rust changes
820	/// to make the referent of `pub const FOO: &'static Encoding`
821	/// unique cross-crate or if Rust starts allowing static arrays
822	/// to be initialized with `pub static FOO: &'static Encoding`
823	/// items.
824	pub static BIG5_INIT: Encoding = Encoding {
825	name: "Big5",
826	variant: VariantEncoding::Big5,
827	};
828
829	/// The Big5 encoding.
830	///
831	/// This is Big5 with HKSCS with mappings to more recent Unicode assignments
832	/// instead of the Private Use Area code points that have been used historically.
833	/// It is believed to be able to decode existing Web content in a way that makes
834	/// sense.
835	///
836	/// To avoid form submissions generating data that Web servers don't understand,
837	/// the encoder doesn't use the HKSCS byte sequences that precede the unextended
838	/// Big5 in the lexical order.
839	///
840	/// [Index visualization](https://encoding.spec.whatwg.org/big5.html),
841	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/big5-bmp.html)
842	///
843	/// This encoding is designed to be suited for decoding the Windows code page 950
844	/// and its HKSCS patched "951" variant such that the text makes sense, given
845	/// assignments that Unicode has made after those encodings used Private Use
846	/// Area characters.
847	///
848	/// This will change from `static` to `const` if Rust changes
849	/// to make the referent of `pub const FOO: &'static Encoding`
850	/// unique cross-crate, so don't take the address of this
851	/// `static`.
852	pub static BIG5: &'static Encoding = &BIG5_INIT;
853
854	/// The initializer for the [EUC-JP](static.EUC_JP.html) encoding.
855	///
856	/// For use only for taking the address of this form when
857	/// Rust prohibits the use of the non-`_INIT` form directly,
858	/// such as in initializers of other `static`s. If in doubt,
859	/// use the corresponding non-`_INIT` reference-typed `static`.
860	///
861	/// This part of the public API will go away if Rust changes
862	/// to make the referent of `pub const FOO: &'static Encoding`
863	/// unique cross-crate or if Rust starts allowing static arrays
864	/// to be initialized with `pub static FOO: &'static Encoding`
865	/// items.
866	pub static EUC_JP_INIT: Encoding = Encoding {
867	name: "EUC-JP",
868	variant: VariantEncoding::EucJp,
869	};
870
871	/// The EUC-JP encoding.
872	///
873	/// This is the legacy Unix encoding for Japanese.
874	///
875	/// For compatibility with Web servers that don't expect three-byte sequences
876	/// in form submissions, the encoder doesn't generate three-byte sequences.
877	/// That is, the JIS X 0212 support is decode-only.
878	///
879	/// [Index visualization](https://encoding.spec.whatwg.org/euc-jp.html),
880	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-jp-bmp.html)
881	///
882	/// This encoding roughly matches the Windows code page 20932. There are error
883	/// handling differences and a handful of 2-byte sequences that decode differently.
884	/// Additionall, Windows doesn't support 3-byte sequences.
885	///
886	/// This will change from `static` to `const` if Rust changes
887	/// to make the referent of `pub const FOO: &'static Encoding`
888	/// unique cross-crate, so don't take the address of this
889	/// `static`.
890	pub static EUC_JP: &'static Encoding = &EUC_JP_INIT;
891
892	/// The initializer for the [EUC-KR](static.EUC_KR.html) encoding.
893	///
894	/// For use only for taking the address of this form when
895	/// Rust prohibits the use of the non-`_INIT` form directly,
896	/// such as in initializers of other `static`s. If in doubt,
897	/// use the corresponding non-`_INIT` reference-typed `static`.
898	///
899	/// This part of the public API will go away if Rust changes
900	/// to make the referent of `pub const FOO: &'static Encoding`
901	/// unique cross-crate or if Rust starts allowing static arrays
902	/// to be initialized with `pub static FOO: &'static Encoding`
903	/// items.
904	pub static EUC_KR_INIT: Encoding = Encoding {
905	name: "EUC-KR",
906	variant: VariantEncoding::EucKr,
907	};
908
909	/// The EUC-KR encoding.
910	///
911	/// This is the Korean encoding for Windows. It extends the Unix legacy encoding
912	/// for Korean, based on KS X 1001 (which also formed the base of MacKorean on Mac OS
913	/// Classic), with all the characters from the Hangul Syllables block of Unicode.
914	///
915	/// [Index visualization](https://encoding.spec.whatwg.org/euc-kr.html),
916	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/euc-kr-bmp.html)
917	///
918	/// This encoding matches the Windows code page 949, except Windows decodes byte 0x80
919	/// to U+0080 and some byte sequences that are error per the Encoding Standard to
920	/// the question mark or the Private Use Area.
921	///
922	/// This will change from `static` to `const` if Rust changes
923	/// to make the referent of `pub const FOO: &'static Encoding`
924	/// unique cross-crate, so don't take the address of this
925	/// `static`.
926	pub static EUC_KR: &'static Encoding = &EUC_KR_INIT;
927
928	/// The initializer for the [GBK](static.GBK.html) encoding.
929	///
930	/// For use only for taking the address of this form when
931	/// Rust prohibits the use of the non-`_INIT` form directly,
932	/// such as in initializers of other `static`s. If in doubt,
933	/// use the corresponding non-`_INIT` reference-typed `static`.
934	///
935	/// This part of the public API will go away if Rust changes
936	/// to make the referent of `pub const FOO: &'static Encoding`
937	/// unique cross-crate or if Rust starts allowing static arrays
938	/// to be initialized with `pub static FOO: &'static Encoding`
939	/// items.
940	pub static GBK_INIT: Encoding = Encoding {
941	name: "GBK",
942	variant: VariantEncoding::Gbk,
943	};
944
945	/// The GBK encoding.
946	///
947	/// The decoder for this encoding is the same as the decoder for gb18030.
948	/// The encoder side of this encoding is GBK with Windows code page 936 euro
949	/// sign behavior and with the changes to two-byte sequences made in GB18030-2022.
950	/// GBK extends GB2312-80 to cover the CJK Unified Ideographs Unicode block as
951	/// well as a handful of ideographs from the CJK Unified Ideographs Extension A
952	/// and CJK Compatibility Ideographs blocks.
953	///
954	/// Unlike e.g. in the case of ISO-8859-1 and windows-1252, GBK encoder wasn't
955	/// unified with the gb18030 encoder in the Encoding Standard out of concern
956	/// that servers that expect GBK form submissions might not be able to handle
957	/// the four-byte sequences.
958	///
959	/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
960	/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
961	///
962	/// The encoder of this encoding roughly matches the Windows code page 936.
963	/// The decoder side is a superset.
964	///
965	/// This will change from `static` to `const` if Rust changes
966	/// to make the referent of `pub const FOO: &'static Encoding`
967	/// unique cross-crate, so don't take the address of this
968	/// `static`.
969	pub static GBK: &'static Encoding = &GBK_INIT;
970
971	/// The initializer for the [IBM866](static.IBM866.html) encoding.
972	///
973	/// For use only for taking the address of this form when
974	/// Rust prohibits the use of the non-`_INIT` form directly,
975	/// such as in initializers of other `static`s. If in doubt,
976	/// use the corresponding non-`_INIT` reference-typed `static`.
977	///
978	/// This part of the public API will go away if Rust changes
979	/// to make the referent of `pub const FOO: &'static Encoding`
980	/// unique cross-crate or if Rust starts allowing static arrays
981	/// to be initialized with `pub static FOO: &'static Encoding`
982	/// items.
983	pub static IBM866_INIT: Encoding = Encoding {
984	name: "IBM866",
985	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.ibm866, `0x0440`, `96`, `16`),
986	};
987
988	/// The IBM866 encoding.
989	///
990	/// This the most notable one of the DOS Cyrillic code pages. It has the same
991	/// box drawing characters as code page 437, so it can be used for decoding
992	/// DOS-era ASCII + box drawing data.
993	///
994	/// [Index visualization](https://encoding.spec.whatwg.org/ibm866.html),
995	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/ibm866-bmp.html)
996	///
997	/// This encoding matches the Windows code page 866.
998	///
999	/// This will change from `static` to `const` if Rust changes
1000	/// to make the referent of `pub const FOO: &'static Encoding`
1001	/// unique cross-crate, so don't take the address of this
1002	/// `static`.
1003	pub static IBM866: &'static Encoding = &IBM866_INIT;
1004
1005	/// The initializer for the [ISO-2022-JP](static.ISO_2022_JP.html) encoding.
1006	///
1007	/// For use only for taking the address of this form when
1008	/// Rust prohibits the use of the non-`_INIT` form directly,
1009	/// such as in initializers of other `static`s. If in doubt,
1010	/// use the corresponding non-`_INIT` reference-typed `static`.
1011	///
1012	/// This part of the public API will go away if Rust changes
1013	/// to make the referent of `pub const FOO: &'static Encoding`
1014	/// unique cross-crate or if Rust starts allowing static arrays
1015	/// to be initialized with `pub static FOO: &'static Encoding`
1016	/// items.
1017	pub static ISO_2022_JP_INIT: Encoding = Encoding {
1018	name: "ISO-2022-JP",
1019	variant: VariantEncoding::Iso2022Jp,
1020	};
1021
1022	/// The ISO-2022-JP encoding.
1023	///
1024	/// This the primary pre-UTF-8 encoding for Japanese email. It uses the ASCII
1025	/// byte range to encode non-Basic Latin characters. It's the only encoding
1026	/// supported by this crate whose encoder is stateful.
1027	///
1028	/// [Index visualization](https://encoding.spec.whatwg.org/jis0208.html),
1029	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/jis0208-bmp.html)
1030	///
1031	/// This encoding roughly matches the Windows code page 50220. Notably, Windows
1032	/// uses U+30FB in place of the REPLACEMENT CHARACTER and otherwise differs in
1033	/// error handling.
1034	///
1035	/// This will change from `static` to `const` if Rust changes
1036	/// to make the referent of `pub const FOO: &'static Encoding`
1037	/// unique cross-crate, so don't take the address of this
1038	/// `static`.
1039	pub static ISO_2022_JP: &'static Encoding = &ISO_2022_JP_INIT;
1040
1041	/// The initializer for the [ISO-8859-10](static.ISO_8859_10.html) encoding.
1042	///
1043	/// For use only for taking the address of this form when
1044	/// Rust prohibits the use of the non-`_INIT` form directly,
1045	/// such as in initializers of other `static`s. If in doubt,
1046	/// use the corresponding non-`_INIT` reference-typed `static`.
1047	///
1048	/// This part of the public API will go away if Rust changes
1049	/// to make the referent of `pub const FOO: &'static Encoding`
1050	/// unique cross-crate or if Rust starts allowing static arrays
1051	/// to be initialized with `pub static FOO: &'static Encoding`
1052	/// items.
1053	pub static ISO_8859_10_INIT: Encoding = Encoding {
1054	name: "ISO-8859-10",
1055	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_10, `0x00DA`, `90`, `6`),
1056	};
1057
1058	/// The ISO-8859-10 encoding.
1059	///
1060	/// This is the Nordic part of the ISO/IEC 8859 encoding family. This encoding
1061	/// is also known as Latin 6.
1062	///
1063	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-10.html),
1064	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-10-bmp.html)
1065	///
1066	/// The Windows code page number for this encoding is 28600, but kernel32.dll
1067	/// does not support this encoding.
1068	///
1069	/// This will change from `static` to `const` if Rust changes
1070	/// to make the referent of `pub const FOO: &'static Encoding`
1071	/// unique cross-crate, so don't take the address of this
1072	/// `static`.
1073	pub static ISO_8859_10: &'static Encoding = &ISO_8859_10_INIT;
1074
1075	/// The initializer for the [ISO-8859-13](static.ISO_8859_13.html) encoding.
1076	///
1077	/// For use only for taking the address of this form when
1078	/// Rust prohibits the use of the non-`_INIT` form directly,
1079	/// such as in initializers of other `static`s. If in doubt,
1080	/// use the corresponding non-`_INIT` reference-typed `static`.
1081	///
1082	/// This part of the public API will go away if Rust changes
1083	/// to make the referent of `pub const FOO: &'static Encoding`
1084	/// unique cross-crate or if Rust starts allowing static arrays
1085	/// to be initialized with `pub static FOO: &'static Encoding`
1086	/// items.
1087	pub static ISO_8859_13_INIT: Encoding = Encoding {
1088	name: "ISO-8859-13",
1089	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_13, `0x00DF`, `95`, `1`),
1090	};
1091
1092	/// The ISO-8859-13 encoding.
1093	///
1094	/// This is the Baltic part of the ISO/IEC 8859 encoding family. This encoding
1095	/// is also known as Latin 7.
1096	///
1097	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-13.html),
1098	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-13-bmp.html)
1099	///
1100	/// This encoding matches the Windows code page 28603, except Windows decodes
1101	/// unassigned code points to the Private Use Area of Unicode.
1102	///
1103	/// This will change from `static` to `const` if Rust changes
1104	/// to make the referent of `pub const FOO: &'static Encoding`
1105	/// unique cross-crate, so don't take the address of this
1106	/// `static`.
1107	pub static ISO_8859_13: &'static Encoding = &ISO_8859_13_INIT;
1108
1109	/// The initializer for the [ISO-8859-14](static.ISO_8859_14.html) encoding.
1110	///
1111	/// For use only for taking the address of this form when
1112	/// Rust prohibits the use of the non-`_INIT` form directly,
1113	/// such as in initializers of other `static`s. If in doubt,
1114	/// use the corresponding non-`_INIT` reference-typed `static`.
1115	///
1116	/// This part of the public API will go away if Rust changes
1117	/// to make the referent of `pub const FOO: &'static Encoding`
1118	/// unique cross-crate or if Rust starts allowing static arrays
1119	/// to be initialized with `pub static FOO: &'static Encoding`
1120	/// items.
1121	pub static ISO_8859_14_INIT: Encoding = Encoding {
1122	name: "ISO-8859-14",
1123	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_14, `0x00DF`, `95`, `17`),
1124	};
1125
1126	/// The ISO-8859-14 encoding.
1127	///
1128	/// This is the Celtic part of the ISO/IEC 8859 encoding family. This encoding
1129	/// is also known as Latin 8.
1130	///
1131	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-14.html),
1132	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-14-bmp.html)
1133	///
1134	/// The Windows code page number for this encoding is 28604, but kernel32.dll
1135	/// does not support this encoding.
1136	///
1137	/// This will change from `static` to `const` if Rust changes
1138	/// to make the referent of `pub const FOO: &'static Encoding`
1139	/// unique cross-crate, so don't take the address of this
1140	/// `static`.
1141	pub static ISO_8859_14: &'static Encoding = &ISO_8859_14_INIT;
1142
1143	/// The initializer for the [ISO-8859-15](static.ISO_8859_15.html) encoding.
1144	///
1145	/// For use only for taking the address of this form when
1146	/// Rust prohibits the use of the non-`_INIT` form directly,
1147	/// such as in initializers of other `static`s. If in doubt,
1148	/// use the corresponding non-`_INIT` reference-typed `static`.
1149	///
1150	/// This part of the public API will go away if Rust changes
1151	/// to make the referent of `pub const FOO: &'static Encoding`
1152	/// unique cross-crate or if Rust starts allowing static arrays
1153	/// to be initialized with `pub static FOO: &'static Encoding`
1154	/// items.
1155	pub static ISO_8859_15_INIT: Encoding = Encoding {
1156	name: "ISO-8859-15",
1157	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_15, `0x00BF`, `63`, `65`),
1158	};
1159
1160	/// The ISO-8859-15 encoding.
1161	///
1162	/// This is the revised Western European part of the ISO/IEC 8859 encoding
1163	/// family. This encoding is also known as Latin 9.
1164	///
1165	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-15.html),
1166	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-15-bmp.html)
1167	///
1168	/// This encoding matches the Windows code page 28605.
1169	///
1170	/// This will change from `static` to `const` if Rust changes
1171	/// to make the referent of `pub const FOO: &'static Encoding`
1172	/// unique cross-crate, so don't take the address of this
1173	/// `static`.
1174	pub static ISO_8859_15: &'static Encoding = &ISO_8859_15_INIT;
1175
1176	/// The initializer for the [ISO-8859-16](static.ISO_8859_16.html) encoding.
1177	///
1178	/// For use only for taking the address of this form when
1179	/// Rust prohibits the use of the non-`_INIT` form directly,
1180	/// such as in initializers of other `static`s. If in doubt,
1181	/// use the corresponding non-`_INIT` reference-typed `static`.
1182	///
1183	/// This part of the public API will go away if Rust changes
1184	/// to make the referent of `pub const FOO: &'static Encoding`
1185	/// unique cross-crate or if Rust starts allowing static arrays
1186	/// to be initialized with `pub static FOO: &'static Encoding`
1187	/// items.
1188	pub static ISO_8859_16_INIT: Encoding = Encoding {
1189	name: "ISO-8859-16",
1190	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_16, `0x00DF`, `95`, `4`),
1191	};
1192
1193	/// The ISO-8859-16 encoding.
1194	///
1195	/// This is the South-Eastern European part of the ISO/IEC 8859 encoding
1196	/// family. This encoding is also known as Latin 10.
1197	///
1198	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-16.html),
1199	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-16-bmp.html)
1200	///
1201	/// The Windows code page number for this encoding is 28606, but kernel32.dll
1202	/// does not support this encoding.
1203	///
1204	/// This will change from `static` to `const` if Rust changes
1205	/// to make the referent of `pub const FOO: &'static Encoding`
1206	/// unique cross-crate, so don't take the address of this
1207	/// `static`.
1208	pub static ISO_8859_16: &'static Encoding = &ISO_8859_16_INIT;
1209
1210	/// The initializer for the [ISO-8859-2](static.ISO_8859_2.html) encoding.
1211	///
1212	/// For use only for taking the address of this form when
1213	/// Rust prohibits the use of the non-`_INIT` form directly,
1214	/// such as in initializers of other `static`s. If in doubt,
1215	/// use the corresponding non-`_INIT` reference-typed `static`.
1216	///
1217	/// This part of the public API will go away if Rust changes
1218	/// to make the referent of `pub const FOO: &'static Encoding`
1219	/// unique cross-crate or if Rust starts allowing static arrays
1220	/// to be initialized with `pub static FOO: &'static Encoding`
1221	/// items.
1222	pub static ISO_8859_2_INIT: Encoding = Encoding {
1223	name: "ISO-8859-2",
1224	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_2, `0x00DF`, `95`, `1`),
1225	};
1226
1227	/// The ISO-8859-2 encoding.
1228	///
1229	/// This is the Central European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 2.
1230	///
1231	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-2.html),
1232	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-2-bmp.html)
1233	///
1234	/// This encoding matches the Windows code page 28592.
1235	///
1236	/// This will change from `static` to `const` if Rust changes
1237	/// to make the referent of `pub const FOO: &'static Encoding`
1238	/// unique cross-crate, so don't take the address of this
1239	/// `static`.
1240	pub static ISO_8859_2: &'static Encoding = &ISO_8859_2_INIT;
1241
1242	/// The initializer for the [ISO-8859-3](static.ISO_8859_3.html) encoding.
1243	///
1244	/// For use only for taking the address of this form when
1245	/// Rust prohibits the use of the non-`_INIT` form directly,
1246	/// such as in initializers of other `static`s. If in doubt,
1247	/// use the corresponding non-`_INIT` reference-typed `static`.
1248	///
1249	/// This part of the public API will go away if Rust changes
1250	/// to make the referent of `pub const FOO: &'static Encoding`
1251	/// unique cross-crate or if Rust starts allowing static arrays
1252	/// to be initialized with `pub static FOO: &'static Encoding`
1253	/// items.
1254	pub static ISO_8859_3_INIT: Encoding = Encoding {
1255	name: "ISO-8859-3",
1256	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_3, `0x00DF`, `95`, `4`),
1257	};
1258
1259	/// The ISO-8859-3 encoding.
1260	///
1261	/// This is the South European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 3.
1262	///
1263	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-3.html),
1264	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-3-bmp.html)
1265	///
1266	/// This encoding matches the Windows code page 28593.
1267	///
1268	/// This will change from `static` to `const` if Rust changes
1269	/// to make the referent of `pub const FOO: &'static Encoding`
1270	/// unique cross-crate, so don't take the address of this
1271	/// `static`.
1272	pub static ISO_8859_3: &'static Encoding = &ISO_8859_3_INIT;
1273
1274	/// The initializer for the [ISO-8859-4](static.ISO_8859_4.html) encoding.
1275	///
1276	/// For use only for taking the address of this form when
1277	/// Rust prohibits the use of the non-`_INIT` form directly,
1278	/// such as in initializers of other `static`s. If in doubt,
1279	/// use the corresponding non-`_INIT` reference-typed `static`.
1280	///
1281	/// This part of the public API will go away if Rust changes
1282	/// to make the referent of `pub const FOO: &'static Encoding`
1283	/// unique cross-crate or if Rust starts allowing static arrays
1284	/// to be initialized with `pub static FOO: &'static Encoding`
1285	/// items.
1286	pub static ISO_8859_4_INIT: Encoding = Encoding {
1287	name: "ISO-8859-4",
1288	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_4, `0x00DF`, `95`, `1`),
1289	};
1290
1291	/// The ISO-8859-4 encoding.
1292	///
1293	/// This is the North European part of the ISO/IEC 8859 encoding family. This encoding is also known as Latin 4.
1294	///
1295	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-4.html),
1296	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-4-bmp.html)
1297	///
1298	/// This encoding matches the Windows code page 28594.
1299	///
1300	/// This will change from `static` to `const` if Rust changes
1301	/// to make the referent of `pub const FOO: &'static Encoding`
1302	/// unique cross-crate, so don't take the address of this
1303	/// `static`.
1304	pub static ISO_8859_4: &'static Encoding = &ISO_8859_4_INIT;
1305
1306	/// The initializer for the [ISO-8859-5](static.ISO_8859_5.html) encoding.
1307	///
1308	/// For use only for taking the address of this form when
1309	/// Rust prohibits the use of the non-`_INIT` form directly,
1310	/// such as in initializers of other `static`s. If in doubt,
1311	/// use the corresponding non-`_INIT` reference-typed `static`.
1312	///
1313	/// This part of the public API will go away if Rust changes
1314	/// to make the referent of `pub const FOO: &'static Encoding`
1315	/// unique cross-crate or if Rust starts allowing static arrays
1316	/// to be initialized with `pub static FOO: &'static Encoding`
1317	/// items.
1318	pub static ISO_8859_5_INIT: Encoding = Encoding {
1319	name: "ISO-8859-5",
1320	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_5, `0x040E`, `46`, `66`),
1321	};
1322
1323	/// The ISO-8859-5 encoding.
1324	///
1325	/// This is the Cyrillic part of the ISO/IEC 8859 encoding family.
1326	///
1327	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-5.html),
1328	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-5-bmp.html)
1329	///
1330	/// This encoding matches the Windows code page 28595.
1331	///
1332	/// This will change from `static` to `const` if Rust changes
1333	/// to make the referent of `pub const FOO: &'static Encoding`
1334	/// unique cross-crate, so don't take the address of this
1335	/// `static`.
1336	pub static ISO_8859_5: &'static Encoding = &ISO_8859_5_INIT;
1337
1338	/// The initializer for the [ISO-8859-6](static.ISO_8859_6.html) encoding.
1339	///
1340	/// For use only for taking the address of this form when
1341	/// Rust prohibits the use of the non-`_INIT` form directly,
1342	/// such as in initializers of other `static`s. If in doubt,
1343	/// use the corresponding non-`_INIT` reference-typed `static`.
1344	///
1345	/// This part of the public API will go away if Rust changes
1346	/// to make the referent of `pub const FOO: &'static Encoding`
1347	/// unique cross-crate or if Rust starts allowing static arrays
1348	/// to be initialized with `pub static FOO: &'static Encoding`
1349	/// items.
1350	pub static ISO_8859_6_INIT: Encoding = Encoding {
1351	name: "ISO-8859-6",
1352	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_6, `0x0621`, `65`, `26`),
1353	};
1354
1355	/// The ISO-8859-6 encoding.
1356	///
1357	/// This is the Arabic part of the ISO/IEC 8859 encoding family.
1358	///
1359	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-6.html),
1360	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-6-bmp.html)
1361	///
1362	/// This encoding matches the Windows code page 28596, except Windows decodes
1363	/// unassigned code points to the Private Use Area of Unicode.
1364	///
1365	/// This will change from `static` to `const` if Rust changes
1366	/// to make the referent of `pub const FOO: &'static Encoding`
1367	/// unique cross-crate, so don't take the address of this
1368	/// `static`.
1369	pub static ISO_8859_6: &'static Encoding = &ISO_8859_6_INIT;
1370
1371	/// The initializer for the [ISO-8859-7](static.ISO_8859_7.html) encoding.
1372	///
1373	/// For use only for taking the address of this form when
1374	/// Rust prohibits the use of the non-`_INIT` form directly,
1375	/// such as in initializers of other `static`s. If in doubt,
1376	/// use the corresponding non-`_INIT` reference-typed `static`.
1377	///
1378	/// This part of the public API will go away if Rust changes
1379	/// to make the referent of `pub const FOO: &'static Encoding`
1380	/// unique cross-crate or if Rust starts allowing static arrays
1381	/// to be initialized with `pub static FOO: &'static Encoding`
1382	/// items.
1383	pub static ISO_8859_7_INIT: Encoding = Encoding {
1384	name: "ISO-8859-7",
1385	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_7, `0x03A3`, `83`, `44`),
1386	};
1387
1388	/// The ISO-8859-7 encoding.
1389	///
1390	/// This is the Greek part of the ISO/IEC 8859 encoding family.
1391	///
1392	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-7.html),
1393	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-7-bmp.html)
1394	///
1395	/// This encoding roughly matches the Windows code page 28597. Windows decodes
1396	/// unassigned code points, the currency signs at 0xA4 and 0xA5 as well as
1397	/// 0xAA, which should be U+037A GREEK YPOGEGRAMMENI, to the Private Use Area
1398	/// of Unicode. Windows decodes 0xA1 to U+02BD MODIFIER LETTER REVERSED COMMA
1399	/// instead of U+2018 LEFT SINGLE QUOTATION MARK and 0xA2 to U+02BC MODIFIER
1400	/// LETTER APOSTROPHE instead of U+2019 RIGHT SINGLE QUOTATION MARK.
1401	///
1402	/// This will change from `static` to `const` if Rust changes
1403	/// to make the referent of `pub const FOO: &'static Encoding`
1404	/// unique cross-crate, so don't take the address of this
1405	/// `static`.
1406	pub static ISO_8859_7: &'static Encoding = &ISO_8859_7_INIT;
1407
1408	/// The initializer for the [ISO-8859-8](static.ISO_8859_8.html) encoding.
1409	///
1410	/// For use only for taking the address of this form when
1411	/// Rust prohibits the use of the non-`_INIT` form directly,
1412	/// such as in initializers of other `static`s. If in doubt,
1413	/// use the corresponding non-`_INIT` reference-typed `static`.
1414	///
1415	/// This part of the public API will go away if Rust changes
1416	/// to make the referent of `pub const FOO: &'static Encoding`
1417	/// unique cross-crate or if Rust starts allowing static arrays
1418	/// to be initialized with `pub static FOO: &'static Encoding`
1419	/// items.
1420	pub static ISO_8859_8_INIT: Encoding = Encoding {
1421	name: "ISO-8859-8",
1422	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_8, `0x05D0`, `96`, `27`),
1423	};
1424
1425	/// The ISO-8859-8 encoding.
1426	///
1427	/// This is the Hebrew part of the ISO/IEC 8859 encoding family in visual order.
1428	///
1429	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
1430	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
1431	///
1432	/// This encoding roughly matches the Windows code page 28598. Windows decodes
1433	/// 0xAF to OVERLINE instead of MACRON and 0xFE and 0xFD to the Private Use
1434	/// Area instead of LRM and RLM. Windows decodes unassigned code points to
1435	/// the private use area.
1436	///
1437	/// This will change from `static` to `const` if Rust changes
1438	/// to make the referent of `pub const FOO: &'static Encoding`
1439	/// unique cross-crate, so don't take the address of this
1440	/// `static`.
1441	pub static ISO_8859_8: &'static Encoding = &ISO_8859_8_INIT;
1442
1443	/// The initializer for the [ISO-8859-8-I](static.ISO_8859_8_I.html) encoding.
1444	///
1445	/// For use only for taking the address of this form when
1446	/// Rust prohibits the use of the non-`_INIT` form directly,
1447	/// such as in initializers of other `static`s. If in doubt,
1448	/// use the corresponding non-`_INIT` reference-typed `static`.
1449	///
1450	/// This part of the public API will go away if Rust changes
1451	/// to make the referent of `pub const FOO: &'static Encoding`
1452	/// unique cross-crate or if Rust starts allowing static arrays
1453	/// to be initialized with `pub static FOO: &'static Encoding`
1454	/// items.
1455	pub static ISO_8859_8_I_INIT: Encoding = Encoding {
1456	name: "ISO-8859-8-I",
1457	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.iso_8859_8, `0x05D0`, `96`, `27`),
1458	};
1459
1460	/// The ISO-8859-8-I encoding.
1461	///
1462	/// This is the Hebrew part of the ISO/IEC 8859 encoding family in logical order.
1463	///
1464	/// [Index visualization](https://encoding.spec.whatwg.org/iso-8859-8.html),
1465	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/iso-8859-8-bmp.html)
1466	///
1467	/// This encoding roughly matches the Windows code page 38598. Windows decodes
1468	/// 0xAF to OVERLINE instead of MACRON and 0xFE and 0xFD to the Private Use
1469	/// Area instead of LRM and RLM. Windows decodes unassigned code points to
1470	/// the private use area.
1471	///
1472	/// This will change from `static` to `const` if Rust changes
1473	/// to make the referent of `pub const FOO: &'static Encoding`
1474	/// unique cross-crate, so don't take the address of this
1475	/// `static`.
1476	pub static ISO_8859_8_I: &'static Encoding = &ISO_8859_8_I_INIT;
1477
1478	/// The initializer for the [KOI8-R](static.KOI8_R.html) encoding.
1479	///
1480	/// For use only for taking the address of this form when
1481	/// Rust prohibits the use of the non-`_INIT` form directly,
1482	/// such as in initializers of other `static`s. If in doubt,
1483	/// use the corresponding non-`_INIT` reference-typed `static`.
1484	///
1485	/// This part of the public API will go away if Rust changes
1486	/// to make the referent of `pub const FOO: &'static Encoding`
1487	/// unique cross-crate or if Rust starts allowing static arrays
1488	/// to be initialized with `pub static FOO: &'static Encoding`
1489	/// items.
1490	pub static KOI8_R_INIT: Encoding = Encoding {
1491	name: "KOI8-R",
1492	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.koi8_r, `0x044E`, `64`, `1`),
1493	};
1494
1495	/// The KOI8-R encoding.
1496	///
1497	/// This is an encoding for Russian from [RFC 1489](https://tools.ietf.org/html/rfc1489).
1498	///
1499	/// [Index visualization](https://encoding.spec.whatwg.org/koi8-r.html),
1500	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-r-bmp.html)
1501	///
1502	/// This encoding matches the Windows code page 20866.
1503	///
1504	/// This will change from `static` to `const` if Rust changes
1505	/// to make the referent of `pub const FOO: &'static Encoding`
1506	/// unique cross-crate, so don't take the address of this
1507	/// `static`.
1508	pub static KOI8_R: &'static Encoding = &KOI8_R_INIT;
1509
1510	/// The initializer for the [KOI8-U](static.KOI8_U.html) encoding.
1511	///
1512	/// For use only for taking the address of this form when
1513	/// Rust prohibits the use of the non-`_INIT` form directly,
1514	/// such as in initializers of other `static`s. If in doubt,
1515	/// use the corresponding non-`_INIT` reference-typed `static`.
1516	///
1517	/// This part of the public API will go away if Rust changes
1518	/// to make the referent of `pub const FOO: &'static Encoding`
1519	/// unique cross-crate or if Rust starts allowing static arrays
1520	/// to be initialized with `pub static FOO: &'static Encoding`
1521	/// items.
1522	pub static KOI8_U_INIT: Encoding = Encoding {
1523	name: "KOI8-U",
1524	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.koi8_u, `0x044E`, `64`, `1`),
1525	};
1526
1527	/// The KOI8-U encoding.
1528	///
1529	/// This is an encoding for Ukrainian adapted from KOI8-R.
1530	///
1531	/// [Index visualization](https://encoding.spec.whatwg.org/koi8-u.html),
1532	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/koi8-u-bmp.html)
1533	///
1534	/// This encoding matches the Windows code page 21866.
1535	///
1536	/// This will change from `static` to `const` if Rust changes
1537	/// to make the referent of `pub const FOO: &'static Encoding`
1538	/// unique cross-crate, so don't take the address of this
1539	/// `static`.
1540	pub static KOI8_U: &'static Encoding = &KOI8_U_INIT;
1541
1542	/// The initializer for the [Shift_JIS](static.SHIFT_JIS.html) encoding.
1543	///
1544	/// For use only for taking the address of this form when
1545	/// Rust prohibits the use of the non-`_INIT` form directly,
1546	/// such as in initializers of other `static`s. If in doubt,
1547	/// use the corresponding non-`_INIT` reference-typed `static`.
1548	///
1549	/// This part of the public API will go away if Rust changes
1550	/// to make the referent of `pub const FOO: &'static Encoding`
1551	/// unique cross-crate or if Rust starts allowing static arrays
1552	/// to be initialized with `pub static FOO: &'static Encoding`
1553	/// items.
1554	pub static SHIFT_JIS_INIT: Encoding = Encoding {
1555	name: "Shift_JIS",
1556	variant: VariantEncoding::ShiftJis,
1557	};
1558
1559	/// The Shift_JIS encoding.
1560	///
1561	/// This is the Japanese encoding for Windows.
1562	///
1563	/// [Index visualization](https://encoding.spec.whatwg.org/shift_jis.html),
1564	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/shift_jis-bmp.html)
1565	///
1566	/// This encoding matches the Windows code page 932, except Windows decodes some byte
/// sequences that are errors per the Encoding Standard to the question mark or the
1568	/// Private Use Area and generally uses U+30FB in place of the REPLACEMENT CHARACTER.
1569	///
1570	/// This will change from `static` to `const` if Rust changes
1571	/// to make the referent of `pub const FOO: &'static Encoding`
1572	/// unique cross-crate, so don't take the address of this
1573	/// `static`.
1574	pub static SHIFT_JIS: &'static Encoding = &SHIFT_JIS_INIT;
1575
1576	/// The initializer for the [UTF-16BE](static.UTF_16BE.html) encoding.
1577	///
1578	/// For use only for taking the address of this form when
1579	/// Rust prohibits the use of the non-`_INIT` form directly,
1580	/// such as in initializers of other `static`s. If in doubt,
1581	/// use the corresponding non-`_INIT` reference-typed `static`.
1582	///
1583	/// This part of the public API will go away if Rust changes
1584	/// to make the referent of `pub const FOO: &'static Encoding`
1585	/// unique cross-crate or if Rust starts allowing static arrays
1586	/// to be initialized with `pub static FOO: &'static Encoding`
1587	/// items.
1588	pub static UTF_16BE_INIT: Encoding = Encoding {
1589	name: "UTF-16BE",
1590	variant: VariantEncoding::Utf16Be,
1591	};
1592
1593	/// The UTF-16BE encoding.
1594	///
1595	/// This decode-only encoding uses 16-bit code units due to Unicode originally
/// having been designed as a 16-bit repertoire. In the absence of a byte order
1597	/// mark the big endian byte order is assumed.
1598	///
1599	/// There is no corresponding encoder in this crate or in the Encoding
1600	/// Standard. The output encoding of this encoding is UTF-8.
1601	///
1602	/// This encoding matches the Windows code page 1201.
1603	///
1604	/// This will change from `static` to `const` if Rust changes
1605	/// to make the referent of `pub const FOO: &'static Encoding`
1606	/// unique cross-crate, so don't take the address of this
1607	/// `static`.
1608	pub static UTF_16BE: &'static Encoding = &UTF_16BE_INIT;
1609
1610	/// The initializer for the [UTF-16LE](static.UTF_16LE.html) encoding.
1611	///
1612	/// For use only for taking the address of this form when
1613	/// Rust prohibits the use of the non-`_INIT` form directly,
1614	/// such as in initializers of other `static`s. If in doubt,
1615	/// use the corresponding non-`_INIT` reference-typed `static`.
1616	///
1617	/// This part of the public API will go away if Rust changes
1618	/// to make the referent of `pub const FOO: &'static Encoding`
1619	/// unique cross-crate or if Rust starts allowing static arrays
1620	/// to be initialized with `pub static FOO: &'static Encoding`
1621	/// items.
1622	pub static UTF_16LE_INIT: Encoding = Encoding {
1623	name: "UTF-16LE",
1624	variant: VariantEncoding::Utf16Le,
1625	};
1626
1627	/// The UTF-16LE encoding.
1628	///
1629	/// This decode-only encoding uses 16-bit code units due to Unicode originally
/// having been designed as a 16-bit repertoire. In the absence of a byte order
1631	/// mark the little endian byte order is assumed.
1632	///
1633	/// There is no corresponding encoder in this crate or in the Encoding
1634	/// Standard. The output encoding of this encoding is UTF-8.
1635	///
1636	/// This encoding matches the Windows code page 1200.
1637	///
1638	/// This will change from `static` to `const` if Rust changes
1639	/// to make the referent of `pub const FOO: &'static Encoding`
1640	/// unique cross-crate, so don't take the address of this
1641	/// `static`.
1642	pub static UTF_16LE: &'static Encoding = &UTF_16LE_INIT;
1643
1644	/// The initializer for the [UTF-8](static.UTF_8.html) encoding.
1645	///
1646	/// For use only for taking the address of this form when
1647	/// Rust prohibits the use of the non-`_INIT` form directly,
1648	/// such as in initializers of other `static`s. If in doubt,
1649	/// use the corresponding non-`_INIT` reference-typed `static`.
1650	///
1651	/// This part of the public API will go away if Rust changes
1652	/// to make the referent of `pub const FOO: &'static Encoding`
1653	/// unique cross-crate or if Rust starts allowing static arrays
1654	/// to be initialized with `pub static FOO: &'static Encoding`
1655	/// items.
1656	pub static UTF_8_INIT: Encoding = Encoding {
1657	name: "UTF-8",
1658	variant: VariantEncoding::Utf8,
1659	};
1660
1661	/// The UTF-8 encoding.
1662	///
/// This is the encoding that should be used for all new development. It can
/// represent all of Unicode.
1665	///
1666	/// This encoding matches the Windows code page 65001, except Windows differs
1667	/// in the number of errors generated for some erroneous byte sequences.
1668	///
1669	/// This will change from `static` to `const` if Rust changes
1670	/// to make the referent of `pub const FOO: &'static Encoding`
1671	/// unique cross-crate, so don't take the address of this
1672	/// `static`.
1673	pub static UTF_8: &'static Encoding = &UTF_8_INIT;
1674
1675	/// The initializer for the [gb18030](static.GB18030.html) encoding.
1676	///
1677	/// For use only for taking the address of this form when
1678	/// Rust prohibits the use of the non-`_INIT` form directly,
1679	/// such as in initializers of other `static`s. If in doubt,
1680	/// use the corresponding non-`_INIT` reference-typed `static`.
1681	///
1682	/// This part of the public API will go away if Rust changes
1683	/// to make the referent of `pub const FOO: &'static Encoding`
1684	/// unique cross-crate or if Rust starts allowing static arrays
1685	/// to be initialized with `pub static FOO: &'static Encoding`
1686	/// items.
1687	pub static GB18030_INIT: Encoding = Encoding {
1688	name: "gb18030",
1689	variant: VariantEncoding::Gb18030,
1690	};
1691
1692	/// The gb18030 encoding.
1693	///
1694	/// This encoding matches GB18030-2022 except the two-byte sequence 0xA3 0xA0
1695	/// maps to U+3000 for compatibility with existing Web content and the four-byte
1696	/// sequences for the non-PUA characters that got two-byte sequences still decode
1697	/// to the same non-PUA characters as in GB18030-2005. As a result, this encoding
1698	/// can represent all of Unicode except for 19 private-use characters.
1699	///
1700	/// [Index visualization for the two-byte sequences](https://encoding.spec.whatwg.org/gb18030.html),
1701	/// [Visualization of BMP coverage of the two-byte index](https://encoding.spec.whatwg.org/gb18030-bmp.html)
1702	///
1703	/// This encoding matches the Windows code page 54936.
1704	///
1705	/// This will change from `static` to `const` if Rust changes
1706	/// to make the referent of `pub const FOO: &'static Encoding`
1707	/// unique cross-crate, so don't take the address of this
1708	/// `static`.
1709	pub static GB18030: &'static Encoding = &GB18030_INIT;
1710
1711	/// The initializer for the [macintosh](static.MACINTOSH.html) encoding.
1712	///
1713	/// For use only for taking the address of this form when
1714	/// Rust prohibits the use of the non-`_INIT` form directly,
1715	/// such as in initializers of other `static`s. If in doubt,
1716	/// use the corresponding non-`_INIT` reference-typed `static`.
1717	///
1718	/// This part of the public API will go away if Rust changes
1719	/// to make the referent of `pub const FOO: &'static Encoding`
1720	/// unique cross-crate or if Rust starts allowing static arrays
1721	/// to be initialized with `pub static FOO: &'static Encoding`
1722	/// items.
1723	pub static MACINTOSH_INIT: Encoding = Encoding {
1724	name: "macintosh",
1725	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.macintosh, `0x00CD`, `106`, `3`),
1726	};
1727
1728	/// The macintosh encoding.
1729	///
1730	/// This is the MacRoman encoding from Mac OS Classic.
1731	///
1732	/// [Index visualization](https://encoding.spec.whatwg.org/macintosh.html),
1733	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/macintosh-bmp.html)
1734	///
1735	/// This encoding matches the Windows code page 10000, except Windows decodes
1736	/// 0xBD to U+2126 OHM SIGN instead of U+03A9 GREEK CAPITAL LETTER OMEGA.
1737	///
1738	/// This will change from `static` to `const` if Rust changes
1739	/// to make the referent of `pub const FOO: &'static Encoding`
1740	/// unique cross-crate, so don't take the address of this
1741	/// `static`.
1742	pub static MACINTOSH: &'static Encoding = &MACINTOSH_INIT;
1743
1744	/// The initializer for the [replacement](static.REPLACEMENT.html) encoding.
1745	///
1746	/// For use only for taking the address of this form when
1747	/// Rust prohibits the use of the non-`_INIT` form directly,
1748	/// such as in initializers of other `static`s. If in doubt,
1749	/// use the corresponding non-`_INIT` reference-typed `static`.
1750	///
1751	/// This part of the public API will go away if Rust changes
1752	/// to make the referent of `pub const FOO: &'static Encoding`
1753	/// unique cross-crate or if Rust starts allowing static arrays
1754	/// to be initialized with `pub static FOO: &'static Encoding`
1755	/// items.
1756	pub static REPLACEMENT_INIT: Encoding = Encoding {
1757	name: "replacement",
1758	variant: VariantEncoding::Replacement,
1759	};
1760
1761	/// The replacement encoding.
1762	///
1763	/// This decode-only encoding decodes all non-zero-length streams to a single
1764	/// REPLACEMENT CHARACTER. Its purpose is to avoid the use of an
1765	/// ASCII-compatible fallback encoding (typically windows-1252) for some
1766	/// encodings that are no longer supported by the Web Platform and that
1767	/// would be dangerous to treat as ASCII-compatible.
1768	///
1769	/// There is no corresponding encoder. The output encoding of this encoding
1770	/// is UTF-8.
1771	///
1772	/// This encoding does not have a Windows code page number.
1773	///
1774	/// This will change from `static` to `const` if Rust changes
1775	/// to make the referent of `pub const FOO: &'static Encoding`
1776	/// unique cross-crate, so don't take the address of this
1777	/// `static`.
1778	pub static REPLACEMENT: &'static Encoding = &REPLACEMENT_INIT;
1779
1780	/// The initializer for the [windows-1250](static.WINDOWS_1250.html) encoding.
1781	///
1782	/// For use only for taking the address of this form when
1783	/// Rust prohibits the use of the non-`_INIT` form directly,
1784	/// such as in initializers of other `static`s. If in doubt,
1785	/// use the corresponding non-`_INIT` reference-typed `static`.
1786	///
1787	/// This part of the public API will go away if Rust changes
1788	/// to make the referent of `pub const FOO: &'static Encoding`
1789	/// unique cross-crate or if Rust starts allowing static arrays
1790	/// to be initialized with `pub static FOO: &'static Encoding`
1791	/// items.
1792	pub static WINDOWS_1250_INIT: Encoding = Encoding {
1793	name: "windows-1250",
1794	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1250, `0x00DC`, `92`, `2`),
1795	};
1796
1797	/// The windows-1250 encoding.
1798	///
1799	/// This is the Central European encoding for Windows.
1800	///
1801	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1250.html),
1802	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1250-bmp.html)
1803	///
1804	/// This encoding matches the Windows code page 1250.
1805	///
1806	/// This will change from `static` to `const` if Rust changes
1807	/// to make the referent of `pub const FOO: &'static Encoding`
1808	/// unique cross-crate, so don't take the address of this
1809	/// `static`.
1810	pub static WINDOWS_1250: &'static Encoding = &WINDOWS_1250_INIT;
1811
1812	/// The initializer for the [windows-1251](static.WINDOWS_1251.html) encoding.
1813	///
1814	/// Use this form only when you need its address in a context where
1815	/// Rust prohibits the use of the non-`_INIT` form directly,
1816	/// such as in initializers of other `static`s. If in doubt,
1817	/// use the corresponding non-`_INIT` reference-typed `static`.
1818	///
1819	/// This part of the public API will go away if Rust changes
1820	/// to make the referent of `pub const FOO: &'static Encoding`
1821	/// unique cross-crate or if Rust starts allowing static arrays
1822	/// to be initialized with `pub static FOO: &'static Encoding`
1823	/// items.
1824	pub static WINDOWS_1251_INIT: Encoding = Encoding {
1825	name: "windows-1251",
1826	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1251, `0x0410`, `64`, `64`),
1827	};
1828
1829	/// The windows-1251 encoding.
1830	///
1831	/// This is the Cyrillic encoding for Windows.
1832	///
1833	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1251.html),
1834	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1251-bmp.html)
1835	///
1836	/// This encoding matches the Windows code page 1251.
1837	///
1838	/// This will change from `static` to `const` if Rust changes
1839	/// to make the referent of `pub const FOO: &'static Encoding`
1840	/// unique cross-crate, so don't take the address of this
1841	/// `static` (it is already a reference; use its value instead).
1842	pub static WINDOWS_1251: &'static Encoding = &WINDOWS_1251_INIT;
1843
1844	/// The initializer for the [windows-1252](static.WINDOWS_1252.html) encoding.
1845	///
1846	/// Use this form only when you need its address in a context where
1847	/// Rust prohibits the use of the non-`_INIT` form directly,
1848	/// such as in initializers of other `static`s. If in doubt,
1849	/// use the corresponding non-`_INIT` reference-typed `static`.
1850	///
1851	/// This part of the public API will go away if Rust changes
1852	/// to make the referent of `pub const FOO: &'static Encoding`
1853	/// unique cross-crate or if Rust starts allowing static arrays
1854	/// to be initialized with `pub static FOO: &'static Encoding`
1855	/// items.
1856	pub static WINDOWS_1252_INIT: Encoding = Encoding {
1857	name: "windows-1252",
1858	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1252, `0x00A0`, `32`, `96`),
1859	};
1860
1861	/// The windows-1252 encoding.
1862	///
1863	/// This is the Western encoding for Windows. It is an extension of ISO-8859-1,
1864	/// which is known as Latin 1.
1865	///
1866	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1252.html),
1867	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1252-bmp.html)
1868	///
1869	/// This encoding matches the Windows code page 1252.
1870	///
1871	/// This will change from `static` to `const` if Rust changes
1872	/// to make the referent of `pub const FOO: &'static Encoding`
1873	/// unique cross-crate, so don't take the address of this
1874	/// `static` (it is already a reference; use its value instead).
1875	pub static WINDOWS_1252: &'static Encoding = &WINDOWS_1252_INIT;
1876
1877	/// The initializer for the [windows-1253](static.WINDOWS_1253.html) encoding.
1878	///
1879	/// Use this form only when you need its address in a context where
1880	/// Rust prohibits the use of the non-`_INIT` form directly,
1881	/// such as in initializers of other `static`s. If in doubt,
1882	/// use the corresponding non-`_INIT` reference-typed `static`.
1883	///
1884	/// This part of the public API will go away if Rust changes
1885	/// to make the referent of `pub const FOO: &'static Encoding`
1886	/// unique cross-crate or if Rust starts allowing static arrays
1887	/// to be initialized with `pub static FOO: &'static Encoding`
1888	/// items.
1889	pub static WINDOWS_1253_INIT: Encoding = Encoding {
1890	name: "windows-1253",
1891	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1253, `0x03A3`, `83`, `44`),
1892	};
1893
1894	/// The windows-1253 encoding.
1895	///
1896	/// This is the Greek encoding for Windows. It is mostly an extension of
1897	/// ISO-8859-7, but U+0386 is mapped to a different byte.
1898	///
1899	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1253.html),
1900	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1253-bmp.html)
1901	///
1902	/// This encoding matches the Windows code page 1253, except Windows decodes
1903	/// unassigned code points to the Private Use Area of Unicode.
1904	///
1905	/// This will change from `static` to `const` if Rust changes
1906	/// to make the referent of `pub const FOO: &'static Encoding`
1907	/// unique cross-crate, so don't take the address of this
1908	/// `static` (it is already a reference; use its value instead).
1909	pub static WINDOWS_1253: &'static Encoding = &WINDOWS_1253_INIT;
1910
1911	/// The initializer for the [windows-1254](static.WINDOWS_1254.html) encoding.
1912	///
1913	/// Use this form only when you need its address in a context where
1914	/// Rust prohibits the use of the non-`_INIT` form directly,
1915	/// such as in initializers of other `static`s. If in doubt,
1916	/// use the corresponding non-`_INIT` reference-typed `static`.
1917	///
1918	/// This part of the public API will go away if Rust changes
1919	/// to make the referent of `pub const FOO: &'static Encoding`
1920	/// unique cross-crate or if Rust starts allowing static arrays
1921	/// to be initialized with `pub static FOO: &'static Encoding`
1922	/// items.
1923	pub static WINDOWS_1254_INIT: Encoding = Encoding {
1924	name: "windows-1254",
1925	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1254, `0x00DF`, `95`, `17`),
1926	};
1927
1928	/// The windows-1254 encoding.
1929	///
1930	/// This is the Turkish encoding for Windows. It is an extension of ISO-8859-9,
1931	/// which is known as Latin 5.
1932	///
1933	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1254.html),
1934	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1254-bmp.html)
1935	///
1936	/// This encoding matches the Windows code page 1254.
1937	///
1938	/// This will change from `static` to `const` if Rust changes
1939	/// to make the referent of `pub const FOO: &'static Encoding`
1940	/// unique cross-crate, so don't take the address of this
1941	/// `static` (it is already a reference; use its value instead).
1942	pub static WINDOWS_1254: &'static Encoding = &WINDOWS_1254_INIT;
1943
1944	/// The initializer for the [windows-1255](static.WINDOWS_1255.html) encoding.
1945	///
1946	/// Use this form only when you need its address in a context where
1947	/// Rust prohibits the use of the non-`_INIT` form directly,
1948	/// such as in initializers of other `static`s. If in doubt,
1949	/// use the corresponding non-`_INIT` reference-typed `static`.
1950	///
1951	/// This part of the public API will go away if Rust changes
1952	/// to make the referent of `pub const FOO: &'static Encoding`
1953	/// unique cross-crate or if Rust starts allowing static arrays
1954	/// to be initialized with `pub static FOO: &'static Encoding`
1955	/// items.
1956	pub static WINDOWS_1255_INIT: Encoding = Encoding {
1957	name: "windows-1255",
1958	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1255, `0x05D0`, `96`, `27`),
1959	};
1960
1961	/// The windows-1255 encoding.
1962	///
1963	/// This is the Hebrew encoding for Windows. It is an extension of ISO-8859-8-I,
1964	/// except for a currency sign swap.
1965	///
1966	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1255.html),
1967	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1255-bmp.html)
1968	///
1969	/// This encoding matches the Windows code page 1255, except Windows decodes
1970	/// unassigned code points to the Private Use Area of Unicode.
1971	///
1972	/// This will change from `static` to `const` if Rust changes
1973	/// to make the referent of `pub const FOO: &'static Encoding`
1974	/// unique cross-crate, so don't take the address of this
1975	/// `static` (it is already a reference; use its value instead).
1976	pub static WINDOWS_1255: &'static Encoding = &WINDOWS_1255_INIT;
1977
1978	/// The initializer for the [windows-1256](static.WINDOWS_1256.html) encoding.
1979	///
1980	/// Use this form only when you need its address in a context where
1981	/// Rust prohibits the use of the non-`_INIT` form directly,
1982	/// such as in initializers of other `static`s. If in doubt,
1983	/// use the corresponding non-`_INIT` reference-typed `static`.
1984	///
1985	/// This part of the public API will go away if Rust changes
1986	/// to make the referent of `pub const FOO: &'static Encoding`
1987	/// unique cross-crate or if Rust starts allowing static arrays
1988	/// to be initialized with `pub static FOO: &'static Encoding`
1989	/// items.
1990	pub static WINDOWS_1256_INIT: Encoding = Encoding {
1991	name: "windows-1256",
1992	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1256, `0x0621`, `65`, `22`),
1993	};
1994
1995	/// The windows-1256 encoding.
1996	///
1997	/// This is the Arabic encoding for Windows.
1998	///
1999	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1256.html),
2000	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1256-bmp.html)
2001	///
2002	/// This encoding matches the Windows code page 1256.
2003	///
2004	/// This will change from `static` to `const` if Rust changes
2005	/// to make the referent of `pub const FOO: &'static Encoding`
2006	/// unique cross-crate, so don't take the address of this
2007	/// `static` (it is already a reference; use its value instead).
2008	pub static WINDOWS_1256: &'static Encoding = &WINDOWS_1256_INIT;
2009
2010	/// The initializer for the [windows-1257](static.WINDOWS_1257.html) encoding.
2011	///
2012	/// Use this form only when you need its address in a context where
2013	/// Rust prohibits the use of the non-`_INIT` form directly,
2014	/// such as in initializers of other `static`s. If in doubt,
2015	/// use the corresponding non-`_INIT` reference-typed `static`.
2016	///
2017	/// This part of the public API will go away if Rust changes
2018	/// to make the referent of `pub const FOO: &'static Encoding`
2019	/// unique cross-crate or if Rust starts allowing static arrays
2020	/// to be initialized with `pub static FOO: &'static Encoding`
2021	/// items.
2022	pub static WINDOWS_1257_INIT: Encoding = Encoding {
2023	name: "windows-1257",
2024	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1257, `0x00DF`, `95`, `1`),
2025	};
2026
2027	/// The windows-1257 encoding.
2028	///
2029	/// This is the Baltic encoding for Windows.
2030	///
2031	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1257.html),
2032	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1257-bmp.html)
2033	///
2034	/// This encoding matches the Windows code page 1257, except Windows decodes
2035	/// unassigned code points to the Private Use Area of Unicode.
2036	///
2037	/// This will change from `static` to `const` if Rust changes
2038	/// to make the referent of `pub const FOO: &'static Encoding`
2039	/// unique cross-crate, so don't take the address of this
2040	/// `static` (it is already a reference; use its value instead).
2041	pub static WINDOWS_1257: &'static Encoding = &WINDOWS_1257_INIT;
2042
2043	/// The initializer for the [windows-1258](static.WINDOWS_1258.html) encoding.
2044	///
2045	/// Use this form only when you need its address in a context where
2046	/// Rust prohibits the use of the non-`_INIT` form directly,
2047	/// such as in initializers of other `static`s. If in doubt,
2048	/// use the corresponding non-`_INIT` reference-typed `static`.
2049	///
2050	/// This part of the public API will go away if Rust changes
2051	/// to make the referent of `pub const FOO: &'static Encoding`
2052	/// unique cross-crate or if Rust starts allowing static arrays
2053	/// to be initialized with `pub static FOO: &'static Encoding`
2054	/// items.
2055	pub static WINDOWS_1258_INIT: Encoding = Encoding {
2056	name: "windows-1258",
2057	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_1258, `0x00DF`, `95`, `4`),
2058	};
2059
2060	/// The windows-1258 encoding.
2061	///
2062	/// This is the Vietnamese encoding for Windows.
2063	///
2064	/// [Index visualization](https://encoding.spec.whatwg.org/windows-1258.html),
2065	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-1258-bmp.html)
2066	///
2067	/// This encoding matches the Windows code page 1258 when used in the
2068	/// non-normalizing mode. Unlike with the other single-byte encodings, the
2069	/// result of decoding is not necessarily in Normalization Form C. On the
2070	/// other hand, input in the Normalization Form C is not encoded without
2071	/// replacement. In general, it's a bad idea to encode to encodings other
2072	/// than UTF-8, but this encoding is especially hazardous to encode to.
2073	///
2074	/// This will change from `static` to `const` if Rust changes
2075	/// to make the referent of `pub const FOO: &'static Encoding`
2076	/// unique cross-crate, so don't take the address of this
2077	/// `static` (it is already a reference; use its value instead).
2078	pub static WINDOWS_1258: &'static Encoding = &WINDOWS_1258_INIT;
2079
2080	/// The initializer for the [windows-874](static.WINDOWS_874.html) encoding.
2081	///
2082	/// Use this form only when you need its address in a context where
2083	/// Rust prohibits the use of the non-`_INIT` form directly,
2084	/// such as in initializers of other `static`s. If in doubt,
2085	/// use the corresponding non-`_INIT` reference-typed `static`.
2086	///
2087	/// This part of the public API will go away if Rust changes
2088	/// to make the referent of `pub const FOO: &'static Encoding`
2089	/// unique cross-crate or if Rust starts allowing static arrays
2090	/// to be initialized with `pub static FOO: &'static Encoding`
2091	/// items.
2092	pub static WINDOWS_874_INIT: Encoding = Encoding {
2093	name: "windows-874",
2094	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.windows_874, `0x0E01`, `33`, `58`),
2095	};
2096
2097	/// The windows-874 encoding.
2098	///
2099	/// This is the Thai encoding for Windows. It is an extension of TIS-620 / ISO-8859-11.
2100	///
2101	/// [Index visualization](https://encoding.spec.whatwg.org/windows-874.html),
2102	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/windows-874-bmp.html)
2103	///
2104	/// This encoding matches the Windows code page 874, except Windows decodes
2105	/// unassigned code points to the Private Use Area of Unicode.
2106	///
2107	/// This will change from `static` to `const` if Rust changes
2108	/// to make the referent of `pub const FOO: &'static Encoding`
2109	/// unique cross-crate, so don't take the address of this
2110	/// `static` (it is already a reference; use its value instead).
2111	pub static WINDOWS_874: &'static Encoding = &WINDOWS_874_INIT;
2112
2113	/// The initializer for the [x-mac-cyrillic](static.X_MAC_CYRILLIC.html) encoding.
2114	///
2115	/// Use this form only when you need its address in a context where
2116	/// Rust prohibits the use of the non-`_INIT` form directly,
2117	/// such as in initializers of other `static`s. If in doubt,
2118	/// use the corresponding non-`_INIT` reference-typed `static`.
2119	///
2120	/// This part of the public API will go away if Rust changes
2121	/// to make the referent of `pub const FOO: &'static Encoding`
2122	/// unique cross-crate or if Rust starts allowing static arrays
2123	/// to be initialized with `pub static FOO: &'static Encoding`
2124	/// items.
2125	pub static X_MAC_CYRILLIC_INIT: Encoding = Encoding {
2126	name: "x-mac-cyrillic",
2127	variant: VariantEncoding::SingleByte(&data::SINGLE_BYTE_DATA.x_mac_cyrillic, `0x0430`, `96`, `31`),
2128	};
2129
2130	/// The x-mac-cyrillic encoding.
2131	///
2132	/// This is the MacUkrainian encoding from Mac OS Classic.
2133	///
2134	/// [Index visualization](https://encoding.spec.whatwg.org/x-mac-cyrillic.html),
2135	/// [Visualization of BMP coverage](https://encoding.spec.whatwg.org/x-mac-cyrillic-bmp.html)
2136	///
2137	/// This encoding matches the Windows code page 10017.
2138	///
2139	/// This will change from `static` to `const` if Rust changes
2140	/// to make the referent of `pub const FOO: &'static Encoding`
2141	/// unique cross-crate, so don't take the address of this
2142	/// `static` (it is already a reference; use its value instead).
2143	pub static X_MAC_CYRILLIC: &'static Encoding = &X_MAC_CYRILLIC_INIT;
2144
2145	/// The initializer for the [x-user-defined](static.X_USER_DEFINED.html) encoding.
2146	///
2147	/// Use this form only when you need its address in a context where
2148	/// Rust prohibits the use of the non-`_INIT` form directly,
2149	/// such as in initializers of other `static`s. If in doubt,
2150	/// use the corresponding non-`_INIT` reference-typed `static`.
2151	///
2152	/// This part of the public API will go away if Rust changes
2153	/// to make the referent of `pub const FOO: &'static Encoding`
2154	/// unique cross-crate or if Rust starts allowing static arrays
2155	/// to be initialized with `pub static FOO: &'static Encoding`
2156	/// items.
2157	pub static X_USER_DEFINED_INIT: Encoding = Encoding {
2158	name: "x-user-defined",
2159	variant: VariantEncoding::UserDefined,
2160	};
2161
2162	/// The x-user-defined encoding.
2163	///
2164	/// This encoding offsets the non-ASCII bytes by `0xF700` thereby decoding
2165	/// them to the Private Use Area of Unicode. It was used for loading binary
2166	/// data into a JavaScript string using `XMLHttpRequest` before XHR supported
2167	/// the `"arraybuffer"` response type.
2168	///
2169	/// This encoding does not have a Windows code page number.
2170	///
2171	/// This will change from `static` to `const` if Rust changes
2172	/// to make the referent of `pub const FOO: &'static Encoding`
2173	/// unique cross-crate, so don't take the address of this
2174	/// `static` (it is already a reference; use its value instead).
2175	pub static X_USER_DEFINED: &'static Encoding = &X_USER_DEFINED_INIT;
2176
/// Every label recognized by `Encoding::for_label`, in lowercase.
///
/// The entries are ordered first by length and then by comparing their
/// bytes from the last byte towards the first. This is the same ordering
/// that the binary-search comparator in `for_label` applies, so the order
/// of this generated table must not be changed by hand.
2177	static LABELS_SORTED: [&'static str; `228`] = [
2178	"l1",
2179	"l2",
2180	"l3",
2181	"l4",
2182	"l5",
2183	"l6",
2184	"l9",
2185	"866",
2186	"mac",
2187	"koi",
2188	"gbk",
2189	"big5",
2190	"utf8",
2191	"koi8",
2192	"sjis",
2193	"ucs-2",
2194	"ms932",
2195	"cp866",
2196	"utf-8",
2197	"cp819",
2198	"ascii",
2199	"x-gbk",
2200	"greek",
2201	"cp1250",
2202	"cp1251",
2203	"latin1",
2204	"gb2312",
2205	"cp1252",
2206	"latin2",
2207	"cp1253",
2208	"latin3",
2209	"cp1254",
2210	"latin4",
2211	"cp1255",
2212	"csbig5",
2213	"latin5",
2214	"utf-16",
2215	"cp1256",
2216	"ibm866",
2217	"latin6",
2218	"cp1257",
2219	"cp1258",
2220	"greek8",
2221	"ibm819",
2222	"arabic",
2223	"visual",
2224	"korean",
2225	"euc-jp",
2226	"koi8-r",
2227	"koi8_r",
2228	"euc-kr",
2229	"x-sjis",
2230	"koi8-u",
2231	"hebrew",
2232	"tis-620",
2233	"gb18030",
2234	"ksc5601",
2235	"gb_2312",
2236	"dos-874",
2237	"cn-big5",
2238	"unicode",
2239	"chinese",
2240	"logical",
2241	"cskoi8r",
2242	"cseuckr",
2243	"koi8-ru",
2244	"x-cp1250",
2245	"ksc_5601",
2246	"x-cp1251",
2247	"iso88591",
2248	"csgb2312",
2249	"x-cp1252",
2250	"iso88592",
2251	"x-cp1253",
2252	"iso88593",
2253	"ecma-114",
2254	"x-cp1254",
2255	"iso88594",
2256	"x-cp1255",
2257	"iso88595",
2258	"x-x-big5",
2259	"x-cp1256",
2260	"csibm866",
2261	"iso88596",
2262	"x-cp1257",
2263	"iso88597",
2264	"asmo-708",
2265	"ecma-118",
2266	"elot_928",
2267	"x-cp1258",
2268	"iso88598",
2269	"iso88599",
2270	"cyrillic",
2271	"utf-16be",
2272	"utf-16le",
2273	"us-ascii",
2274	"ms_kanji",
2275	"x-euc-jp",
2276	"iso885910",
2277	"iso8859-1",
2278	"iso885911",
2279	"iso8859-2",
2280	"iso8859-3",
2281	"iso885913",
2282	"iso8859-4",
2283	"iso885914",
2284	"iso8859-5",
2285	"iso885915",
2286	"iso8859-6",
2287	"iso8859-7",
2288	"iso8859-8",
2289	"iso-ir-58",
2290	"iso8859-9",
2291	"csunicode",
2292	"macintosh",
2293	"shift-jis",
2294	"shift_jis",
2295	"iso-ir-100",
2296	"iso8859-10",
2297	"iso-ir-110",
2298	"gb_2312-80",
2299	"iso-8859-1",
2300	"iso_8859-1",
2301	"iso-ir-101",
2302	"iso8859-11",
2303	"iso-8859-2",
2304	"iso_8859-2",
2305	"hz-gb-2312",
2306	"iso-8859-3",
2307	"iso_8859-3",
2308	"iso8859-13",
2309	"iso-8859-4",
2310	"iso_8859-4",
2311	"iso8859-14",
2312	"iso-ir-144",
2313	"iso-8859-5",
2314	"iso_8859-5",
2315	"iso8859-15",
2316	"iso-8859-6",
2317	"iso_8859-6",
2318	"iso-ir-126",
2319	"iso-8859-7",
2320	"iso_8859-7",
2321	"iso-ir-127",
2322	"iso-ir-157",
2323	"iso-8859-8",
2324	"iso_8859-8",
2325	"iso-ir-138",
2326	"iso-ir-148",
2327	"iso-8859-9",
2328	"iso_8859-9",
2329	"iso-ir-109",
2330	"iso-ir-149",
2331	"big5-hkscs",
2332	"csshiftjis",
2333	"iso-8859-10",
2334	"iso-8859-11",
2335	"csisolatin1",
2336	"csisolatin2",
2337	"iso-8859-13",
2338	"csisolatin3",
2339	"iso-8859-14",
2340	"windows-874",
2341	"csisolatin4",
2342	"iso-8859-15",
2343	"iso_8859-15",
2344	"csisolatin5",
2345	"iso-8859-16",
2346	"csisolatin6",
2347	"windows-949",
2348	"csisolatin9",
2349	"csiso88596e",
2350	"csiso88598e",
2351	"unicodefffe",
2352	"unicodefeff",
2353	"csmacintosh",
2354	"csiso88596i",
2355	"csiso88598i",
2356	"windows-31j",
2357	"x-mac-roman",
2358	"iso-2022-cn",
2359	"iso-2022-jp",
2360	"csiso2022jp",
2361	"iso-2022-kr",
2362	"csiso2022kr",
2363	"replacement",
2364	"windows-1250",
2365	"windows-1251",
2366	"windows-1252",
2367	"windows-1253",
2368	"windows-1254",
2369	"windows-1255",
2370	"windows-1256",
2371	"windows-1257",
2372	"windows-1258",
2373	"iso-8859-6-e",
2374	"iso-8859-8-e",
2375	"iso-8859-6-i",
2376	"iso-8859-8-i",
2377	"sun_eu_greek",
2378	"csksc56011987",
2379	"unicode20utf8",
2380	"unicode11utf8",
2381	"ks_c_5601-1987",
2382	"ansi_x3.4-1968",
2383	"ks_c_5601-1989",
2384	"x-mac-cyrillic",
2385	"x-user-defined",
2386	"csiso58gb231280",
2387	"iso-10646-ucs-2",
2388	"iso_8859-1:1987",
2389	"iso_8859-2:1987",
2390	"iso_8859-6:1987",
2391	"iso_8859-7:1987",
2392	"iso_8859-3:1988",
2393	"iso_8859-4:1988",
2394	"iso_8859-5:1988",
2395	"iso_8859-8:1988",
2396	"x-unicode20utf8",
2397	"iso_8859-9:1989",
2398	"csisolatingreek",
2399	"x-mac-ukrainian",
2400	"iso-2022-cn-ext",
2401	"csisolatinarabic",
2402	"csisolatinhebrew",
2403	"unicode-1-1-utf-8",
2404	"csisolatincyrillic",
2405	"cseucpkdfmtjapanese",
2406	];
2407
/// The encodings that the entries of `LABELS_SORTED` resolve to.
///
/// This table has the same length as `LABELS_SORTED` and is laid out in the
/// same order, so entry `i` here pairs with label `i` there (for example,
/// the first label `l1` pairs with `WINDOWS_1252_INIT`).
/// NOTE(review): the code that indexes this table with the binary-search
/// result is outside the visible part of the file; confirm against it.
2408	static ENCODINGS_IN_LABEL_SORT: [&'static Encoding; `228`] = [
2409	&WINDOWS_1252_INIT,
2410	&ISO_8859_2_INIT,
2411	&ISO_8859_3_INIT,
2412	&ISO_8859_4_INIT,
2413	&WINDOWS_1254_INIT,
2414	&ISO_8859_10_INIT,
2415	&ISO_8859_15_INIT,
2416	&IBM866_INIT,
2417	&MACINTOSH_INIT,
2418	&KOI8_R_INIT,
2419	&GBK_INIT,
2420	&BIG5_INIT,
2421	&UTF_8_INIT,
2422	&KOI8_R_INIT,
2423	&SHIFT_JIS_INIT,
2424	&UTF_16LE_INIT,
2425	&SHIFT_JIS_INIT,
2426	&IBM866_INIT,
2427	&UTF_8_INIT,
2428	&WINDOWS_1252_INIT,
2429	&WINDOWS_1252_INIT,
2430	&GBK_INIT,
2431	&ISO_8859_7_INIT,
2432	&WINDOWS_1250_INIT,
2433	&WINDOWS_1251_INIT,
2434	&WINDOWS_1252_INIT,
2435	&GBK_INIT,
2436	&WINDOWS_1252_INIT,
2437	&ISO_8859_2_INIT,
2438	&WINDOWS_1253_INIT,
2439	&ISO_8859_3_INIT,
2440	&WINDOWS_1254_INIT,
2441	&ISO_8859_4_INIT,
2442	&WINDOWS_1255_INIT,
2443	&BIG5_INIT,
2444	&WINDOWS_1254_INIT,
2445	&UTF_16LE_INIT,
2446	&WINDOWS_1256_INIT,
2447	&IBM866_INIT,
2448	&ISO_8859_10_INIT,
2449	&WINDOWS_1257_INIT,
2450	&WINDOWS_1258_INIT,
2451	&ISO_8859_7_INIT,
2452	&WINDOWS_1252_INIT,
2453	&ISO_8859_6_INIT,
2454	&ISO_8859_8_INIT,
2455	&EUC_KR_INIT,
2456	&EUC_JP_INIT,
2457	&KOI8_R_INIT,
2458	&KOI8_R_INIT,
2459	&EUC_KR_INIT,
2460	&SHIFT_JIS_INIT,
2461	&KOI8_U_INIT,
2462	&ISO_8859_8_INIT,
2463	&WINDOWS_874_INIT,
2464	&GB18030_INIT,
2465	&EUC_KR_INIT,
2466	&GBK_INIT,
2467	&WINDOWS_874_INIT,
2468	&BIG5_INIT,
2469	&UTF_16LE_INIT,
2470	&GBK_INIT,
2471	&ISO_8859_8_I_INIT,
2472	&KOI8_R_INIT,
2473	&EUC_KR_INIT,
2474	&KOI8_U_INIT,
2475	&WINDOWS_1250_INIT,
2476	&EUC_KR_INIT,
2477	&WINDOWS_1251_INIT,
2478	&WINDOWS_1252_INIT,
2479	&GBK_INIT,
2480	&WINDOWS_1252_INIT,
2481	&ISO_8859_2_INIT,
2482	&WINDOWS_1253_INIT,
2483	&ISO_8859_3_INIT,
2484	&ISO_8859_6_INIT,
2485	&WINDOWS_1254_INIT,
2486	&ISO_8859_4_INIT,
2487	&WINDOWS_1255_INIT,
2488	&ISO_8859_5_INIT,
2489	&BIG5_INIT,
2490	&WINDOWS_1256_INIT,
2491	&IBM866_INIT,
2492	&ISO_8859_6_INIT,
2493	&WINDOWS_1257_INIT,
2494	&ISO_8859_7_INIT,
2495	&ISO_8859_6_INIT,
2496	&ISO_8859_7_INIT,
2497	&ISO_8859_7_INIT,
2498	&WINDOWS_1258_INIT,
2499	&ISO_8859_8_INIT,
2500	&WINDOWS_1254_INIT,
2501	&ISO_8859_5_INIT,
2502	&UTF_16BE_INIT,
2503	&UTF_16LE_INIT,
2504	&WINDOWS_1252_INIT,
2505	&SHIFT_JIS_INIT,
2506	&EUC_JP_INIT,
2507	&ISO_8859_10_INIT,
2508	&WINDOWS_1252_INIT,
2509	&WINDOWS_874_INIT,
2510	&ISO_8859_2_INIT,
2511	&ISO_8859_3_INIT,
2512	&ISO_8859_13_INIT,
2513	&ISO_8859_4_INIT,
2514	&ISO_8859_14_INIT,
2515	&ISO_8859_5_INIT,
2516	&ISO_8859_15_INIT,
2517	&ISO_8859_6_INIT,
2518	&ISO_8859_7_INIT,
2519	&ISO_8859_8_INIT,
2520	&GBK_INIT,
2521	&WINDOWS_1254_INIT,
2522	&UTF_16LE_INIT,
2523	&MACINTOSH_INIT,
2524	&SHIFT_JIS_INIT,
2525	&SHIFT_JIS_INIT,
2526	&WINDOWS_1252_INIT,
2527	&ISO_8859_10_INIT,
2528	&ISO_8859_4_INIT,
2529	&GBK_INIT,
2530	&WINDOWS_1252_INIT,
2531	&WINDOWS_1252_INIT,
2532	&ISO_8859_2_INIT,
2533	&WINDOWS_874_INIT,
2534	&ISO_8859_2_INIT,
2535	&ISO_8859_2_INIT,
2536	&REPLACEMENT_INIT,
2537	&ISO_8859_3_INIT,
2538	&ISO_8859_3_INIT,
2539	&ISO_8859_13_INIT,
2540	&ISO_8859_4_INIT,
2541	&ISO_8859_4_INIT,
2542	&ISO_8859_14_INIT,
2543	&ISO_8859_5_INIT,
2544	&ISO_8859_5_INIT,
2545	&ISO_8859_5_INIT,
2546	&ISO_8859_15_INIT,
2547	&ISO_8859_6_INIT,
2548	&ISO_8859_6_INIT,
2549	&ISO_8859_7_INIT,
2550	&ISO_8859_7_INIT,
2551	&ISO_8859_7_INIT,
2552	&ISO_8859_6_INIT,
2553	&ISO_8859_10_INIT,
2554	&ISO_8859_8_INIT,
2555	&ISO_8859_8_INIT,
2556	&ISO_8859_8_INIT,
2557	&WINDOWS_1254_INIT,
2558	&WINDOWS_1254_INIT,
2559	&WINDOWS_1254_INIT,
2560	&ISO_8859_3_INIT,
2561	&EUC_KR_INIT,
2562	&BIG5_INIT,
2563	&SHIFT_JIS_INIT,
2564	&ISO_8859_10_INIT,
2565	&WINDOWS_874_INIT,
2566	&WINDOWS_1252_INIT,
2567	&ISO_8859_2_INIT,
2568	&ISO_8859_13_INIT,
2569	&ISO_8859_3_INIT,
2570	&ISO_8859_14_INIT,
2571	&WINDOWS_874_INIT,
2572	&ISO_8859_4_INIT,
2573	&ISO_8859_15_INIT,
2574	&ISO_8859_15_INIT,
2575	&WINDOWS_1254_INIT,
2576	&ISO_8859_16_INIT,
2577	&ISO_8859_10_INIT,
2578	&EUC_KR_INIT,
2579	&ISO_8859_15_INIT,
2580	&ISO_8859_6_INIT,
2581	&ISO_8859_8_INIT,
2582	&UTF_16BE_INIT,
2583	&UTF_16LE_INIT,
2584	&MACINTOSH_INIT,
2585	&ISO_8859_6_INIT,
2586	&ISO_8859_8_I_INIT,
2587	&SHIFT_JIS_INIT,
2588	&MACINTOSH_INIT,
2589	&REPLACEMENT_INIT,
2590	&ISO_2022_JP_INIT,
2591	&ISO_2022_JP_INIT,
2592	&REPLACEMENT_INIT,
2593	&REPLACEMENT_INIT,
2594	&REPLACEMENT_INIT,
2595	&WINDOWS_1250_INIT,
2596	&WINDOWS_1251_INIT,
2597	&WINDOWS_1252_INIT,
2598	&WINDOWS_1253_INIT,
2599	&WINDOWS_1254_INIT,
2600	&WINDOWS_1255_INIT,
2601	&WINDOWS_1256_INIT,
2602	&WINDOWS_1257_INIT,
2603	&WINDOWS_1258_INIT,
2604	&ISO_8859_6_INIT,
2605	&ISO_8859_8_INIT,
2606	&ISO_8859_6_INIT,
2607	&ISO_8859_8_I_INIT,
2608	&ISO_8859_7_INIT,
2609	&EUC_KR_INIT,
2610	&UTF_8_INIT,
2611	&UTF_8_INIT,
2612	&EUC_KR_INIT,
2613	&WINDOWS_1252_INIT,
2614	&EUC_KR_INIT,
2615	&X_MAC_CYRILLIC_INIT,
2616	&X_USER_DEFINED_INIT,
2617	&GBK_INIT,
2618	&UTF_16LE_INIT,
2619	&WINDOWS_1252_INIT,
2620	&ISO_8859_2_INIT,
2621	&ISO_8859_6_INIT,
2622	&ISO_8859_7_INIT,
2623	&ISO_8859_3_INIT,
2624	&ISO_8859_4_INIT,
2625	&ISO_8859_5_INIT,
2626	&ISO_8859_8_INIT,
2627	&UTF_8_INIT,
2628	&WINDOWS_1254_INIT,
2629	&ISO_8859_7_INIT,
2630	&X_MAC_CYRILLIC_INIT,
2631	&REPLACEMENT_INIT,
2632	&ISO_8859_6_INIT,
2633	&ISO_8859_8_INIT,
2634	&UTF_8_INIT,
2635	&ISO_8859_5_INIT,
2636	&EUC_JP_INIT,
2637	];
2638
2639	// END GENERATED CODE
2640
2641	/// An encoding as defined in the [Encoding Standard][1].
2642	///
2643	/// An _encoding_ defines a mapping from a `u8` sequence to a `char` sequence
2644	/// and, in most cases, vice versa. Each encoding has a name, an output
2645	/// encoding, and one or more labels.
2646	///
2647	/// _Labels_ are ASCII-case-insensitive strings that are used to identify an
2648	/// encoding in formats and protocols. The _name_ of the encoding is the
2649	/// preferred label in the case appropriate for returning from the
2650	/// [`characterSet`][2] property of the `Document` DOM interface.
2651	///
2652	/// The _output encoding_ is the encoding used for form submission and URL
2653	/// parsing on Web pages in the encoding. This is UTF-8 for the replacement,
2654	/// UTF-16LE and UTF-16BE encodings and the encoding itself for other
2655	/// encodings.
2656	///
2657	/// [1]: https://encoding.spec.whatwg.org/
2658	/// [2]: https://dom.spec.whatwg.org/#dom-document-characterset
2659	///
2660	/// # Streaming vs. Non-Streaming
2661	///
2662	/// When you have the entire input in a single buffer, you can use the
2663	/// methods [`decode()`][3], [`decode_with_bom_removal()`][4],
2664	/// [`decode_without_bom_handling()`][5],
2665	/// [`decode_without_bom_handling_and_without_replacement()`][6] and
2666	/// [`encode()`][7]. (These methods are available to Rust callers only and are
2667	/// not available in the C API.) Unlike the rest of the API available to Rust,
2668	/// these methods perform heap allocations. You should use the `Decoder` and
2669	/// `Encoder` objects when your input is split into multiple buffers or when
2670	/// you want to control the allocation of the output buffers.
2671	///
2672	/// [3]: #method.decode
2673	/// [4]: #method.decode_with_bom_removal
2674	/// [5]: #method.decode_without_bom_handling
2675	/// [6]: #method.decode_without_bom_handling_and_without_replacement
2676	/// [7]: #method.encode
2677	///
2678	/// # Instances
2679	///
2680	/// All instances of `Encoding` are statically allocated and have the `'static`
2681	/// lifetime. There is precisely one unique `Encoding` instance for each
2682	/// encoding defined in the Encoding Standard.
2683	///
2684	/// To obtain a reference to a particular encoding whose identity you know at
2685	/// compile time, use a `static` that refers to the encoding. There is a `static`
2686	/// for each encoding. The `static`s are named in all caps with hyphens
2687	/// replaced with underscores (and in C/C++ have `_ENCODING` appended to the
2688	/// name). For example, if you know at compile time that you will want to
2689	/// decode using the UTF-8 encoding, use the `UTF_8` `static` (`UTF_8_ENCODING`
2690	/// in C/C++).
2691	///
2692	/// Additionally, there are non-reference-typed forms ending with `_INIT` to
2693	/// work around the problem that `static`s of the type `&'static Encoding`
2694	/// cannot be used to initialize items of an array whose type is
2695	/// `[&'static Encoding; N]`.
2696	///
2697	/// If you don't know what encoding you need at compile time and need to
2698	/// dynamically get an encoding by label, use
2699	/// <code>Encoding::<a href="#method.for_label">for_label</a>(<var>label</var>)</code>.
2700	///
2701	/// Instances of `Encoding` can be compared with `==` (in both Rust and in
2702	/// C/C++).
2703	pub struct Encoding {
// The name of the encoding; the generated `_INIT` initializers set it to
// strings such as windows-1252 and x-user-defined.
2704	name: &'static str,
// Selects the concrete implementation; the initializers visible in this
// file use the `SingleByte` and `UserDefined` variants.
2705	variant: VariantEncoding,
2706	}
2707
2708	impl Encoding {
2709	/// Implements the
2710	/// [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
2711	/// algorithm.
2712	///
2713	/// If, after ASCII-lowercasing and removing leading and trailing
2714	/// whitespace, the argument matches a label defined in the Encoding
2715	/// Standard, `Some(&'static Encoding)` representing the corresponding
2716	/// encoding is returned. If there is no match, `None` is returned.
2717	///
2718	/// This is the right method to use if the action upon the method returning
2719	/// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`) instead.
2720	/// When the action upon the method returning `None` is not to proceed with
2721	/// a fallback but to refuse processing, `for_label_no_replacement()` is more
2722	/// appropriate.
2723	///
2724	/// The argument is of type `&[u8]` instead of `&str` to save callers
2725	/// that are extracting the label from a non-UTF-8 protocol the trouble
2726	/// of conversion to UTF-8. (If you have a `&str`, just call `.as_bytes()`
2727	/// on it.)
2728	///
2729	/// Available via the C wrapper.
2730	///
2731	/// # Example
2732	/// ```
2733	/// use encoding_rs::Encoding;
2734	///
2735	/// assert_eq!(Some(encoding_rs::UTF_8), Encoding::for_label(b"utf-8"));
2736	/// assert_eq!(Some(encoding_rs::UTF_8), Encoding::for_label(b"unicode11utf8"));
2737	///
2738	/// assert_eq!(Some(encoding_rs::ISO_8859_2), Encoding::for_label(b"latin2"));
2739	///
2740	/// assert_eq!(Some(encoding_rs::UTF_16BE), Encoding::for_label(b"utf-16be"));
2741	///
2742	/// assert_eq!(None, Encoding::for_label(b"unrecognized label"));
2743	/// ```
2744	pub fn for_label(label: &[u8]) -> Option<&'static Encoding> {
2745	let mut trimmed = [`0u8`; LONGEST_LABEL_LENGTH];
2746	let mut trimmed_pos = `0usize`;
2747	let mut iter = label.into_iter();
2748	// before
2749	loop {
2750	match iter.next() {
2751	None => {
2752	return None;
2753	}
2754	Some(byte) => {
2755	// The characters used in labels are:
2756	// a-z (except q, but excluding it below seems excessive)
2757	// 0-9
2758	// . _ - :
2759	match *byte {
2760	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2761	continue;
2762	}
2763	b'A'..=b'Z' => {
2764	trimmed[trimmed_pos] = *byte + `0x20u8`;
2765	trimmed_pos = `1usize`;
2766	break;
2767	}
2768	b'a'..=b'z' \| b'0'..=b'9' \| b'-' \| b'_' \| b':' \| b'.' => {
2769	trimmed[trimmed_pos] = *byte;
2770	trimmed_pos = `1usize`;
2771	break;
2772	}
2773	_ => {
2774	return None;
2775	}
2776	}
2777	}
2778	}
2779	}
2780	// inside
2781	loop {
2782	match iter.next() {
2783	None => {
2784	break;
2785	}
2786	Some(byte) => {
2787	match *byte {
2788	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2789	break;
2790	}
2791	b'A'..=b'Z' => {
2792	if trimmed_pos == LONGEST_LABEL_LENGTH {
2793	// There's no encoding with a label this long
2794	return None;
2795	}
2796	trimmed[trimmed_pos] = *byte + `0x20u8`;
2797	trimmed_pos += `1usize`;
2798	continue;
2799	}
2800	b'a'..=b'z' \| b'0'..=b'9' \| b'-' \| b'_' \| b':' \| b'.' => {
2801	if trimmed_pos == LONGEST_LABEL_LENGTH {
2802	// There's no encoding with a label this long
2803	return None;
2804	}
2805	trimmed[trimmed_pos] = *byte;
2806	trimmed_pos += `1usize`;
2807	continue;
2808	}
2809	_ => {
2810	return None;
2811	}
2812	}
2813	}
2814	}
2815	}
2816	// after
2817	loop {
2818	match iter.next() {
2819	None => {
2820	break;
2821	}
2822	Some(byte) => {
2823	match *byte {
2824	`0x09u8` \| `0x0Au8` \| `0x0Cu8` \| `0x0Du8` \| `0x20u8` => {
2825	continue;
2826	}
2827	_ => {
2828	// There's no label with space in the middle
2829	return None;
2830	}
2831	}
2832	}
2833	}
2834	}
2835	let candidate = &trimmed[..trimmed_pos];
2836	match LABELS_SORTED.binary_search_by(\|probe\| {
2837	let bytes = probe.as_bytes();
2838	let c = bytes.len().cmp(&candidate.len());
2839	if c != Ordering::Equal {
2840	return c;
2841	}
2842	let probe_iter = bytes.iter().rev();
2843	let candidate_iter = candidate.iter().rev();
2844	probe_iter.cmp(candidate_iter)
2845	}) {
2846	Ok(i) => Some(ENCODINGS_IN_LABEL_SORT[i]),
2847	Err(_) => None,
2848	}
2849	}
2850
2851	/// This method behaves the same as `for_label()`, except when `for_label()`
2852	/// would return `Some(REPLACEMENT)`, this method returns `None` instead.
2853	///
2854	/// This method is useful in scenarios where a fatal error is required
2855	/// upon invalid label, because in those cases the caller typically wishes
2856	/// to treat the labels that map to the replacement encoding as fatal
2857	/// errors, too.
2858	///
2859	/// It is not OK to use this method when the action upon the method returning
2860	/// `None` is to use a fallback encoding (e.g. `WINDOWS_1252`). In such a
2861	/// case, the `for_label()` method should be used instead in order to avoid
2862	/// unsafe fallback for labels that `for_label()` maps to `Some(REPLACEMENT)`.
2863	///
2864	/// Available via the C wrapper.
2865	#[inline]
2866	pub fn for_label_no_replacement(label: &[u8]) -> Option<&'static Encoding> {
2867	match Encoding::for_label(label) {
2868	None => None,
2869	Some(encoding) => {
2870	if encoding == REPLACEMENT {
2871	None
2872	} else {
2873	Some(encoding)
2874	}
2875	}
2876	}
2877	}
2878
2879	/// Performs non-incremental BOM sniffing.
2880	///
2881	/// The argument must either be a buffer representing the entire input
2882	/// stream (non-streaming case) or a buffer representing at least the first
2883	/// three bytes of the input stream (streaming case).
2884	///
2885	/// Returns `Some((UTF_8, 3))`, `Some((UTF_16LE, 2))` or
2886	/// `Some((UTF_16BE, 2))` if the argument starts with the UTF-8, UTF-16LE
2887	/// or UTF-16BE BOM or `None` otherwise.
2888	///
2889	/// Available via the C wrapper.
2890	#[inline]
2891	pub fn for_bom(buffer: &[u8]) -> Option<(&'static Encoding, usize)> {
2892	if buffer.starts_with(b"`\xEF\xBB\xBF`") {
2893	Some((UTF_8, `3`))
2894	} else if buffer.starts_with(b"`\xFF\xFE`") {
2895	Some((UTF_16LE, `2`))
2896	} else if buffer.starts_with(b"`\xFE\xFF`") {
2897	Some((UTF_16BE, `2`))
2898	} else {
2899	None
2900	}
2901	}
2902
2903	/// Returns the name of this encoding.
2904	///
2905	/// This name is appropriate to return as-is from the DOM
2906	/// `document.characterSet` property.
2907	///
2908	/// Available via the C wrapper.
2909	#[inline]
2910	pub fn name(&'static self) -> &'static str {
2911	self.name
2912	}
2913
2914	/// Checks whether the _output encoding_ of this encoding can encode every
2915	/// `char`. (Only true if the output encoding is UTF-8.)
2916	///
2917	/// Available via the C wrapper.
2918	#[inline]
2919	pub fn can_encode_everything(&'static self) -> bool {
2920	self.output_encoding() == UTF_8
2921	}
2922
2923	/// Checks whether the bytes 0x00...0x7F map exclusively to the characters
2924	/// U+0000...U+007F and vice versa.
2925	///
2926	/// Available via the C wrapper.
2927	#[inline]
2928	pub fn is_ascii_compatible(&'static self) -> bool {
2929	!(self == REPLACEMENT \|\| self == UTF_16BE \|\| self == UTF_16LE \|\| self == ISO_2022_JP)
2930	}
2931
2932	/// Checks whether this encoding maps one byte to one Basic Multilingual
2933	/// Plane code point (i.e. byte length equals decoded UTF-16 length) and
2934	/// vice versa (for mappable characters).
2935	///
2936	/// `true` iff this encoding is on the list of [Legacy single-byte
2937	/// encodings](https://encoding.spec.whatwg.org/#legacy-single-byte-encodings)
2938	/// in the spec or x-user-defined.
2939	///
2940	/// Available via the C wrapper.
2941	#[inline]
2942	pub fn is_single_byte(&'static self) -> bool {
2943	self.variant.is_single_byte()
2944	}
2945
/// Tells whether the bytes 0x00...0x7F map mostly to the characters
/// U+0000...U+007F and vice versa.
///
/// This is `false` only for replacement, UTF-16BE and UTF-16LE.
#[cfg(feature = "alloc")]
#[inline]
fn is_potentially_borrowable(&'static self) -> bool {
    self != REPLACEMENT && self != UTF_16BE && self != UTF_16LE
}
2953
2954	/// Returns the _output encoding_ of this encoding. This is UTF-8 for
2955	/// UTF-16BE, UTF-16LE, and replacement and the encoding itself otherwise.
2956	///
2957	/// _Note:_ The _output encoding_ concept is needed for form submission and
2958	/// error handling in the query strings of URLs in the Web Platform.
2959	///
2960	/// Available via the C wrapper.
2961	#[inline]
2962	pub fn output_encoding(&'static self) -> &'static Encoding {
2963	if self == REPLACEMENT \|\| self == UTF_16BE \|\| self == UTF_16LE {
2964	UTF_8
2965	} else {
2966	self
2967	}
2968	}
2969
/// Decodes a complete input into a `Cow<'a, str>` _with BOM sniffing_,
/// replacing malformed sequences with the REPLACEMENT CHARACTER. The whole
/// input has to be available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
/// A BOM, if present, is not part of the output.
///
/// This method implements the (non-streaming version of) the
/// [_decode_](https://encoding.spec.whatwg.org/#decode) spec concept.
///
/// The returned tuple holds, in order:
///
/// 1. the decoded text,
/// 2. the encoding that was actually used (BOM sniffing may make it differ
///    from this encoding), and
/// 3. whether any malformed sequences were found (and replaced with the
///    REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use `new_decoder()`
/// when decoding segmented input.
///
/// When it cannot borrow, this method performs one or two heap allocations
/// for the backing buffer of the `String`: one if there are no errors and
/// potentially another one in the presence of errors. The first allocation
/// assumes jemalloc and may not be optimal with allocators that do not use
/// power-of-two buckets. A borrow is performed if decoding UTF-8 and the
/// input is valid UTF-8, if decoding an ASCII-compatible encoding and the
/// input is ASCII-only, or when decoding ISO-2022-JP and the input is
/// entirely in the ASCII state without state transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
#[inline]
pub fn decode<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, &'static Encoding, bool) {
    // A sniffed BOM overrides this encoding and is cut off from the input.
    let (effective, payload) = if let Some((sniffed, bom_len)) = Encoding::for_bom(bytes) {
        (sniffed, &bytes[bom_len..])
    } else {
        (self, bytes)
    };
    let (text, had_errors) = effective.decode_without_bom_handling(payload);
    (text, effective, had_errors)
}
3017
/// Decodes a complete input into a `Cow<'a, str>` _with BOM removal_,
/// replacing malformed sequences with the REPLACEMENT CHARACTER. The whole
/// input has to be available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
/// The only thing removed is an initial byte sequence that is the BOM for
/// this very encoding (UTF-8, UTF-16LE or UTF-16BE); the BOM of another
/// encoding is passed on to the decoder as ordinary input.
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode_](https://encoding.spec.whatwg.org/#utf-8-decode) spec
/// concept.
///
/// The second item of the returned pair tells whether any malformed
/// sequences were found (and replaced with the REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_with_bom_removal()` when decoding segmented input.
///
/// When it cannot borrow, this method performs one or two heap allocations
/// for the backing buffer of the `String`: one if there are no errors and
/// potentially another one in the presence of errors. The first allocation
/// assumes jemalloc and may not be optimal with allocators that do not use
/// power-of-two buckets. A borrow is performed if decoding UTF-8 and the
/// input is valid UTF-8, if decoding an ASCII-compatible encoding and the
/// input is ASCII-only, or when decoding ISO-2022-JP and the input is
/// entirely in the ASCII state without state transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
#[inline]
pub fn decode_with_bom_removal<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
    // The BOM that belongs to this encoding; empty when it has none, in
    // which case nothing is stripped below.
    let own_bom: &[u8] = if self == UTF_8 {
        b"\xEF\xBB\xBF"
    } else if self == UTF_16LE {
        b"\xFF\xFE"
    } else if self == UTF_16BE {
        b"\xFE\xFF"
    } else {
        b""
    };
    let payload = if bytes.starts_with(own_bom) {
        &bytes[own_bom.len()..]
    } else {
        bytes
    };
    self.decode_without_bom_handling(payload)
}
3068
/// Decode complete input to `Cow<'a, str>` _without BOM handling_ and
/// with malformed sequences replaced with the REPLACEMENT CHARACTER when
/// the entire input is available as a single buffer (i.e. the end of the
/// buffer marks the end of the stream).
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode without BOM_](https://encoding.spec.whatwg.org/#utf-8-decode-without-bom)
/// spec concept.
///
/// The second item in the returned pair indicates whether there were
/// malformed sequences (that were replaced with the REPLACEMENT CHARACTER).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_without_bom_handling()` when decoding segmented input.
///
/// This method performs one or two heap allocations for the backing
/// buffer of the `String` when unable to borrow. (One allocation if there
/// are no errors and potentially another one in the presence of errors.)
/// The first allocation assumes jemalloc and may not be optimal with
/// allocators that do not use power-of-two buckets. A borrow is performed
/// if decoding UTF-8 and the input is valid UTF-8, if decoding an
/// ASCII-compatible encoding and the input is ASCII-only, or when decoding
/// ISO-2022-JP and the input is entirely in the ASCII state without state
/// transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_without_bom_handling<'a>(&'static self, bytes: &'a [u8]) -> (Cow<'a, str>, bool) {
    let (mut decoder, mut string, mut total_read) = if self.is_potentially_borrowable() {
        // Find how much of the input can be taken over verbatim.
        let valid_up_to = if self == UTF_8 {
            utf8_valid_up_to(bytes)
        } else if self == ISO_2022_JP {
            iso_2022_jp_ascii_valid_up_to(bytes)
        } else {
            ascii_valid_up_to(bytes)
        };
        if valid_up_to == bytes.len() {
            // Fast path: the whole input passed validation, so it is
            // handed back as a borrow without allocating.
            // SAFETY: relies on the validator above having accepted every
            // byte of `bytes` as UTF-8 (or ASCII, which is a subset).
            let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return (Cow::Borrowed(str), false);
        }
        let decoder = self.new_decoder_without_bom_handling();

        // Size the buffer for the validated prefix plus whatever the
        // decoder reports for the rest of the input.
        let rounded_without_replacement = checked_next_power_of_two(checked_add(
            valid_up_to,
            decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to),
        ));
        let with_replacement = checked_add(
            valid_up_to,
            decoder.max_utf8_buffer_length(bytes.len() - valid_up_to),
        );
        let mut string = String::with_capacity(
            checked_min(rounded_without_replacement, with_replacement).unwrap(),
        );
        // Seed the output with the already-validated prefix. `push_str`
        // only ever exposes initialized bytes (unlike calling `set_len`
        // ahead of the copy) and grows the buffer itself if it has to.
        // SAFETY: `bytes[..valid_up_to]` is the prefix the validator above
        // accepted; this is the same guarantee the borrowing fast path
        // depends on.
        string.push_str(unsafe { core::str::from_utf8_unchecked(&bytes[..valid_up_to]) });
        (decoder, string, valid_up_to)
    } else {
        let decoder = self.new_decoder_without_bom_handling();
        let rounded_without_replacement = checked_next_power_of_two(
            decoder.max_utf8_buffer_length_without_replacement(bytes.len()),
        );
        let with_replacement = decoder.max_utf8_buffer_length(bytes.len());
        let string = String::with_capacity(
            checked_min(rounded_without_replacement, with_replacement).unwrap(),
        );
        (decoder, string, 0)
    };

    let mut total_had_errors = false;
    loop {
        // `true`: this buffer is the last (and only) one of the stream.
        let (result, read, had_errors) =
            decoder.decode_to_string(&bytes[total_read..], &mut string, true);
        total_read += read;
        total_had_errors |= had_errors;
        match result {
            CoderResult::InputEmpty => {
                debug_assert_eq!(total_read, bytes.len());
                return (Cow::Owned(string), total_had_errors);
            }
            CoderResult::OutputFull => {
                // Allocate for the worst case. That is, we should come
                // here at most once per invocation of this method.
                let needed = decoder.max_utf8_buffer_length(bytes.len() - total_read);
                string.reserve(needed.unwrap());
            }
        }
    }
}
3168
/// Decode complete input to `Cow<'a, str>` _without BOM handling_ and
/// _with malformed sequences treated as fatal_ when the entire input is
/// available as a single buffer (i.e. the end of the buffer marks the end
/// of the stream).
///
/// When invoked on `UTF_8`, this method implements the (non-streaming
/// version of) the
/// [_UTF-8 decode without BOM or fail_](https://encoding.spec.whatwg.org/#utf-8-decode-without-bom-or-fail)
/// spec concept.
///
/// Returns `None` if a malformed sequence was encountered and the result
/// of the decode as `Some(Cow<'a, str>)` otherwise.
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use
/// `new_decoder_without_bom_handling()` when decoding segmented input.
///
/// This method performs a single heap allocation for the backing
/// buffer of the `String` when unable to borrow. A borrow is performed if
/// decoding UTF-8 and the input is valid UTF-8, if decoding an
/// ASCII-compatible encoding and the input is ASCII-only, or when decoding
/// ISO-2022-JP and the input is entirely in the ASCII state without state
/// transitions.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_without_bom_handling_and_without_replacement<'a>(
    &'static self,
    bytes: &'a [u8],
) -> Option<Cow<'a, str>> {
    if self == UTF_8 {
        // For UTF-8 the answer is all-or-nothing: either the input is
        // borrowed as-is or decoding fails.
        let valid_up_to = utf8_valid_up_to(bytes);
        if valid_up_to == bytes.len() {
            // SAFETY: relies on `utf8_valid_up_to` having accepted every
            // byte of `bytes` as UTF-8.
            let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return Some(Cow::Borrowed(str));
        }
        return None;
    }
    let (mut decoder, mut string, input) = if self.is_potentially_borrowable() {
        let valid_up_to = if self == ISO_2022_JP {
            iso_2022_jp_ascii_valid_up_to(bytes)
        } else {
            ascii_valid_up_to(bytes)
        };
        if valid_up_to == bytes.len() {
            // SAFETY: relies on the validator above having accepted every
            // byte of `bytes` as ASCII, which is a subset of UTF-8.
            let str: &str = unsafe { core::str::from_utf8_unchecked(bytes) };
            return Some(Cow::Borrowed(str));
        }
        let decoder = self.new_decoder_without_bom_handling();
        let mut string = String::with_capacity(
            checked_add(
                valid_up_to,
                decoder.max_utf8_buffer_length_without_replacement(bytes.len() - valid_up_to),
            )
            .unwrap(),
        );
        // Seed the output with the already-validated prefix. `push_str`
        // only ever exposes initialized bytes (unlike calling `set_len`
        // ahead of the copy) and grows the buffer itself if it has to.
        // SAFETY: `bytes[..valid_up_to]` is the prefix the validator above
        // accepted; this is the same guarantee the borrowing fast path
        // depends on.
        string.push_str(unsafe { core::str::from_utf8_unchecked(&bytes[..valid_up_to]) });
        (decoder, string, &bytes[valid_up_to..])
    } else {
        let decoder = self.new_decoder_without_bom_handling();
        let string = String::with_capacity(
            decoder
                .max_utf8_buffer_length_without_replacement(bytes.len())
                .unwrap(),
        );
        (decoder, string, bytes)
    };
    // `true`: this buffer is the last (and only) one of the stream.
    let (result, read) = decoder.decode_to_string_without_replacement(input, &mut string, true);
    match result {
        DecoderResult::InputEmpty => {
            debug_assert_eq!(read, input.len());
            Some(Cow::Owned(string))
        }
        DecoderResult::Malformed(_, _) => None,
        // The buffer was sized from the decoder's own
        // `max_utf8_buffer_length_without_replacement` figure, so the code
        // treats running out of output space as impossible.
        DecoderResult::OutputFull => unreachable!(),
    }
}
3256
/// Encode complete input to `Cow<'a, [u8]>` using the
/// [_output encoding_](Encoding::output_encoding) of this encoding with
/// unmappable characters replaced with decimal numeric character references
/// when the entire input is available as a single buffer (i.e. the end of
/// the buffer marks the end of the stream).
///
/// This method implements the (non-streaming version of) the
/// [_encode_](https://encoding.spec.whatwg.org/#encode) spec concept. For
/// the [_UTF-8 encode_](https://encoding.spec.whatwg.org/#utf-8-encode)
/// spec concept, it is slightly more efficient to use
/// <code><var>string</var>.as_bytes()</code> instead of invoking this
/// method on `UTF_8`.
///
/// The second item in the returned tuple is the encoding that was actually
/// used (which may differ from this encoding thanks to some encodings
/// having UTF-8 as their output encoding).
///
/// The third item in the returned tuple indicates whether there were
/// unmappable characters (that were replaced with HTML numeric character
/// references).
///
/// _Note:_ It is wrong to use this when the input buffer represents only
/// a segment of the input instead of the whole input. Use `new_encoder()`
/// when encoding segmented output.
///
/// When encoding to UTF-8 or when encoding an ASCII-only input to an
/// ASCII-compatible encoding, this method returns a borrow of the input
/// without a heap allocation. Otherwise, this method performs a single
/// heap allocation for the backing buffer of the `Vec<u8>` if there are no
/// unmappable characters and potentially multiple heap allocations if
/// there are. These allocations are tuned for jemalloc and may not be
/// optimal when using a different allocator that doesn't use power-of-two
/// buckets.
///
/// # Panics
///
/// If the size calculation for a heap-allocated backing buffer overflows
/// `usize`.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn encode<'a>(&'static self, string: &'a str) -> (Cow<'a, [u8]>, &'static Encoding, bool) {
    let output_encoding = self.output_encoding();
    if output_encoding == UTF_8 {
        // The input already is the output.
        return (Cow::Borrowed(string.as_bytes()), output_encoding, false);
    }
    debug_assert!(output_encoding.is_potentially_borrowable());
    let bytes = string.as_bytes();
    // Find how much of the input can be taken over verbatim.
    let valid_up_to = if output_encoding == ISO_2022_JP {
        iso_2022_jp_ascii_valid_up_to(bytes)
    } else {
        ascii_valid_up_to(bytes)
    };
    if valid_up_to == bytes.len() {
        return (Cow::Borrowed(bytes), output_encoding, false);
    }
    let mut encoder = output_encoding.new_encoder();
    let mut vec: Vec<u8> = Vec::with_capacity(
        (checked_add(
            valid_up_to,
            encoder.max_buffer_length_from_utf8_if_no_unmappables(string.len() - valid_up_to),
        ))
        .unwrap()
        .next_power_of_two(),
    );
    // Seed the output with the prefix that needs no conversion. This is a
    // safe copy: the length only ever covers initialized bytes, unlike
    // calling `set_len` ahead of a raw copy.
    vec.extend_from_slice(&bytes[..valid_up_to]);
    let mut total_read = valid_up_to;
    let mut total_had_errors = false;
    loop {
        // `true`: this buffer is the last (and only) one of the stream.
        let (result, read, had_errors) =
            encoder.encode_from_utf8_to_vec(&string[total_read..], &mut vec, true);
        total_read += read;
        total_had_errors |= had_errors;
        match result {
            CoderResult::InputEmpty => {
                debug_assert_eq!(total_read, string.len());
                return (Cow::Owned(vec), output_encoding, total_had_errors);
            }
            CoderResult::OutputFull => {
                // reserve_exact wants to know how much more on top of current
                // length--not current capacity.
                let needed = encoder
                    .max_buffer_length_from_utf8_if_no_unmappables(string.len() - total_read);
                let rounded = (checked_add(vec.capacity(), needed))
                    .unwrap()
                    .next_power_of_two();
                let additional = rounded - vec.len();
                vec.reserve_exact(additional);
            }
        }
    }
}
3353
3354	fn new_variant_decoder(&'static self) -> VariantDecoder {
3355	self.variant.new_variant_decoder()
3356	}
3357
3358	/// Instantiates a new decoder for this encoding with BOM sniffing enabled.
3359	///
3360	/// BOM sniffing may cause the returned decoder to morph into a decoder
3361	/// for UTF-8, UTF-16LE or UTF-16BE instead of this encoding. The BOM
3362	/// does not appear in the output.
3363	///
3364	/// Available via the C wrapper.
3365	#[inline]
3366	pub fn new_decoder(&'static self) -> Decoder {
3367	Decoder::new(self, self.new_variant_decoder(), BomHandling::Sniff)
3368	}
3369
3370	/// Instantiates a new decoder for this encoding with BOM removal.
3371	///
3372	/// If the input starts with bytes that are the BOM for this encoding,
3373	/// those bytes are removed. However, the decoder never morphs into a
3374	/// decoder for another encoding: A BOM for another encoding is treated as
3375	/// (potentially malformed) input to the decoding algorithm for this
3376	/// encoding.
3377	///
3378	/// Available via the C wrapper.
3379	#[inline]
3380	pub fn new_decoder_with_bom_removal(&'static self) -> Decoder {
3381	Decoder::new(self, self.new_variant_decoder(), BomHandling::Remove)
3382	}
3383
3384	/// Instantiates a new decoder for this encoding with BOM handling disabled.
3385	///
3386	/// If the input starts with bytes that look like a BOM, those bytes are
3387	/// not treated as a BOM. (Hence, the decoder never morphs into a decoder
3388	/// for another encoding.)
3389	///
3390	/// _Note:_ If the caller has performed BOM sniffing on its own but has not
3391	/// removed the BOM, the caller should use `new_decoder_with_bom_removal()`
3392	/// instead of this method to cause the BOM to be removed.
3393	///
3394	/// Available via the C wrapper.
3395	#[inline]
3396	pub fn new_decoder_without_bom_handling(&'static self) -> Decoder {
3397	Decoder::new(self, self.new_variant_decoder(), BomHandling::Off)
3398	}
3399
3400	/// Instantiates a new encoder for the [_output encoding_](Encoding::output_encoding)
3401	/// of this encoding.
3402	///
3403	/// _Note:_ The output encoding of UTF-16BE, UTF-16LE, and replacement is UTF-8. There
3404	/// is no encoder for UTF-16BE, UTF-16LE, and replacement themselves.
3405	///
3406	/// Available via the C wrapper.
3407	#[inline]
3408	pub fn new_encoder(&'static self) -> Encoder {
3409	let enc = self.output_encoding();
3410	enc.variant.new_encoder(enc)
3411	}
3412
/// UTF-8 validation.
///
/// The result is the index of the first byte that makes the input
/// malformed as UTF-8, or the length of the slice when the whole slice is
/// valid.
///
/// This is currently faster than the corresponding standard library
/// functionality. If this implementation gets upstreamed to the standard
/// library, this method may be removed in the future.
///
/// Available via the C wrapper.
pub fn utf8_valid_up_to(bytes: &[u8]) -> usize {
    // Forwards to the free function of the same name.
    utf8_valid_up_to(bytes)
}
3426
/// ASCII validation.
///
/// The result is the index of the first byte that makes the input
/// malformed as ASCII, or the length of the slice when the whole slice is
/// valid.
///
/// Available via the C wrapper.
pub fn ascii_valid_up_to(bytes: &[u8]) -> usize {
    // Forwards to the free function of the same name.
    ascii_valid_up_to(bytes)
}
3436
/// Validation of ISO-2022-JP ASCII-state data.
///
/// The result is the index of the first byte that makes the input not
/// representable in the ASCII state of ISO-2022-JP, or the length of the
/// slice when the whole slice is representable in the ASCII state of
/// ISO-2022-JP.
///
/// Available via the C wrapper.
pub fn iso_2022_jp_ascii_valid_up_to(bytes: &[u8]) -> usize {
    // Forwards to the free function of the same name.
    iso_2022_jp_ascii_valid_up_to(bytes)
}
3448	}
3449
3450	impl PartialEq for Encoding {
3451	#[inline]
3452	fn eq(&self, other: &Encoding) -> bool {
3453	(self as *const Encoding) == (other as *const Encoding)
3454	}
3455	}
3456
3457	impl Eq for Encoding {}
3458
/// Test-only ordering of encodings; always agrees with the `Ord` impl.
#[cfg(test)]
impl PartialOrd for Encoding {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
3465
/// Test-only total order of encodings by the address of the `Encoding`
/// object.
#[cfg(test)]
impl Ord for Encoding {
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs: *const Encoding = self;
        let rhs: *const Encoding = other;
        lhs.cmp(&rhs)
    }
}
3472
3473	impl Hash for Encoding {
3474	#[inline]
3475	fn hash<H: Hasher>(&self, state: &mut H) {
3476	(self as *const Encoding).hash(state);
3477	}
3478	}
3479
3480	impl core::fmt::Debug for Encoding {
3481	#[inline]
3482	fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
3483	write!(f, "Encoding `{{` {} `}}`", self.name)
3484	}
3485	}
3486
/// An encoding is serialized as a string holding its name.
#[cfg(feature = "serde")]
impl Serialize for Encoding {
    #[inline]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let encoding_name: &str = self.name;
        serializer.serialize_str(encoding_name)
    }
}
3497
/// Serde visitor that turns a string holding an encoding label into a
/// `&'static Encoding`.
#[cfg(feature = "serde")]
struct EncodingVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for EncodingVisitor {
    type Value = &'static Encoding;

    /// Describes the expected input for error messages.
    fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
        formatter.write_str("a valid encoding label")
    }

    /// Looks the string up with `Encoding::for_label()`; a string that is
    /// not a recognized label is reported as a custom error.
    fn visit_str<E>(self, value: &str) -> Result<&'static Encoding, E>
    where
        E: serde::de::Error,
    {
        Encoding::for_label(value.as_bytes()).ok_or_else(|| {
            E::custom(alloc::format!("invalid encoding label: {}", value))
        })
    }
}
3523
/// A `&'static Encoding` is deserialized from a string, which
/// `EncodingVisitor` resolves as an encoding label.
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for &'static Encoding {
    fn deserialize<D>(deserializer: D) -> Result<&'static Encoding, D::Error>
    where
        D: Deserializer<'de>,
    {
        let visitor = EncodingVisitor;
        deserializer.deserialize_str(visitor)
    }
}
3533
/// The states a decoder passes through, from BOM sniffing via conversion
/// to the end of the stream.
#[derive(Clone, Copy, Debug, PartialEq)]
enum DecoderLifeCycle {
    /// No input has been seen so far.
    AtStart,
    /// No input has been seen so far, but UTF-8 is expected.
    AtUtf8Start,
    /// No input has been seen so far, but UTF-16BE is expected.
    AtUtf16BeStart,
    /// No input has been seen so far, but UTF-16LE is expected.
    AtUtf16LeStart,
    /// EF has been seen.
    SeenUtf8First,
    /// EF, BB has been seen.
    SeenUtf8Second,
    /// FE has been seen.
    SeenUtf16BeFirst,
    /// FF has been seen.
    SeenUtf16LeFirst,
    /// EF, BB was seen but BF was not: there was a buffer boundary after
    /// BB and the underlying decoder reported EF as an error, so BB still
    /// has to be pushed before the next buffer.
    ConvertingWithPendingBB,
    /// The search for a BOM is over and EOF has not been seen yet.
    Converting,
    /// EOF has been seen.
    Finished,
}
3562
/// Selects how a decoder treats a BOM at the start of the input.
#[derive(Clone, Copy, Debug)]
enum BomHandling {
    /// No BOM handling at all
    Off,
    /// Sniff for a UTF-8, UTF-16BE or UTF-16LE BOM
    Sniff,
    /// Remove the BOM, but only when it is the BOM for this encoding
    Remove,
}
3573
/// Outcome of a (potentially partial) decode or encode operation with
/// replacement.
#[derive(Debug, Eq, PartialEq)]
#[must_use]
pub enum CoderResult {
    /// All of the input was consumed.
    ///
    /// When the call that returned this had `last` set to `true`, the
    /// conversion process is complete. Otherwise, the caller should call a
    /// decode or encode method again with more input.
    InputEmpty,

    /// There is not enough space left in the output buffer for the
    /// converter to produce another unit of output.
    ///
    /// On the next call, the caller must provide more output space and
    /// re-push the remaining input to the converter.
    OutputFull,
}
3593
/// Outcome of a (potentially partial) decode operation without replacement.
#[derive(Debug, Eq, PartialEq)]
#[must_use]
pub enum DecoderResult {
    /// All of the input was consumed.
    ///
    /// When the call that returned this had `last` set to `true`, the
    /// decoding process is complete. Otherwise, the caller should call a
    /// decode method again with more input.
    InputEmpty,

    /// There is not enough space left in the output buffer for the decoder
    /// to produce another unit of output.
    ///
    /// On the next call, the caller must provide more output space and
    /// re-push the remaining input to the decoder.
    OutputFull,

    /// A malformed byte sequence was encountered by the decoder.
    ///
    /// The caller must either treat this as a fatal error or append one
    /// REPLACEMENT CHARACTER (U+FFFD) to the output and then re-push the
    /// remaining input to the decoder.
    ///
    /// The first wrapped integer is the length of the malformed byte
    /// sequence. The second wrapped integer is the number of bytes that
    /// were consumed after the malformed sequence. If the second integer
    /// is zero, the last byte that was consumed is the last byte of the
    /// malformed sequence. Note that the malformed bytes may have been part
    /// of an earlier input buffer.
    ///
    /// The first wrapped integer can have values 1, 2, 3 or 4. The second
    /// wrapped integer can have values 0, 1, 2 or 3. The worst-case sum
    /// of the two is 6, which happens with ISO-2022-JP.
    Malformed(u8, u8), // u8 instead of usize to avoid useless bloat
}
3630
/// A converter that decodes a byte stream into Unicode according to a
/// character encoding in a streaming (incremental) manner.
///
/// The various `decode_*` methods take an input buffer (`src`) and an output
/// buffer (`dst`), both of which are caller-allocated. There are variants for
/// both UTF-8 and UTF-16 output buffers.
///
/// A `decode_*` method decodes bytes from `src` into Unicode characters stored
/// into `dst` until one of the following three things happens:
///
/// 1. A malformed byte sequence is encountered (`*_without_replacement`
///    variants only).
///
/// 2. The output buffer has been filled so near capacity that the decoder
///    cannot be sure that processing an additional byte of input wouldn't
///    cause so much output that the output buffer would overflow.
///
/// 3. All the input bytes have been processed.
///
/// The `decode_*` method then returns a tuple of a status indicating which one
/// of the three reasons to return happened, how many input bytes were read,
/// how many output code units (`u8` when decoding into UTF-8 and `u16`
/// when decoding to UTF-16) were written (except when decoding into `String`,
/// whose length change indicates this), and in the case of the
/// variants performing replacement, a boolean indicating whether an error was
/// replaced with the REPLACEMENT CHARACTER during the call.
///
/// The number of bytes "written" is what's logically written. Garbage may be
/// written in the output buffer beyond the point logically written to.
/// Therefore, if you wish to decode into an `&mut str`, you should use the
/// methods that take an `&mut str` argument instead of the ones that take an
/// `&mut [u8]` argument. The former take care of overwriting the trailing
/// garbage to ensure the UTF-8 validity of the `&mut str` as a whole, but the
/// latter don't.
///
/// In the case of the `*_without_replacement` variants, the status is a
/// [`DecoderResult`][1] enumeration (possibilities `Malformed`, `OutputFull` and
/// `InputEmpty` corresponding to the three cases listed above).
///
/// In the case of methods whose name does not end with
/// `*_without_replacement`, malformed sequences are automatically replaced
/// with the REPLACEMENT CHARACTER and errors do not cause the methods to
/// return early.
///
/// When decoding to UTF-8, the output buffer must have at least 4 bytes of
/// space. When decoding to UTF-16, the output buffer must have at least two
/// UTF-16 code units (`u16`) of space.
///
/// When decoding to UTF-8 without replacement, the methods are guaranteed
/// not to return indicating that more output space is needed if the length
/// of the output buffer is at least the length returned by
/// [`max_utf8_buffer_length_without_replacement()`][2]. When decoding to UTF-8
/// with replacement, the length of the output buffer that guarantees the
/// methods not to return indicating that more output space is needed is given
/// by [`max_utf8_buffer_length()`][3]. When decoding to UTF-16 with
/// or without replacement, the length of the output buffer that guarantees
/// the methods not to return indicating that more output space is needed is
/// given by [`max_utf16_buffer_length()`][4].
///
/// The output written into `dst` is guaranteed to be valid UTF-8 or UTF-16,
/// and the output after each `decode_*` call is guaranteed to consist of
/// complete characters. (I.e. the code unit sequence for the last character is
/// guaranteed not to be split across output buffers.)
///
/// The boolean argument `last` indicates that the end of the stream is reached
/// when all the bytes in `src` have been consumed.
///
/// A `Decoder` object can be used to incrementally decode a byte stream.
///
/// During the processing of a single stream, the caller must call `decode_*`
/// zero or more times with `last` set to `false` and then call `decode_*` at
/// least once with `last` set to `true`. If `decode_*` returns `InputEmpty`,
/// the processing of the stream has ended. Otherwise, the caller must call
/// `decode_*` again with `last` set to `true` (or treat a `Malformed` result as
/// a fatal error).
///
/// Once the stream has ended, the `Decoder` object must not be used anymore.
/// That is, you need to create another one to process another stream.
///
/// When the decoder returns `OutputFull` or the decoder returns `Malformed` and
/// the caller does not wish to treat it as a fatal error, the input buffer
/// `src` may not have been completely consumed. In that case, the caller must
/// pass the unconsumed contents of `src` to `decode_*` again upon the next
/// call.
///
/// [1]: enum.DecoderResult.html
/// [2]: #method.max_utf8_buffer_length_without_replacement
/// [3]: #method.max_utf8_buffer_length
/// [4]: #method.max_utf16_buffer_length
///
/// # Infinite loops
///
/// When converting with a fixed-size output buffer whose size is too small to
/// accommodate one character or (when applicable) one numeric character
/// reference of output, an infinite loop ensues. When converting with a
/// fixed-size output buffer, it generally makes sense to make the buffer
/// fairly large (e.g. a couple of kilobytes).
pub struct Decoder {
    /// The encoding reported by `encoding()`.
    encoding: &'static Encoding,
    /// The encoding-specific decoder that the buffer-length queries and
    /// `latin1_byte_compatible_up_to()` delegate to.
    variant: VariantDecoder,
    /// Where the decoder is in its life cycle: BOM handling progress,
    /// converting, or finished.
    life_cycle: DecoderLifeCycle,
}
3733
3734	impl Decoder {
3735	fn new(enc: &'static Encoding, decoder: VariantDecoder, sniffing: BomHandling) -> Decoder {
3736	Decoder {
3737	encoding: enc,
3738	variant: decoder,
3739	life_cycle: match sniffing {
3740	BomHandling::Off => DecoderLifeCycle::Converting,
3741	BomHandling::Sniff => DecoderLifeCycle::AtStart,
3742	BomHandling::Remove => {
3743	if enc == UTF_8 {
3744	DecoderLifeCycle::AtUtf8Start
3745	} else if enc == UTF_16BE {
3746	DecoderLifeCycle::AtUtf16BeStart
3747	} else if enc == UTF_16LE {
3748	DecoderLifeCycle::AtUtf16LeStart
3749	} else {
3750	DecoderLifeCycle::Converting
3751	}
3752	}
3753	},
3754	}
3755	}
3756
/// The `Encoding` this `Decoder` is for.
///
/// BOM sniffing can change the return value of this method during the life
/// of the decoder, so callers that care about the effective encoding should
/// query it again after decoding rather than caching an early answer.
///
/// Available via the C wrapper.
#[inline]
pub fn encoding(&self) -> &'static Encoding {
    // Plain read of the stored `&'static Encoding`.
    self.encoding
}
3767
3768	/// Query the worst-case UTF-8 output size _with replacement_.
3769	///
3770	/// Returns the size of the output buffer in UTF-8 code units (`u8`)
3771	/// that will not overflow given the current state of the decoder and
3772	/// `byte_length` number of additional input bytes when decoding with
3773	/// errors handled by outputting a REPLACEMENT CHARACTER for each malformed
3774	/// sequence or `None` if `usize` would overflow.
3775	///
3776	/// Available via the C wrapper.
3777	pub fn max_utf8_buffer_length(&self, byte_length: usize) -> Option<usize> {
3778	// Need to consider a) the decoder morphing due to the BOM and b) a partial
3779	// BOM getting pushed to the underlying decoder.
3780	match self.life_cycle {
3781	DecoderLifeCycle::Converting
3782	\| DecoderLifeCycle::AtUtf8Start
3783	\| DecoderLifeCycle::AtUtf16LeStart
3784	\| DecoderLifeCycle::AtUtf16BeStart => {
3785	return self.variant.max_utf8_buffer_length(byte_length);
3786	}
3787	DecoderLifeCycle::AtStart => {
3788	if let Some(utf8_bom) = checked_add(`3`, byte_length.checked_mul(`3`)) {
3789	if let Some(utf16_bom) = checked_add(
3790	`1`,
3791	checked_mul(`3`, checked_div(byte_length.checked_add(`1`), `2`)),
3792	) {
3793	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
3794	let encoding = self.encoding();
3795	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
3796	// No need to consider the internal state of the underlying decoder,
3797	// because it is at start, because no data has reached it yet.
3798	return Some(utf_bom);
3799	} else if let Some(non_bom) =
3800	self.variant.max_utf8_buffer_length(byte_length)
3801	{
3802	return Some(core::cmp::max(utf_bom, non_bom));
3803	}
3804	}
3805	}
3806	}
3807	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
3808	// Add two bytes even when only one byte has been seen,
3809	// because the one byte can become a lead byte in multibyte
3810	// decoders, but only after the decoder has been queried
3811	// for max length, so the decoder's own logic for adding
3812	// one for a pending lead cannot work.
3813	if let Some(sum) = byte_length.checked_add(`2`) {
3814	if let Some(utf8_bom) = checked_add(`3`, sum.checked_mul(`3`)) {
3815	if self.encoding() == UTF_8 {
3816	// No need to consider the internal state of the underlying decoder,
3817	// because it is at start, because no data has reached it yet.
3818	return Some(utf8_bom);
3819	} else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) {
3820	return Some(core::cmp::max(utf8_bom, non_bom));
3821	}
3822	}
3823	}
3824	}
3825	DecoderLifeCycle::ConvertingWithPendingBB => {
3826	if let Some(sum) = byte_length.checked_add(`2`) {
3827	return self.variant.max_utf8_buffer_length(sum);
3828	}
3829	}
3830	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
3831	// Add two bytes even when only one byte has been seen,
3832	// because the one byte can become a lead byte in multibyte
3833	// decoders, but only after the decoder has been queried
3834	// for max length, so the decoder's own logic for adding
3835	// one for a pending lead cannot work.
3836	if let Some(sum) = byte_length.checked_add(`2`) {
3837	if let Some(utf16_bom) =
3838	checked_add(`1`, checked_mul(`3`, checked_div(sum.checked_add(`1`), `2`)))
3839	{
3840	let encoding = self.encoding();
3841	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
3842	// No need to consider the internal state of the underlying decoder,
3843	// because it is at start, because no data has reached it yet.
3844	return Some(utf16_bom);
3845	} else if let Some(non_bom) = self.variant.max_utf8_buffer_length(sum) {
3846	return Some(core::cmp::max(utf16_bom, non_bom));
3847	}
3848	}
3849	}
3850	}
3851	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
3852	}
3853	None
3854	}
3855
3856	/// Query the worst-case UTF-8 output size _without replacement_.
3857	///
3858	/// Returns the size of the output buffer in UTF-8 code units (`u8`)
3859	/// that will not overflow given the current state of the decoder and
3860	/// `byte_length` number of additional input bytes when decoding without
3861	/// replacement error handling or `None` if `usize` would overflow.
3862	///
3863	/// Note that this value may be too small for the `_with_replacement` case.
3864	/// Use `max_utf8_buffer_length()` for that case.
3865	///
3866	/// Available via the C wrapper.
3867	pub fn max_utf8_buffer_length_without_replacement(&self, byte_length: usize) -> Option<usize> {
3868	// Need to consider a) the decoder morphing due to the BOM and b) a partial
3869	// BOM getting pushed to the underlying decoder.
3870	match self.life_cycle {
3871	DecoderLifeCycle::Converting
3872	\| DecoderLifeCycle::AtUtf8Start
3873	\| DecoderLifeCycle::AtUtf16LeStart
3874	\| DecoderLifeCycle::AtUtf16BeStart => {
3875	return self
3876	.variant
3877	.max_utf8_buffer_length_without_replacement(byte_length);
3878	}
3879	DecoderLifeCycle::AtStart => {
3880	if let Some(utf8_bom) = byte_length.checked_add(`3`) {
3881	if let Some(utf16_bom) = checked_add(
3882	`1`,
3883	checked_mul(`3`, checked_div(byte_length.checked_add(`1`), `2`)),
3884	) {
3885	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
3886	let encoding = self.encoding();
3887	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
3888	// No need to consider the internal state of the underlying decoder,
3889	// because it is at start, because no data has reached it yet.
3890	return Some(utf_bom);
3891	} else if let Some(non_bom) = self
3892	.variant
3893	.max_utf8_buffer_length_without_replacement(byte_length)
3894	{
3895	return Some(core::cmp::max(utf_bom, non_bom));
3896	}
3897	}
3898	}
3899	}
3900	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
3901	// Add two bytes even when only one byte has been seen,
3902	// because the one byte can become a lead byte in multibyte
3903	// decoders, but only after the decoder has been queried
3904	// for max length, so the decoder's own logic for adding
3905	// one for a pending lead cannot work.
3906	if let Some(sum) = byte_length.checked_add(`2`) {
3907	if let Some(utf8_bom) = sum.checked_add(`3`) {
3908	if self.encoding() == UTF_8 {
3909	// No need to consider the internal state of the underlying decoder,
3910	// because it is at start, because no data has reached it yet.
3911	return Some(utf8_bom);
3912	} else if let Some(non_bom) =
3913	self.variant.max_utf8_buffer_length_without_replacement(sum)
3914	{
3915	return Some(core::cmp::max(utf8_bom, non_bom));
3916	}
3917	}
3918	}
3919	}
3920	DecoderLifeCycle::ConvertingWithPendingBB => {
3921	if let Some(sum) = byte_length.checked_add(`2`) {
3922	return self.variant.max_utf8_buffer_length_without_replacement(sum);
3923	}
3924	}
3925	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
3926	// Add two bytes even when only one byte has been seen,
3927	// because the one byte can become a lead byte in multibyte
3928	// decoders, but only after the decoder has been queried
3929	// for max length, so the decoder's own logic for adding
3930	// one for a pending lead cannot work.
3931	if let Some(sum) = byte_length.checked_add(`2`) {
3932	if let Some(utf16_bom) =
3933	checked_add(`1`, checked_mul(`3`, checked_div(sum.checked_add(`1`), `2`)))
3934	{
3935	let encoding = self.encoding();
3936	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
3937	// No need to consider the internal state of the underlying decoder,
3938	// because it is at start, because no data has reached it yet.
3939	return Some(utf16_bom);
3940	} else if let Some(non_bom) =
3941	self.variant.max_utf8_buffer_length_without_replacement(sum)
3942	{
3943	return Some(core::cmp::max(utf16_bom, non_bom));
3944	}
3945	}
3946	}
3947	}
3948	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
3949	}
3950	None
3951	}
3952
3953	/// Incrementally decode a byte stream into UTF-8 with malformed sequences
3954	/// replaced with the REPLACEMENT CHARACTER.
3955	///
3956	/// See the documentation of the struct for documentation for `decode_`*
3957	/// methods collectively.
3958	///
3959	/// Available via the C wrapper.
3960	pub fn decode_to_utf8(
3961	&mut self,
3962	src: &[u8],
3963	dst: &mut [u8],
3964	last: bool,
3965	) -> (CoderResult, usize, usize, bool) {
3966	let mut had_errors = `false`;
3967	let mut total_read = `0usize`;
3968	let mut total_written = `0usize`;
3969	loop {
3970	let (result, read, written) = self.decode_to_utf8_without_replacement(
3971	&src[total_read..],
3972	&mut dst[total_written..],
3973	last,
3974	);
3975	total_read += read;
3976	total_written += written;
3977	match result {
3978	DecoderResult::InputEmpty => {
3979	return (
3980	CoderResult::InputEmpty,
3981	total_read,
3982	total_written,
3983	had_errors,
3984	);
3985	}
3986	DecoderResult::OutputFull => {
3987	return (
3988	CoderResult::OutputFull,
3989	total_read,
3990	total_written,
3991	had_errors,
3992	);
3993	}
3994	DecoderResult::Malformed(_, _) => {
3995	had_errors = `true`;
3996	// There should always be space for the U+FFFD, because
3997	// otherwise we'd have gotten OutputFull already.
3998	// XXX: is the above comment actually true for UTF-8 itself?
3999	// TODO: Consider having fewer bound checks here.
4000	dst[total_written] = `0xEFu8`;
4001	total_written += `1`;
4002	dst[total_written] = `0xBFu8`;
4003	total_written += `1`;
4004	dst[total_written] = `0xBDu8`;
4005	total_written += `1`;
4006	}
4007	}
4008	}
4009	}
4010
4011	/// Incrementally decode a byte stream into UTF-8 with malformed sequences
4012	/// replaced with the REPLACEMENT CHARACTER with type system signaling
4013	/// of UTF-8 validity.
4014	///
4015	/// This methods calls `decode_to_utf8` and then zeroes
4016	/// out up to three bytes that aren't logically part of the write in order
4017	/// to retain the UTF-8 validity even for the unwritten part of the buffer.
4018	///
4019	/// See the documentation of the struct for documentation for `decode_`*
4020	/// methods collectively.
4021	///
4022	/// Available to Rust only.
4023	pub fn decode_to_str(
4024	&mut self,
4025	src: &[u8],
4026	dst: &mut str,
4027	last: bool,
4028	) -> (CoderResult, usize, usize, bool) {
4029	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
4030	let (result, read, written, replaced) = self.decode_to_utf8(src, bytes, last);
4031	let len = bytes.len();
4032	let mut trail = written;
4033	// Non-UTF-8 ASCII-compatible decoders may write up to `MAX_STRIDE_SIZE`
4034	// bytes of trailing garbage. No need to optimize non-ASCII-compatible
4035	// encodings to avoid overwriting here.
4036	if self.encoding != UTF_8 {
4037	let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE);
4038	while trail < max {
4039	bytes[trail] = `0`;
4040	trail += `1`;
4041	}
4042	}
4043	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
4044	bytes[trail] = `0`;
4045	trail += `1`;
4046	}
4047	(result, read, written, replaced)
4048	}
4049
/// Incrementally decode a byte stream into UTF-8 with malformed sequences
/// replaced with the REPLACEMENT CHARACTER using a `String` receiver.
///
/// Like the others, this method follows the logic that the output buffer is
/// caller-allocated. This method treats the capacity of the `String` as
/// the output limit. That is, this method guarantees not to cause a
/// reallocation of the backing buffer of `String`.
///
/// The return value is a tuple that contains the `CoderResult`, the
/// number of bytes read and a boolean indicating whether replacements
/// were done. The number of bytes written is signaled via the length of
/// the `String` changing.
///
/// See the documentation of the struct for documentation for `decode_*`
/// methods collectively.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_to_string(
    &mut self,
    src: &[u8],
    dst: &mut String,
    last: bool,
) -> (CoderResult, usize, bool) {
    // SAFETY: relies on `decode_to_utf8` writing valid UTF-8 up to
    // `written`; the length is cut back to `start + written` so only
    // logically written bytes stay part of the `String`.
    // NOTE(review): between the two `set_len` calls the vector exposes its
    // whole capacity, which may not have been initialized - confirm that
    // this is acceptable, including if the decode call panics.
    unsafe {
        let buffer = dst.as_mut_vec();
        let start = buffer.len();
        let limit = buffer.capacity();
        buffer.set_len(limit);
        let (result, read, written, replaced) =
            self.decode_to_utf8(src, &mut buffer[start..], last);
        buffer.set_len(start + written);
        (result, read, replaced)
    }
}
4086
// Generates the public `decode_to_utf8_without_replacement` method.
// NOTE(review): the remaining identifiers are presumably the internal helper
// routines the macro defines or wires together, and `u8` the output code unit
// type - confirm against the `public_decode_function!` definition.
public_decode_function!(/// Incrementally decode a byte stream into UTF-8
                        /// _without replacement_.
                        ///
                        /// See the documentation of the struct for
                        /// documentation for `decode_*` methods
                        /// collectively.
                        ///
                        /// Available via the C wrapper.
                        ,
                        decode_to_utf8_without_replacement,
                        decode_to_utf8_raw,
                        decode_to_utf8_checking_end,
                        decode_to_utf8_after_one_potential_bom_byte,
                        decode_to_utf8_after_two_potential_bom_bytes,
                        decode_to_utf8_checking_end_with_offset,
                        u8);
4103
4104	/// Incrementally decode a byte stream into UTF-8 with type system signaling
4105	/// of UTF-8 validity.
4106	///
4107	/// This methods calls `decode_to_utf8` and then zeroes out up to three
4108	/// bytes that aren't logically part of the write in order to retain the
4109	/// UTF-8 validity even for the unwritten part of the buffer.
4110	///
4111	/// See the documentation of the struct for documentation for `decode_`*
4112	/// methods collectively.
4113	///
4114	/// Available to Rust only.
4115	pub fn decode_to_str_without_replacement(
4116	&mut self,
4117	src: &[u8],
4118	dst: &mut str,
4119	last: bool,
4120	) -> (DecoderResult, usize, usize) {
4121	let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() };
4122	let (result, read, written) = self.decode_to_utf8_without_replacement(src, bytes, last);
4123	let len = bytes.len();
4124	let mut trail = written;
4125	// Non-UTF-8 ASCII-compatible decoders may write up to `MAX_STRIDE_SIZE`
4126	// bytes of trailing garbage. No need to optimize non-ASCII-compatible
4127	// encodings to avoid overwriting here.
4128	if self.encoding != UTF_8 {
4129	let max = core::cmp::min(len, trail + ascii::MAX_STRIDE_SIZE);
4130	while trail < max {
4131	bytes[trail] = `0`;
4132	trail += `1`;
4133	}
4134	}
4135	while trail < len && ((bytes[trail] & `0xC0`) == `0x80`) {
4136	bytes[trail] = `0`;
4137	trail += `1`;
4138	}
4139	(result, read, written)
4140	}
4141
/// Incrementally decode a byte stream into UTF-8 using a `String` receiver.
///
/// Like the others, this method follows the logic that the output buffer is
/// caller-allocated. This method treats the capacity of the `String` as
/// the output limit. That is, this method guarantees not to cause a
/// reallocation of the backing buffer of `String`.
///
/// The return value is a pair that contains the `DecoderResult` and the
/// number of bytes read. The number of bytes written is signaled via
/// the length of the `String` changing.
///
/// See the documentation of the struct for documentation for `decode_*`
/// methods collectively.
///
/// Available to Rust only and only with the `alloc` feature enabled (enabled
/// by default).
#[cfg(feature = "alloc")]
pub fn decode_to_string_without_replacement(
    &mut self,
    src: &[u8],
    dst: &mut String,
    last: bool,
) -> (DecoderResult, usize) {
    // SAFETY: relies on `decode_to_utf8_without_replacement` writing valid
    // UTF-8 up to `written`; the length is cut back to `start + written` so
    // only logically written bytes stay part of the `String`.
    // NOTE(review): between the two `set_len` calls the vector exposes its
    // whole capacity, which may not have been initialized - confirm that
    // this is acceptable, including if the decode call panics.
    unsafe {
        let buffer = dst.as_mut_vec();
        let start = buffer.len();
        let limit = buffer.capacity();
        buffer.set_len(limit);
        let (result, read, written) =
            self.decode_to_utf8_without_replacement(src, &mut buffer[start..], last);
        buffer.set_len(start + written);
        (result, read)
    }
}
4176
4177	/// Query the worst-case UTF-16 output size (with or without replacement).
4178	///
4179	/// Returns the size of the output buffer in UTF-16 code units (`u16`)
4180	/// that will not overflow given the current state of the decoder and
4181	/// `byte_length` number of additional input bytes or `None` if `usize`
4182	/// would overflow.
4183	///
4184	/// Since the REPLACEMENT CHARACTER fits into one UTF-16 code unit, the
4185	/// return value of this method applies also in the
4186	/// `_without_replacement` case.
4187	///
4188	/// Available via the C wrapper.
4189	pub fn max_utf16_buffer_length(&self, byte_length: usize) -> Option<usize> {
4190	// Need to consider a) the decoder morphing due to the BOM and b) a partial
4191	// BOM getting pushed to the underlying decoder.
4192	match self.life_cycle {
4193	DecoderLifeCycle::Converting
4194	\| DecoderLifeCycle::AtUtf8Start
4195	\| DecoderLifeCycle::AtUtf16LeStart
4196	\| DecoderLifeCycle::AtUtf16BeStart => {
4197	return self.variant.max_utf16_buffer_length(byte_length);
4198	}
4199	DecoderLifeCycle::AtStart => {
4200	if let Some(utf8_bom) = byte_length.checked_add(`1`) {
4201	if let Some(utf16_bom) =
4202	checked_add(`1`, checked_div(byte_length.checked_add(`1`), `2`))
4203	{
4204	let utf_bom = core::cmp::max(utf8_bom, utf16_bom);
4205	let encoding = self.encoding();
4206	if encoding == UTF_8 \|\| encoding == UTF_16LE \|\| encoding == UTF_16BE {
4207	// No need to consider the internal state of the underlying decoder,
4208	// because it is at start, because no data has reached it yet.
4209	return Some(utf_bom);
4210	} else if let Some(non_bom) =
4211	self.variant.max_utf16_buffer_length(byte_length)
4212	{
4213	return Some(core::cmp::max(utf_bom, non_bom));
4214	}
4215	}
4216	}
4217	}
4218	DecoderLifeCycle::SeenUtf8First \| DecoderLifeCycle::SeenUtf8Second => {
4219	// Add two bytes even when only one byte has been seen,
4220	// because the one byte can become a lead byte in multibyte
4221	// decoders, but only after the decoder has been queried
4222	// for max length, so the decoder's own logic for adding
4223	// one for a pending lead cannot work.
4224	if let Some(sum) = byte_length.checked_add(`2`) {
4225	if let Some(utf8_bom) = sum.checked_add(`1`) {
4226	if self.encoding() == UTF_8 {
4227	// No need to consider the internal state of the underlying decoder,
4228	// because it is at start, because no data has reached it yet.
4229	return Some(utf8_bom);
4230	} else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) {
4231	return Some(core::cmp::max(utf8_bom, non_bom));
4232	}
4233	}
4234	}
4235	}
4236	DecoderLifeCycle::ConvertingWithPendingBB => {
4237	if let Some(sum) = byte_length.checked_add(`2`) {
4238	return self.variant.max_utf16_buffer_length(sum);
4239	}
4240	}
4241	DecoderLifeCycle::SeenUtf16LeFirst \| DecoderLifeCycle::SeenUtf16BeFirst => {
4242	// Add two bytes even when only one byte has been seen,
4243	// because the one byte can become a lead byte in multibyte
4244	// decoders, but only after the decoder has been queried
4245	// for max length, so the decoder's own logic for adding
4246	// one for a pending lead cannot work.
4247	if let Some(sum) = byte_length.checked_add(`2`) {
4248	if let Some(utf16_bom) = checked_add(`1`, checked_div(sum.checked_add(`1`), `2`)) {
4249	let encoding = self.encoding();
4250	if encoding == UTF_16LE \|\| encoding == UTF_16BE {
4251	// No need to consider the internal state of the underlying decoder,
4252	// because it is at start, because no data has reached it yet.
4253	return Some(utf16_bom);
4254	} else if let Some(non_bom) = self.variant.max_utf16_buffer_length(sum) {
4255	return Some(core::cmp::max(utf16_bom, non_bom));
4256	}
4257	}
4258	}
4259	}
4260	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
4261	}
4262	None
4263	}
4264
4265	/// Incrementally decode a byte stream into UTF-16 with malformed sequences
4266	/// replaced with the REPLACEMENT CHARACTER.
4267	///
4268	/// See the documentation of the struct for documentation for `decode_`*
4269	/// methods collectively.
4270	///
4271	/// Available via the C wrapper.
4272	pub fn decode_to_utf16(
4273	&mut self,
4274	src: &[u8],
4275	dst: &mut [u16],
4276	last: bool,
4277	) -> (CoderResult, usize, usize, bool) {
4278	let mut had_errors = `false`;
4279	let mut total_read = `0usize`;
4280	let mut total_written = `0usize`;
4281	loop {
4282	let (result, read, written) = self.decode_to_utf16_without_replacement(
4283	&src[total_read..],
4284	&mut dst[total_written..],
4285	last,
4286	);
4287	total_read += read;
4288	total_written += written;
4289	match result {
4290	DecoderResult::InputEmpty => {
4291	return (
4292	CoderResult::InputEmpty,
4293	total_read,
4294	total_written,
4295	had_errors,
4296	);
4297	}
4298	DecoderResult::OutputFull => {
4299	return (
4300	CoderResult::OutputFull,
4301	total_read,
4302	total_written,
4303	had_errors,
4304	);
4305	}
4306	DecoderResult::Malformed(_, _) => {
4307	had_errors = `true`;
4308	// There should always be space for the U+FFFD, because
4309	// otherwise we'd have gotten OutputFull already.
4310	dst[total_written] = `0xFFFD`;
4311	total_written += `1`;
4312	}
4313	}
4314	}
4315	}
4316
// Generates the public `decode_to_utf16_without_replacement` method.
// NOTE(review): the remaining identifiers are presumably the internal helper
// routines the macro defines or wires together, and `u16` the output code unit
// type - confirm against the `public_decode_function!` definition.
public_decode_function!(/// Incrementally decode a byte stream into UTF-16
                        /// _without replacement_.
                        ///
                        /// See the documentation of the struct for
                        /// documentation for `decode_*` methods
                        /// collectively.
                        ///
                        /// Available via the C wrapper.
                        ,
                        decode_to_utf16_without_replacement,
                        decode_to_utf16_raw,
                        decode_to_utf16_checking_end,
                        decode_to_utf16_after_one_potential_bom_byte,
                        decode_to_utf16_after_two_potential_bom_bytes,
                        decode_to_utf16_checking_end_with_offset,
                        u16);
4333
4334	/// Checks for compatibility with storing Unicode scalar values as unsigned
4335	/// bytes taking into account the state of the decoder.
4336	///
4337	/// Returns `None` if the decoder is not in a neutral state, including waiting
4338	/// for the BOM, or if the encoding is never Latin1-byte-compatible.
4339	///
4340	/// Otherwise returns the index of the first byte whose unsigned value doesn't
4341	/// directly correspond to the decoded Unicode scalar value, or the length
4342	/// of the input if all bytes in the input decode directly to scalar values
4343	/// corresponding to the unsigned byte values.
4344	///
4345	/// Does not change the state of the decoder.
4346	///
4347	/// Do not use this unless you are supporting SpiderMonkey/V8-style string
4348	/// storage optimizations.
4349	///
4350	/// Available via the C wrapper.
4351	pub fn latin1_byte_compatible_up_to(&self, bytes: &[u8]) -> Option<usize> {
4352	match self.life_cycle {
4353	DecoderLifeCycle::Converting => {
4354	return self.variant.latin1_byte_compatible_up_to(bytes);
4355	}
4356	DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),
4357	_ => None,
4358	}
4359	}
4360	}
4361
/// Outcome of a (potentially partial) encode operation performed without
/// replacement.
#[derive(Eq, PartialEq, Debug)]
#[must_use]
pub enum EncoderResult {
    /// All of the input was consumed.
    ///
    /// When the call that produced this value was made with `last` set to
    /// `true`, the encoding process has completed. Otherwise the caller
    /// should call an encode method again with more input.
    InputEmpty,

    /// The output buffer lacks the space needed for the encoder to produce
    /// another unit of output.
    ///
    /// On the next call the caller must supply more output space and re-push
    /// the remaining input to the encoder.
    OutputFull,

    /// An unmappable character was encountered.
    ///
    /// The caller must either treat this as a fatal error, or append a
    /// placeholder to the output and then re-push the remaining input to the
    /// encoder.
    Unmappable(char),
}

impl EncoderResult {
    /// Builds an `Unmappable` result from a single UTF-16 code unit.
    ///
    /// Panics if `bmp` is not a Unicode scalar value on its own, i.e. if it
    /// is a surrogate code unit.
    fn unmappable_from_bmp(bmp: u16) -> EncoderResult {
        let scalar = ::core::char::from_u32(u32::from(bmp));
        EncoderResult::Unmappable(scalar.unwrap())
    }
}
4393
4394	/// A converter that encodes a Unicode stream into bytes according to a
4395	/// character encoding in a streaming (incremental) manner.
4396	///
4397	/// The various `encode_*` methods take an input buffer (`src`) and an output
4398	/// buffer `dst` both of which are caller-allocated. There are variants for
4399	/// both UTF-8 and UTF-16 input buffers.
4400	///
4401	/// An `encode_*` method encodes characters from `src` into bytes
4402	/// stored into `dst` until one of the following three things happens:
4403	///
4404	/// 1. An unmappable character is encountered (`*_without_replacement` variants
4405	///    only).
4406	///
4407	/// 2. The output buffer has been filled so near capacity that the encoder
4408	///    cannot be sure that processing an additional character of input wouldn't
4409	///    cause so much output that the output buffer would overflow.
4410	///
4411	/// 3. All the input characters have been processed.
4412	///
4413	/// The `encode_*` method then returns a tuple of a status indicating which one
4414	/// of the three reasons to return happened, how many input code units (`u8`
4415	/// when encoding from UTF-8 and `u16` when encoding from UTF-16) were read,
4416	/// how many output bytes were written (except when encoding into `Vec<u8>`,
4417	/// whose length change indicates this), and in the case of the variants that
4418	/// perform replacement, a boolean indicating whether an unmappable
4419	/// character was replaced with a numeric character reference during the call.
4420	///
4421	/// The number of bytes "written" is what's logically written. Garbage may be
4422	/// written in the output buffer beyond the point logically written to.
4423	///
4424	/// In the case of the methods whose name ends with
4425	/// `*_without_replacement`, the status is an [`EncoderResult`][1] enumeration
4426	/// (possibilities `Unmappable`, `OutputFull` and `InputEmpty` corresponding to
4427	/// the three cases listed above).
4428	///
4429	/// In the case of methods whose name does not end with
4430	/// `*_without_replacement`, unmappable characters are automatically replaced
4431	/// with the corresponding numeric character references and unmappable
4432	/// characters do not cause the methods to return early.
4433	///
4434	/// When encoding from UTF-8 without replacement, the methods are guaranteed
4435	/// not to return indicating that more output space is needed if the length
4436	/// of the output buffer is at least the length returned by
4437	/// [`max_buffer_length_from_utf8_without_replacement()`][2]. When encoding from
4438	/// UTF-8 with replacement, the length of the output buffer that guarantees the
4439	/// methods not to return indicating that more output space is needed in the
4440	/// absence of unmappable characters is given by
4441	/// [`max_buffer_length_from_utf8_if_no_unmappables()`][3]. When encoding from
4442	/// UTF-16 without replacement, the methods are guaranteed not to return
4443	/// indicating that more output space is needed if the length of the output
4444	/// buffer is at least the length returned by
4445	/// [`max_buffer_length_from_utf16_without_replacement()`][4]. When encoding
4446	/// from UTF-16 with replacement, the length of the output buffer that
4447	/// guarantees the methods not to return indicating that more output space is
4448	/// needed in the absence of unmappable characters is given by
4449	/// [`max_buffer_length_from_utf16_if_no_unmappables()`][5].
4450	/// When encoding with replacement, applications are not expected to size the
4451	/// buffer for the worst case ahead of time but to resize the buffer if there
4452	/// are unmappable characters. This is why max length queries are only available
4453	/// for the case where there are no unmappable characters.
4454	///
4455	/// When encoding from UTF-8, each `src` buffer _must_ be valid UTF-8. (When
4456	/// calling from Rust, the type system takes care of this.) When encoding from
4457	/// UTF-16, unpaired surrogates in the input are treated as U+FFFD REPLACEMENT
4458	/// CHARACTERS. Therefore, in order for astral characters not to turn into a
4459	/// pair of REPLACEMENT CHARACTERS, the caller must ensure that surrogate pairs
4460	/// are not split across input buffer boundaries.
4461	///
4462	/// After an `encode_*` call returns, the output produced so far, taken as a
4463	/// whole from the start of the stream, is guaranteed to consist of a valid
4464	/// byte sequence in the target encoding. (I.e. the code unit sequence for a
4465	/// character is guaranteed not to be split across output buffers. However, due
4466	/// to the stateful nature of ISO-2022-JP, the stream needs to be considered
4467	/// from the start for it to be valid. For other encodings, the validity holds
4468	/// on a per-output buffer basis.)
4469	///
4470	/// The boolean argument `last` indicates that the end of the stream is reached
4471	/// when all the characters in `src` have been consumed. This argument is needed
4472	/// for ISO-2022-JP and is ignored for other encodings.
4473	///
4474	/// An `Encoder` object can be used to incrementally encode a Unicode stream.
4475	///
4476	/// During the processing of a single stream, the caller must call `encode_*`
4477	/// zero or more times with `last` set to `false` and then call `encode_*` at
4478	/// least once with `last` set to `true`. If `encode_*` returns `InputEmpty`,
4479	/// the processing of the stream has ended. Otherwise, the caller must call
4480	/// `encode_*` again with `last` set to `true` (or treat an `Unmappable` result
4481	/// as a fatal error).
4482	///
4483	/// Once the stream has ended, the `Encoder` object must not be used anymore.
4484	/// That is, you need to create another one to process another stream.
4485	///
4486	/// When the encoder returns `OutputFull` or the encoder returns `Unmappable`
4487	/// and the caller does not wish to treat it as a fatal error, the input buffer
4488	/// `src` may not have been completely consumed. In that case, the caller must
4489	/// pass the unconsumed contents of `src` to `encode_*` again upon the next
4490	/// call.
4491	///
4492	/// [1]: enum.EncoderResult.html
4493	/// [2]: #method.max_buffer_length_from_utf8_without_replacement
4494	/// [3]: #method.max_buffer_length_from_utf8_if_no_unmappables
4495	/// [4]: #method.max_buffer_length_from_utf16_without_replacement
4496	/// [5]: #method.max_buffer_length_from_utf16_if_no_unmappables
4497	///
4498	/// # Infinite loops
4499	///
4500	/// When converting with a fixed-size output buffer whose size is too small to
4501	/// accommodate one character of output, an infinite loop ensues. When
4502	/// converting with a fixed-size output buffer, it generally makes sense to
4503	/// make the buffer fairly large (e.g. a couple of kilobytes).
4504	pub struct Encoder {
4505	encoding: &'static Encoding,
4506	variant: VariantEncoder,
4507	}
4508
4509	impl Encoder {
4510	fn new(enc: &'static Encoding, encoder: VariantEncoder) -> Encoder {
4511	Encoder {
4512	encoding: enc,
4513	variant: encoder,
4514	}
4515	}
4516
4517	/// The `Encoding` this `Encoder` is for.
4518	#[inline]
4519	pub fn encoding(&self) -> &'static Encoding {
4520	self.encoding
4521	}
4522
4523	/// Returns `true` if this is an ISO-2022-JP encoder that's not in the
4524	/// ASCII state and `false` otherwise.
4525	#[inline]
4526	pub fn has_pending_state(&self) -> bool {
4527	self.variant.has_pending_state()
4528	}
4529
4530	/// Query the worst-case output size when encoding from UTF-8 with
4531	/// replacement.
4532	///
4533	/// Returns the size of the output buffer in bytes that will not overflow
4534	/// given the current state of the encoder and `byte_length` number of
4535	/// additional input code units if there are no unmappable characters in
4536	/// the input or `None` if `usize` would overflow.
4537	///
4538	/// Available via the C wrapper.
4539	pub fn max_buffer_length_from_utf8_if_no_unmappables(
4540	&self,
4541	byte_length: usize,
4542	) -> Option<usize> {
4543	checked_add(
4544	if self.encoding().can_encode_everything() {
4545	`0`
4546	} else {
4547	NCR_EXTRA
4548	},
4549	self.max_buffer_length_from_utf8_without_replacement(byte_length),
4550	)
4551	}
4552
4553	/// Query the worst-case output size when encoding from UTF-8 without
4554	/// replacement.
4555	///
4556	/// Returns the size of the output buffer in bytes that will not overflow
4557	/// given the current state of the encoder and `byte_length` number of
4558	/// additional input code units or `None` if `usize` would overflow.
4559	///
4560	/// Available via the C wrapper.
4561	pub fn max_buffer_length_from_utf8_without_replacement(
4562	&self,
4563	byte_length: usize,
4564	) -> Option<usize> {
4565	self.variant
4566	.max_buffer_length_from_utf8_without_replacement(byte_length)
4567	}
4568
4569	/// Incrementally encode into byte stream from UTF-8 with unmappable
4570	/// characters replaced with HTML (decimal) numeric character references.
4571	///
4572	/// See the documentation of the struct for documentation for `encode_`*
4573	/// methods collectively.
4574	///
4575	/// Available via the C wrapper.
4576	pub fn encode_from_utf8(
4577	&mut self,
4578	src: &str,
4579	dst: &mut [u8],
4580	last: bool,
4581	) -> (CoderResult, usize, usize, bool) {
4582	let dst_len = dst.len();
4583	let effective_dst_len = if self.encoding().can_encode_everything() {
4584	dst_len
4585	} else {
4586	if dst_len < NCR_EXTRA {
4587	if src.is_empty() && !(last && self.has_pending_state()) {
4588	return (CoderResult::InputEmpty, `0`, `0`, `false`);
4589	}
4590	return (CoderResult::OutputFull, `0`, `0`, `false`);
4591	}
4592	dst_len - NCR_EXTRA
4593	};
4594	let mut had_unmappables = `false`;
4595	let mut total_read = `0usize`;
4596	let mut total_written = `0usize`;
4597	loop {
4598	let (result, read, written) = self.encode_from_utf8_without_replacement(
4599	&src[total_read..],
4600	&mut dst[total_written..effective_dst_len],
4601	last,
4602	);
4603	total_read += read;
4604	total_written += written;
4605	match result {
4606	EncoderResult::InputEmpty => {
4607	return (
4608	CoderResult::InputEmpty,
4609	total_read,
4610	total_written,
4611	had_unmappables,
4612	);
4613	}
4614	EncoderResult::OutputFull => {
4615	return (
4616	CoderResult::OutputFull,
4617	total_read,
4618	total_written,
4619	had_unmappables,
4620	);
4621	}
4622	EncoderResult::Unmappable(unmappable) => {
4623	had_unmappables = `true`;
4624	debug_assert!(dst.len() - total_written >= NCR_EXTRA);
4625	debug_assert_ne!(self.encoding(), UTF_16BE);
4626	debug_assert_ne!(self.encoding(), UTF_16LE);
4627	// Additionally, Iso2022JpEncoder is responsible for
4628	// transitioning to ASCII when returning with Unmappable.
4629	total_written += write_ncr(unmappable, &mut dst[total_written..]);
4630	if total_written >= effective_dst_len {
4631	if total_read == src.len() && !(last && self.has_pending_state()) {
4632	return (
4633	CoderResult::InputEmpty,
4634	total_read,
4635	total_written,
4636	had_unmappables,
4637	);
4638	}
4639	return (
4640	CoderResult::OutputFull,
4641	total_read,
4642	total_written,
4643	had_unmappables,
4644	);
4645	}
4646	}
4647	}
4648	}
4649	}
4650
4651	/// Incrementally encode into byte stream from UTF-8 with unmappable
4652	/// characters replaced with HTML (decimal) numeric character references.
4653	///
4654	/// See the documentation of the struct for documentation for `encode_`*
4655	/// methods collectively.
4656	///
4657	/// Available to Rust only and only with the `alloc` feature enabled (enabled
4658	/// by default).
4659	#[cfg(feature = "alloc")]
4660	pub fn encode_from_utf8_to_vec(
4661	&mut self,
4662	src: &str,
4663	dst: &mut Vec<u8>,
4664	last: bool,
4665	) -> (CoderResult, usize, bool) {
4666	unsafe {
4667	let old_len = dst.len();
4668	let capacity = dst.capacity();
4669	dst.set_len(capacity);
4670	let (result, read, written, replaced) =
4671	self.encode_from_utf8(src, &mut dst[old_len..], last);
4672	dst.set_len(old_len + written);
4673	(result, read, replaced)
4674	}
4675	}
4676
4677	/// Incrementally encode into byte stream from UTF-8 _without replacement_.
4678	///
4679	/// See the documentation of the struct for documentation for `encode_`*
4680	/// methods collectively.
4681	///
4682	/// Available via the C wrapper.
4683	pub fn encode_from_utf8_without_replacement(
4684	&mut self,
4685	src: &str,
4686	dst: &mut [u8],
4687	last: bool,
4688	) -> (EncoderResult, usize, usize) {
4689	self.variant.encode_from_utf8_raw(src, dst, last)
4690	}
4691
4692	/// Incrementally encode into byte stream from UTF-8 _without replacement_.
4693	///
4694	/// See the documentation of the struct for documentation for `encode_`*
4695	/// methods collectively.
4696	///
4697	/// Available to Rust only and only with the `alloc` feature enabled (enabled
4698	/// by default).
4699	#[cfg(feature = "alloc")]
4700	pub fn encode_from_utf8_to_vec_without_replacement(
4701	&mut self,
4702	src: &str,
4703	dst: &mut Vec<u8>,
4704	last: bool,
4705	) -> (EncoderResult, usize) {
4706	unsafe {
4707	let old_len = dst.len();
4708	let capacity = dst.capacity();
4709	dst.set_len(capacity);
4710	let (result, read, written) =
4711	self.encode_from_utf8_without_replacement(src, &mut dst[old_len..], last);
4712	dst.set_len(old_len + written);
4713	(result, read)
4714	}
4715	}
4716
4717	/// Query the worst-case output size when encoding from UTF-16 with
4718	/// replacement.
4719	///
4720	/// Returns the size of the output buffer in bytes that will not overflow
4721	/// given the current state of the encoder and `u16_length` number of
4722	/// additional input code units if there are no unmappable characters in
4723	/// the input or `None` if `usize` would overflow.
4724	///
4725	/// Available via the C wrapper.
4726	pub fn max_buffer_length_from_utf16_if_no_unmappables(
4727	&self,
4728	u16_length: usize,
4729	) -> Option<usize> {
4730	checked_add(
4731	if self.encoding().can_encode_everything() {
4732	`0`
4733	} else {
4734	NCR_EXTRA
4735	},
4736	self.max_buffer_length_from_utf16_without_replacement(u16_length),
4737	)
4738	}
4739
4740	/// Query the worst-case output size when encoding from UTF-16 without
4741	/// replacement.
4742	///
4743	/// Returns the size of the output buffer in bytes that will not overflow
4744	/// given the current state of the encoder and `u16_length` number of
4745	/// additional input code units or `None` if `usize` would overflow.
4746	///
4747	/// Available via the C wrapper.
4748	pub fn max_buffer_length_from_utf16_without_replacement(
4749	&self,
4750	u16_length: usize,
4751	) -> Option<usize> {
4752	self.variant
4753	.max_buffer_length_from_utf16_without_replacement(u16_length)
4754	}
4755
4756	/// Incrementally encode into byte stream from UTF-16 with unmappable
4757	/// characters replaced with HTML (decimal) numeric character references.
4758	///
4759	/// See the documentation of the struct for documentation for `encode_`*
4760	/// methods collectively.
4761	///
4762	/// Available via the C wrapper.
4763	pub fn encode_from_utf16(
4764	&mut self,
4765	src: &[u16],
4766	dst: &mut [u8],
4767	last: bool,
4768	) -> (CoderResult, usize, usize, bool) {
4769	let dst_len = dst.len();
4770	let effective_dst_len = if self.encoding().can_encode_everything() {
4771	dst_len
4772	} else {
4773	if dst_len < NCR_EXTRA {
4774	if src.is_empty() && !(last && self.has_pending_state()) {
4775	return (CoderResult::InputEmpty, `0`, `0`, `false`);
4776	}
4777	return (CoderResult::OutputFull, `0`, `0`, `false`);
4778	}
4779	dst_len - NCR_EXTRA
4780	};
4781	let mut had_unmappables = `false`;
4782	let mut total_read = `0usize`;
4783	let mut total_written = `0usize`;
4784	loop {
4785	let (result, read, written) = self.encode_from_utf16_without_replacement(
4786	&src[total_read..],
4787	&mut dst[total_written..effective_dst_len],
4788	last,
4789	);
4790	total_read += read;
4791	total_written += written;
4792	match result {
4793	EncoderResult::InputEmpty => {
4794	return (
4795	CoderResult::InputEmpty,
4796	total_read,
4797	total_written,
4798	had_unmappables,
4799	);
4800	}
4801	EncoderResult::OutputFull => {
4802	return (
4803	CoderResult::OutputFull,
4804	total_read,
4805	total_written,
4806	had_unmappables,
4807	);
4808	}
4809	EncoderResult::Unmappable(unmappable) => {
4810	had_unmappables = `true`;
4811	debug_assert!(dst.len() - total_written >= NCR_EXTRA);
4812	// There are no UTF-16 encoders and even if there were,
4813	// they'd never have unmappables.
4814	debug_assert_ne!(self.encoding(), UTF_16BE);
4815	debug_assert_ne!(self.encoding(), UTF_16LE);
4816	// Additionally, Iso2022JpEncoder is responsible for
4817	// transitioning to ASCII when returning with Unmappable
4818	// from the jis0208 state. That is, when we encode
4819	// ISO-2022-JP and come here, the encoder is in either the
4820	// ASCII or the Roman state. We are allowed to generate any
4821	// printable ASCII excluding \ and ~.
4822	total_written += write_ncr(unmappable, &mut dst[total_written..]);
4823	if total_written >= effective_dst_len {
4824	if total_read == src.len() && !(last && self.has_pending_state()) {
4825	return (
4826	CoderResult::InputEmpty,
4827	total_read,
4828	total_written,
4829	had_unmappables,
4830	);
4831	}
4832	return (
4833	CoderResult::OutputFull,
4834	total_read,
4835	total_written,
4836	had_unmappables,
4837	);
4838	}
4839	}
4840	}
4841	}
4842	}
4843
4844	/// Incrementally encode into byte stream from UTF-16 _without replacement_.
4845	///
4846	/// See the documentation of the struct for documentation for `encode_`*
4847	/// methods collectively.
4848	///
4849	/// Available via the C wrapper.
4850	pub fn encode_from_utf16_without_replacement(
4851	&mut self,
4852	src: &[u16],
4853	dst: &mut [u8],
4854	last: bool,
4855	) -> (EncoderResult, usize, usize) {
4856	self.variant.encode_from_utf16_raw(src, dst, last)
4857	}
4858	}
4859
/// Format an unmappable as NCR without heap allocation.
///
/// Writes `&#`, the decimal value of `unmappable`, and `;` to the start of
/// `dst` and returns the number of bytes written (at most 10).
///
/// Panics on out-of-bounds indexing if `dst` is shorter than the reference.
/// Values below 10 are not supported (checked by a debug assertion).
fn write_ncr(unmappable: char, dst: &mut [u8]) -> usize {
    // len is the number of decimal digits needed to represent unmappable plus
    // 3 (the length of "&#" and ";").
    let mut number = unmappable as u32;
    let len = if number >= 1_000_000u32 {
        10usize
    } else if number >= 100_000u32 {
        9usize
    } else if number >= 10_000u32 {
        8usize
    } else if number >= 1_000u32 {
        7usize
    } else if number >= 100u32 {
        6usize
    } else {
        // Review the outcome of https://github.com/whatwg/encoding/issues/15
        // to see if this case is possible
        5usize
    };
    debug_assert!(number >= 10u32);
    debug_assert!(len <= dst.len());
    // Fill the buffer from the right: terminator first, then the digits from
    // least to most significant.
    let mut pos = len - 1;
    dst[pos] = b';';
    pos -= 1;
    loop {
        let rightmost = number % 10;
        dst[pos] = rightmost as u8 + b'0';
        pos -= 1;
        if number < 10 {
            break;
        }
        number /= 10;
    }
    dst[1] = b'#';
    dst[0] = b'&';
    len
}
4898
/// Returns `true` if `start <= i < end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_range16(i: u16, start: u16, end: u16) -> bool {
    let offset = i.wrapping_sub(start);
    let width = end - start;
    offset < width
}
4903
/// Returns `true` if `start <= i < end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_range32(i: u32, start: u32, end: u32) -> bool {
    let offset = i.wrapping_sub(start);
    let width = end - start;
    offset < width
}
4908
/// Returns `true` if `start <= i <= end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_inclusive_range8(i: u8, start: u8, end: u8) -> bool {
    let offset = i.wrapping_sub(start);
    let span = end - start;
    offset <= span
}
4913
/// Returns `true` if `start <= i <= end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_inclusive_range16(i: u16, start: u16, end: u16) -> bool {
    let offset = i.wrapping_sub(start);
    let span = end - start;
    offset <= span
}
4918
/// Returns `true` if `start <= i <= end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_inclusive_range32(i: u32, start: u32, end: u32) -> bool {
    let offset = i.wrapping_sub(start);
    let span = end - start;
    offset <= span
}
4923
/// Returns `true` if `start <= i <= end`, using a single comparison.
///
/// Expects `start <= end`; a value below `start` wraps around to a large
/// offset and therefore fails the comparison.
#[inline(always)]
fn in_inclusive_range(i: usize, start: usize, end: usize) -> bool {
    let offset = i.wrapping_sub(start);
    let span = end - start;
    offset <= span
}
4928
/// Adds `num` to the value in `opt`.
///
/// Returns `None` if `opt` is `None` or if the addition would overflow
/// `usize`.
#[inline(always)]
fn checked_add(num: usize, opt: Option<usize>) -> Option<usize> {
    opt.and_then(|n| n.checked_add(num))
}
4937
/// Adds two optional values.
///
/// Returns `None` if either operand is `None` or if the addition would
/// overflow `usize`.
#[inline(always)]
fn checked_add_opt(one: Option<usize>, other: Option<usize>) -> Option<usize> {
    match (one, other) {
        (Some(a), Some(b)) => b.checked_add(a),
        _ => None,
    }
}
4946
/// Multiplies the value in `opt` by `num`.
///
/// Returns `None` if `opt` is `None` or if the multiplication would overflow
/// `usize`.
#[inline(always)]
fn checked_mul(num: usize, opt: Option<usize>) -> Option<usize> {
    opt.and_then(|n| n.checked_mul(num))
}
4955
/// Divides the value in `opt` by `num`.
///
/// Returns `None` if `opt` is `None` or if `num` is zero.
#[inline(always)]
fn checked_div(opt: Option<usize>, num: usize) -> Option<usize> {
    opt.and_then(|n| n.checked_div(num))
}
4964
4965	#[cfg(feature = "alloc")]
4966	#[inline(always)]
/// Rounds the value in `opt` up to the next power of two.
///
/// Returns `None` if `opt` is `None` or if the next power of two does not
/// fit in `usize` (instead of panicking in debug builds or wrapping to zero
/// in release builds).
fn checked_next_power_of_two(opt: Option<usize>) -> Option<usize> {
    opt.and_then(|n| n.checked_next_power_of_two())
}
4970
4971	#[cfg(feature = "alloc")]
4972	#[inline(always)]
/// Returns the smaller of two optional values.
///
/// If only one operand is `Some`, that operand is returned; if both are
/// `None`, the result is `None`.
fn checked_min(one: Option<usize>, other: Option<usize>) -> Option<usize> {
    match (one, other) {
        (Some(a), Some(b)) => Some(::core::cmp::min(a, b)),
        (Some(a), None) => Some(a),
        (None, rest) => rest,
    }
}
4984
4985	// ############## TESTS ###############
4986
/// Serializable/deserializable struct holding an `&'static Encoding` next to
/// plain fields. Compiled only for test builds with the `serde` feature.
4987	#[cfg(all(test, feature = "serde"))]
4988	#[derive(Serialize, Deserialize, Debug, PartialEq)]
4989	struct Demo {
4990	num: u32,
4991	name: String,
4992	enc: &'static Encoding,
4993	}
4994
4995	#[cfg(test)]
4996	mod test_labels_names;
4997
4998	#[cfg(all(test, feature = "alloc"))]
4999	mod tests {
5000	use super::*;
5001	use alloc::borrow::Cow;
5002
5003	fn sniff_to_utf16(
5004	initial_encoding: &'static Encoding,
5005	expected_encoding: &'static Encoding,
5006	bytes: &[u8],
5007	expect: &[u16],
5008	breaks: &[usize],
5009	) {
5010	let mut decoder = initial_encoding.new_decoder();
5011
5012	let mut dest: Vec<u16> =
5013	Vec::with_capacity(decoder.max_utf16_buffer_length(bytes.len()).unwrap());
5014	let capacity = dest.capacity();
5015	dest.resize(capacity, `0u16`);
5016
5017	let mut total_written = `0usize`;
5018	let mut start = `0usize`;
5019	for br in breaks {
5020	let (result, read, written, _) =
5021	decoder.decode_to_utf16(&bytes[start..br], &mut* dest[total_written..], `false`);
5022	total_written += written;
5023	assert_eq!(read, *br - start);
5024	match result {
5025	CoderResult::InputEmpty => {}
5026	CoderResult::OutputFull => {
5027	unreachable!();
5028	}
5029	}
5030	start = *br;
5031	}
5032	let (result, read, written, _) =
5033	decoder.decode_to_utf16(&bytes[start..], &mut dest[total_written..], `true`);
5034	total_written += written;
5035	match result {
5036	CoderResult::InputEmpty => {}
5037	CoderResult::OutputFull => {
5038	unreachable!();
5039	}
5040	}
5041	assert_eq!(read, bytes.len() - start);
5042	assert_eq!(total_written, expect.len());
5043	assert_eq!(&dest[..total_written], expect);
5044	assert_eq!(decoder.encoding(), expected_encoding);
5045	}
5046
5047	// Any copyright to the test code below this comment is dedicated to the
5048	// Public Domain. http://creativecommons.org/publicdomain/zero/1.0/
5049
/// BOM sniffing must switch the decoder to UTF-8 / UTF-16BE / UTF-16LE only
/// when a complete BOM is present, regardless of how the input is chunked.
#[test]
fn test_bom_sniffing() {
    // ASCII
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[],
    );
    // UTF-8
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[2],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[3],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[4],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[2, 3],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[1, 2],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[1, 3],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        UTF_8,
        b"\xEF\xBB\xBF\x61\x62",
        &[0x0061u16, 0x0062u16],
        &[1, 2, 3, 4],
    );
    sniff_to_utf16(WINDOWS_1252, UTF_8, b"\xEF\xBB\xBF", &[], &[]);
    // Not UTF-8
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB\x61\x62",
        &[0x00EFu16, 0x00BBu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB\x61\x62",
        &[0x00EFu16, 0x00BBu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\x61\x62",
        &[0x00EFu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\x61\x62",
        &[0x00EFu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB",
        &[0x00EFu16, 0x00BBu16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xEF\xBB",
        &[0x00EFu16, 0x00BBu16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xEF", &[0x00EFu16], &[]);
    // Not UTF-16
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFE\x61\x62",
        &[0x00FEu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFE\x61\x62",
        &[0x00FEu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xFE", &[0x00FEu16], &[]);
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFF\x61\x62",
        &[0x00FFu16, 0x0061u16, 0x0062u16],
        &[],
    );
    sniff_to_utf16(
        WINDOWS_1252,
        WINDOWS_1252,
        b"\xFF\x61\x62",
        &[0x00FFu16, 0x0061u16, 0x0062u16],
        &[1],
    );
    sniff_to_utf16(WINDOWS_1252, WINDOWS_1252, b"\xFF", &[0x00FFu16], &[]);
    // UTF-16
    sniff_to_utf16(WINDOWS_1252, UTF_16BE, b"\xFE\xFF", &[], &[]);
    sniff_to_utf16(WINDOWS_1252, UTF_16BE, b"\xFE\xFF", &[], &[1]);
    sniff_to_utf16(WINDOWS_1252, UTF_16LE, b"\xFF\xFE", &[], &[]);
    sniff_to_utf16(WINDOWS_1252, UTF_16LE, b"\xFF\xFE", &[], &[1]);
}
5206
/// The output encoding of REPLACEMENT, UTF-16BE and UTF-16LE is UTF-8, both
/// via `output_encoding()` and via the encoder they create; UTF-8 and
/// windows-1252 map to themselves.
#[test]
fn test_output_encoding() {
    let expectations = [
        (REPLACEMENT, UTF_8),
        (UTF_16BE, UTF_8),
        (UTF_16LE, UTF_8),
        (UTF_8, UTF_8),
        (WINDOWS_1252, WINDOWS_1252),
    ];
    for &(source, expected) in expectations.iter() {
        assert_eq!(source.output_encoding(), expected);
    }
    for &(source, expected) in expectations.iter() {
        assert_eq!(source.new_encoder().encoding(), expected);
    }
}
5220
/// Labels resolve regardless of ASCII case and of surrounding tab, LF, FF, CR
/// and space characters; labels with trailing junk or unknown names yield
/// `None`.
#[test]
fn test_label_resolution() {
    assert_eq!(Encoding::for_label(b"utf-8"), Some(UTF_8));
    assert_eq!(Encoding::for_label(b"UTF-8"), Some(UTF_8));
    assert_eq!(
        Encoding::for_label(b" \t \n \x0C \n utf-8 \r \n \t \x0C "),
        Some(UTF_8)
    );
    assert_eq!(Encoding::for_label(b"utf-8 _"), None);
    assert_eq!(Encoding::for_label(b"bogus"), None);
    assert_eq!(Encoding::for_label(b"bogusbogusbogusbogus"), None);
}
5233
/// Valid non-ASCII windows-1257 input decodes into an owned string without
/// errors and keeps the windows-1257 encoding.
#[test]
fn test_decode_valid_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{00E4}");
        }
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(!had_errors);
}
5246
/// An unassigned windows-1257 byte (0xA1) decodes to U+FFFD in an owned
/// string and sets the error flag.
#[test]
fn test_decode_invalid_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(had_errors);
}
5259
/// ASCII-only windows-1257 input is returned as a borrowed string without
/// errors and keeps the windows-1257 encoding.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow() {
    let (decoded, detected, saw_errors) = WINDOWS_1257.decode(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        unreachable!();
    }
    assert_eq!(detected, WINDOWS_1257);
    assert!(!saw_errors);
}
5272
/// A UTF-8 BOM overrides windows-1257: valid UTF-8 after the BOM is returned
/// borrowed, without the BOM, and the reported encoding is UTF-8.
#[test]
fn test_decode_bomful_valid_utf8_as_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => {
            assert_eq!(s, "\u{20AC}\u{00E4}");
        }
        Cow::Owned(_) => unreachable!(),
    }
    assert_eq!(encoding, UTF_8);
    assert!(!had_errors);
}
5285
/// A UTF-8 BOM overrides windows-1257: invalid UTF-8 after the BOM yields an
/// owned string with U+FFFD, the error flag set, and UTF-8 as the encoding.
#[test]
fn test_decode_bomful_invalid_utf8_as_windows_1257_to_cow() {
    let (cow, encoding, had_errors) =
        WINDOWS_1257.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert_eq!(encoding, UTF_8);
    assert!(had_errors);
}
5299
/// Valid UTF-8 with a BOM decoded as UTF-8 is returned borrowed, without the
/// BOM and without errors.
#[test]
fn test_decode_bomful_valid_utf8_as_utf_8_to_cow() {
    let (cow, encoding, had_errors) = UTF_8.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => {
            assert_eq!(s, "\u{20AC}\u{00E4}");
        }
        Cow::Owned(_) => unreachable!(),
    }
    assert_eq!(encoding, UTF_8);
    assert!(!had_errors);
}
5312
/// Invalid UTF-8 with a BOM decoded as UTF-8 yields an owned string with
/// U+FFFD in place of the bad byte, without the BOM, and sets the error flag.
#[test]
fn test_decode_bomful_invalid_utf8_as_utf_8_to_cow() {
    let (cow, encoding, had_errors) = UTF_8.decode(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert_eq!(encoding, UTF_8);
    assert!(had_errors);
}
5325
/// `decode_with_bom_removal` on UTF-8 strips the UTF-8 BOM and returns the
/// valid remainder borrowed, without errors.
#[test]
fn test_decode_bomful_valid_utf8_as_utf_8_to_cow_with_bom_removal() {
    let (cow, had_errors) = UTF_8.decode_with_bom_removal(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => {
            assert_eq!(s, "\u{20AC}\u{00E4}");
        }
        Cow::Owned(_) => unreachable!(),
    }
    assert!(!had_errors);
}
5337
/// `decode_with_bom_removal` on windows-1257 does not sniff a UTF-8 BOM: the
/// BOM bytes and the UTF-8 sequences are decoded byte by byte as
/// windows-1257 into an owned string, without errors.
#[test]
fn test_decode_bomful_valid_utf8_as_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) =
        WINDOWS_1257.decode_with_bom_removal(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(
                s,
                "\u{013C}\u{00BB}\u{00E6}\u{0101}\u{201A}\u{00AC}\u{0106}\u{00A4}"
            );
        }
    }
    assert!(!had_errors);
}
5353
/// Valid non-ASCII windows-1257 input decodes into an owned string without
/// errors via `decode_with_bom_removal`.
#[test]
fn test_decode_valid_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) = WINDOWS_1257.decode_with_bom_removal(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{00E4}");
        }
    }
    assert!(!had_errors);
}
5365
/// An unassigned windows-1257 byte (0xA1) decodes to U+FFFD and sets the
/// error flag via `decode_with_bom_removal`.
#[test]
fn test_decode_invalid_windows_1257_to_cow_with_bom_removal() {
    let (cow, had_errors) = WINDOWS_1257.decode_with_bom_removal(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert!(had_errors);
}
5377
/// ASCII-only windows-1257 input is returned as a borrowed string without
/// errors via `decode_with_bom_removal`.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_with_bom_removal() {
    let (decoded, saw_errors) = WINDOWS_1257.decode_with_bom_removal(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        unreachable!();
    }
    assert!(!saw_errors);
}
5389
/// `decode_without_bom_handling` keeps the BOM: valid UTF-8 is returned
/// borrowed with a leading U+FEFF and no errors.
#[test]
fn test_decode_bomful_valid_utf8_to_cow_without_bom_handling() {
    let (cow, had_errors) =
        UTF_8.decode_without_bom_handling(b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4");
    match cow {
        Cow::Borrowed(s) => {
            assert_eq!(s, "\u{FEFF}\u{20AC}\u{00E4}");
        }
        Cow::Owned(_) => unreachable!(),
    }
    assert!(!had_errors);
}
5402
/// Decodes BOM-prefixed UTF-8 containing a stray 0x80 byte through
/// `decode_without_bom_handling`.
///
/// The test expects an owned string that keeps the BOM as U+FEFF and has
/// U+FFFD in place of the stray byte, with the error flag set.
#[test]
fn test_decode_bomful_invalid_utf8_to_cow_without_bom_handling() {
    let (cow, had_errors) =
        UTF_8.decode_without_bom_handling(b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "\u{FEFF}\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert!(had_errors);
}
5415
/// Decodes `abc` followed by the bytes 0x80 and 0xE4 as windows-1257 through
/// `decode_without_bom_handling`.
///
/// The test expects an owned string ending in U+20AC and U+00E4 and no
/// reported errors.
#[test]
fn test_decode_valid_windows_1257_to_cow_without_bom_handling() {
    let (cow, had_errors) = WINDOWS_1257.decode_without_bom_handling(b"abc\x80\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{00E4}");
        }
    }
    assert!(!had_errors);
}
5427
/// Decodes windows-1257 input containing the byte 0xA1 through
/// `decode_without_bom_handling`.
///
/// The test expects that byte to surface as U+FFFD in an owned string and the
/// error flag to be set.
#[test]
fn test_decode_invalid_windows_1257_to_cow_without_bom_handling() {
    let (cow, had_errors) = WINDOWS_1257.decode_without_bom_handling(b"abc\x80\xA1\xE4");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, "abc\u{20AC}\u{FFFD}\u{00E4}");
        }
    }
    assert!(had_errors);
}
5439
/// ASCII-only input decoded as windows-1257 through
/// `decode_without_bom_handling` must come back as a borrowed string equal to
/// the input, with the error flag clear.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_without_bom_handling() {
    let (decoded, saw_errors) = WINDOWS_1257.decode_without_bom_handling(b"abc");
    if let Cow::Borrowed(text) = decoded {
        assert_eq!(text, "abc");
    } else {
        // An owned result would mean the input was copied.
        unreachable!();
    }
    assert!(!saw_errors);
}
5451
/// Decodes valid BOM-prefixed UTF-8 through
/// `decode_without_bom_handling_and_without_replacement`.
///
/// The test expects `Some` borrowed string in which the BOM is kept as U+FEFF.
#[test]
fn test_decode_bomful_valid_utf8_to_cow_without_bom_handling_and_without_replacement() {
    match UTF_8.decode_without_bom_handling_and_without_replacement(
        b"\xEF\xBB\xBF\xE2\x82\xAC\xC3\xA4",
    ) {
        Some(cow) => match cow {
            Cow::Borrowed(s) => {
                assert_eq!(s, "\u{FEFF}\u{20AC}\u{00E4}");
            }
            Cow::Owned(_) => unreachable!(),
        },
        None => unreachable!(),
    }
}
5466
/// BOM-prefixed UTF-8 containing a stray 0x80 byte must make
/// `decode_without_bom_handling_and_without_replacement` return `None`.
#[test]
fn test_decode_bomful_invalid_utf8_to_cow_without_bom_handling_and_without_replacement() {
    assert!(UTF_8
        .decode_without_bom_handling_and_without_replacement(
            b"\xEF\xBB\xBF\xE2\x82\xAC\x80\xC3\xA4"
        )
        .is_none());
}
5475
/// Decodes `abc` followed by the bytes 0x80 and 0xE4 as windows-1257 through
/// `decode_without_bom_handling_and_without_replacement`.
///
/// The test expects `Some` owned string ending in U+20AC and U+00E4.
#[test]
fn test_decode_valid_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    match WINDOWS_1257.decode_without_bom_handling_and_without_replacement(b"abc\x80\xE4") {
        Some(cow) => match cow {
            Cow::Borrowed(_) => unreachable!(),
            Cow::Owned(s) => {
                assert_eq!(s, "abc\u{20AC}\u{00E4}");
            }
        },
        None => unreachable!(),
    }
}
5488
/// windows-1257 input containing the byte 0xA1 must make
/// `decode_without_bom_handling_and_without_replacement` return `None`.
#[test]
fn test_decode_invalid_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    assert!(WINDOWS_1257
        .decode_without_bom_handling_and_without_replacement(b"abc\x80\xA1\xE4")
        .is_none());
}
5495
/// ASCII-only input decoded as windows-1257 through
/// `decode_without_bom_handling_and_without_replacement` must yield `Some`
/// borrowed string equal to the input.
#[test]
fn test_decode_ascii_only_windows_1257_to_cow_without_bom_handling_and_without_replacement() {
    let outcome = WINDOWS_1257.decode_without_bom_handling_and_without_replacement(b"abc");
    match outcome {
        Some(Cow::Borrowed(text)) => assert_eq!(text, "abc"),
        // Neither a copy nor a failure is acceptable for pure ASCII.
        Some(Cow::Owned(_)) | None => unreachable!(),
    }
}
5508
/// Encoding an ASCII-only string as windows-1257 must return the input bytes
/// borrowed, report `WINDOWS_1257` as the encoding used, and leave the error
/// flag clear.
#[test]
fn test_encode_ascii_only_windows_1257_to_cow() {
    let (bytes, used_encoding, saw_errors) = WINDOWS_1257.encode("abc");
    if let Cow::Borrowed(raw) = bytes {
        assert_eq!(raw, b"abc");
    } else {
        // An owned result would mean the input was copied.
        unreachable!();
    }
    assert_eq!(used_encoding, WINDOWS_1257);
    assert!(!saw_errors);
}
5521
/// Encodes `abc` followed by U+20AC and U+00E4 as windows-1257.
///
/// The test expects an owned buffer ending in the bytes 0x80 and 0xE4,
/// `WINDOWS_1257` reported as the encoding used, and no errors.
#[test]
fn test_encode_valid_windows_1257_to_cow() {
    let (cow, encoding, had_errors) = WINDOWS_1257.encode("abc\u{20AC}\u{00E4}");
    match cow {
        Cow::Borrowed(_) => unreachable!(),
        Cow::Owned(s) => {
            assert_eq!(s, b"abc\x80\xE4");
        }
    }
    assert_eq!(encoding, WINDOWS_1257);
    assert!(!had_errors);
}
5534
/// Feeds the single byte 0xFF to a UTF-16LE decoder twice, first as a
/// non-final and then as the final chunk.
///
/// Each call gets an output slice sized by `max_utf16_buffer_length(1)` and
/// must report `InputEmpty`.
#[test]
fn test_utf16_space_with_one_bom_byte() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    for &last in [false, true].iter() {
        // Query the bound again before every call; the decoder has consumed
        // input in between.
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let (result, _, _, _) = decoder.decode_to_utf16(b"\xFF", &mut dst[..needed], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5550
/// Feeds the single byte 0xFF to a UTF-8 decoder twice, first as a non-final
/// and then as the final chunk.
///
/// Each call gets an output slice sized by `max_utf16_buffer_length(1)` and
/// must report `InputEmpty`.
#[test]
fn test_utf8_space_with_one_bom_byte() {
    let mut decoder = UTF_8.new_decoder();
    let mut dst = [0u16; 12];
    for &last in [false, true].iter() {
        // Query the bound again before every call; the decoder has consumed
        // input in between.
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let (result, _, _, _) = decoder.decode_to_utf16(b"\xFF", &mut dst[..needed], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5566
/// Feeds the bytes 0xEF, 0xBB and 0xFF one at a time to a UTF-16LE decoder,
/// marking only the last one as final.
///
/// Each call gets an output slice sized by `max_utf16_buffer_length(1)` and
/// must report `InputEmpty`.
#[test]
fn test_utf16_space_with_two_bom_bytes() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    let steps = [
        (&b"\xEF"[..], false),
        (&b"\xBB"[..], false),
        (&b"\xFF"[..], true),
    ];
    for &(chunk, last) in steps.iter() {
        // Query the bound again before every call; the decoder has consumed
        // input in between.
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let (result, _, _, _) = decoder.decode_to_utf16(chunk, &mut dst[..needed], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5587
/// Feeds the bytes 0xEF, 0xBB and 0xFF one at a time to a UTF-8 decoder,
/// marking only the last one as final.
///
/// Each call gets an output slice sized by `max_utf16_buffer_length(1)` and
/// must report `InputEmpty`.
#[test]
fn test_utf8_space_with_two_bom_bytes() {
    let mut decoder = UTF_8.new_decoder();
    let mut dst = [0u16; 12];
    let steps = [
        (&b"\xEF"[..], false),
        (&b"\xBB"[..], false),
        (&b"\xFF"[..], true),
    ];
    for &(chunk, last) in steps.iter() {
        // Query the bound again before every call; the decoder has consumed
        // input in between.
        let needed = decoder.max_utf16_buffer_length(1).unwrap();
        let (result, _, _, _) = decoder.decode_to_utf16(chunk, &mut dst[..needed], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5608
/// Feeds the two bytes 0xFF 0xFF to a UTF-16LE decoder in one final call.
///
/// The output slice is sized by `max_utf16_buffer_length(2)` and the call
/// must report `InputEmpty`.
#[test]
fn test_utf16_space_with_one_bom_byte_and_a_second_byte_in_same_call() {
    let mut decoder = UTF_16LE.new_decoder();
    let mut dst = [0u16; 12];
    let needed = decoder.max_utf16_buffer_length(2).unwrap();
    let (result, _, _, _) = decoder.decode_to_utf16(b"\xFF\xFF", &mut dst[..needed], true);
    assert_eq!(result, CoderResult::InputEmpty);
}
5619
/// Encodes empty UTF-8 input with a fresh ISO-2022-JP encoder into an 8-byte
/// buffer, first as a non-final and then as the final chunk.
///
/// Both calls must report `InputEmpty`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_ascii_from_utf8() {
    let mut dst = [0u8; 8];
    let mut encoder = ISO_2022_JP.new_encoder();
    for &last in [false, true].iter() {
        let (result, _, _, _) = encoder.encode_from_utf8("", &mut dst[..], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5633
/// Encodes U+00A5 with an ISO-2022-JP encoder, then flushes it with empty
/// input into only the first 8 bytes of the buffer.
///
/// The non-final empty call must report `InputEmpty`; the final empty call
/// must report `OutputFull`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_roman_from_utf8() {
    let mut dst = [0u8; 16];
    let mut encoder = ISO_2022_JP.new_encoder();
    {
        // The full 16-byte buffer is available for the actual character.
        let (result, _, _, _) = encoder.encode_from_utf8("\u{A5}", &mut dst[..], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let (result, _, _, _) = encoder.encode_from_utf8("", &mut dst[..8], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        // Same 8-byte window, but now marked as the end of the stream.
        let (result, _, _, _) = encoder.encode_from_utf8("", &mut dst[..8], true);
        assert_eq!(result, CoderResult::OutputFull);
    }
}
5651
/// Checks `encode_from_utf8` results for ISO-2022-JP near the end of the
/// output buffer, using a fresh encoder for every case.
///
/// * U+00A5 U+1F4A9 into 18 bytes: `InputEmpty` when non-final, `OutputFull`
///   when final.
/// * U+1F4A9 alone into 13 bytes: `InputEmpty` both when non-final and final.
#[test]
fn test_buffer_end_iso_2022_jp_from_utf8() {
    let mut dst = [0u8; 18];
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) =
            encoder.encode_from_utf8("\u{A5}\u{1F4A9}", &mut dst[..], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) = encoder.encode_from_utf8("\u{A5}\u{1F4A9}", &mut dst[..], true);
        assert_eq!(result, CoderResult::OutputFull);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) = encoder.encode_from_utf8("\u{1F4A9}", &mut dst[..13], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) = encoder.encode_from_utf8("\u{1F4A9}", &mut dst[..13], true);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5677
/// Encodes empty UTF-16 input with a fresh ISO-2022-JP encoder into an 8-byte
/// buffer, first as a non-final and then as the final chunk.
///
/// Both calls must report `InputEmpty`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_ascii_from_utf16() {
    let mut dst = [0u8; 8];
    let mut encoder = ISO_2022_JP.new_encoder();
    for &last in [false, true].iter() {
        let (result, _, _, _) = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..], last);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5691
/// Encodes the UTF-16 unit 0x00A5 with an ISO-2022-JP encoder, then flushes
/// it with empty input into only the first 8 bytes of the buffer.
///
/// The non-final empty call must report `InputEmpty`; the final empty call
/// must report `OutputFull`.
#[test]
fn test_too_short_buffer_with_iso_2022_jp_roman_from_utf16() {
    let mut dst = [0u8; 16];
    let mut encoder = ISO_2022_JP.new_encoder();
    {
        // The full 16-byte buffer is available for the actual character.
        let (result, _, _, _) = encoder.encode_from_utf16(&[0xA5u16], &mut dst[..], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let (result, _, _, _) = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..8], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        // Same 8-byte window, but now marked as the end of the stream.
        let (result, _, _, _) = encoder.encode_from_utf16(&[0u16; 0], &mut dst[..8], true);
        assert_eq!(result, CoderResult::OutputFull);
    }
}
5709
/// Checks `encode_from_utf16` results for ISO-2022-JP near the end of the
/// output buffer, using a fresh encoder for every case.
///
/// * 0x00A5 plus the surrogate pair 0xD83D 0xDCA9 into 18 bytes: `InputEmpty`
///   when non-final, `OutputFull` when final.
/// * The surrogate pair alone into 13 bytes: `InputEmpty` both when non-final
///   and final.
#[test]
fn test_buffer_end_iso_2022_jp_from_utf16() {
    let mut dst = [0u8; 18];
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) =
            encoder.encode_from_utf16(&[0xA5u16, 0xD83Du16, 0xDCA9u16], &mut dst[..], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) =
            encoder.encode_from_utf16(&[0xA5u16, 0xD83Du16, 0xDCA9u16], &mut dst[..], true);
        assert_eq!(result, CoderResult::OutputFull);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) =
            encoder.encode_from_utf16(&[0xD83Du16, 0xDCA9u16], &mut dst[..13], false);
        assert_eq!(result, CoderResult::InputEmpty);
    }
    {
        let mut encoder = ISO_2022_JP.new_encoder();
        let (result, _, _, _) =
            encoder.encode_from_utf16(&[0xD83Du16, 0xDCA9u16], &mut dst[..13], true);
        assert_eq!(result, CoderResult::InputEmpty);
    }
}
5738
/// Feeds the bytes D8 00 twice to a UTF-16BE decoder created without BOM
/// handling, writing UTF-8 into a 4-byte buffer.
///
/// The first, non-final call must read both bytes, write nothing and report
/// no error. The result of the second, final call is discarded, so that call
/// only has to return without panicking.
#[test]
fn test_buffer_end_utf16be() {
    let mut decoder = UTF_16BE.new_decoder_without_bom_handling();
    let mut dest = [0u8; 4];

    assert_eq!(
        decoder.decode_to_utf8(&[0xD8, 0x00], &mut dest, false),
        (CoderResult::InputEmpty, 2, 0, false)
    );

    let _ = decoder.decode_to_utf8(&[0xD8, 0x00], &mut dest, true);
}
5751
/// Stores encoding references in a `BTreeSet` and checks that membership
/// reflects exactly what was inserted and later removed.
#[test]
fn test_hash() {
    let mut seen = ::alloc::collections::btree_set::BTreeSet::new();
    for &encoding in [UTF_8, ISO_2022_JP].iter() {
        seen.insert(encoding);
    }

    // Both inserted encodings are found; one that was never added is not.
    assert!(seen.contains(UTF_8));
    assert!(seen.contains(ISO_2022_JP));
    assert!(!seen.contains(WINDOWS_1252));

    // Removal must make the entry disappear again.
    seen.remove(ISO_2022_JP);
    assert!(!seen.contains(ISO_2022_JP));
}
5763
/// Encodes the UTF-16 units 0x3041 0xFFFF as the final chunk with a fresh
/// ISO-2022-JP encoder into a 17-byte buffer and expects `OutputFull`.
#[test]
fn test_iso_2022_jp_ncr_extra_from_utf16() {
    let mut dst = [0u8; 17];
    let mut encoder = ISO_2022_JP.new_encoder();
    let (result, _, _, _) =
        encoder.encode_from_utf16(&[0x3041u16, 0xFFFFu16], &mut dst[..], true);
    assert_eq!(result, CoderResult::OutputFull);
}
5774
/// Encodes U+3041 U+FFFF as the final chunk with a fresh ISO-2022-JP encoder
/// into a 17-byte buffer and expects `OutputFull`.
#[test]
fn test_iso_2022_jp_ncr_extra_from_utf8() {
    let mut dst = [0u8; 17];
    let mut encoder = ISO_2022_JP.new_encoder();
    let (result, _, _, _) = encoder.encode_from_utf8("\u{3041}\u{FFFF}", &mut dst[..], true);
    assert_eq!(result, CoderResult::OutputFull);
}
5785
/// Decodes a UTF-8 BOM followed by `A` with a decoder obtained from
/// `REPLACEMENT.new_decoder()`.
///
/// The output slice is sized by `max_utf8_buffer_length_without_replacement`.
/// The test expects all input to be read and exactly one byte, `A`, to be
/// written.
#[test]
fn test_max_length_with_bom_to_utf8() {
    let mut output = [0u8; 20];
    let mut decoder = REPLACEMENT.new_decoder();
    // The BOM must sit at offset 0 of the input.
    let input = b"\xEF\xBB\xBFA";
    let needed = decoder
        .max_utf8_buffer_length_without_replacement(input.len())
        .unwrap();
    let (result, read, written) =
        decoder.decode_to_utf8_without_replacement(input, &mut output[..needed], true);
    assert_eq!(result, DecoderResult::InputEmpty);
    assert_eq!(read, input.len());
    assert_eq!(written, 1);
    assert_eq!(output[0], 0x41);
}
5803
/// Round-trips a `Demo` value whose `enc` field is `UTF_8` through serde_json
/// and through bincode, expecting each deserialized value to equal the
/// original. Only compiled when the `serde` feature is enabled.
#[cfg(feature = "serde")]
#[test]
fn test_serde() {
    let demo = Demo {
        num: 42,
        name: "foo".into(),
        enc: UTF_8,
    };

    // Text format round trip.
    let serialized = serde_json::to_string(&demo).unwrap();
    let deserialized: Demo = serde_json::from_str(&serialized).unwrap();
    assert_eq!(deserialized, demo);

    // Binary format round trip.
    let bincoded = bincode::serialize(&demo).unwrap();
    let debincoded: Demo = bincode::deserialize(&bincoded[..]).unwrap();
    assert_eq!(debincoded, demo);
}
5822
/// Checks `is_single_byte` for every encoding constant listed below: it must
/// be false for the first group and true for the second.
#[test]
fn test_is_single_byte() {
    let not_single_byte = [
        BIG5,
        EUC_JP,
        EUC_KR,
        GB18030,
        GBK,
        REPLACEMENT,
        SHIFT_JIS,
        UTF_8,
        UTF_16BE,
        UTF_16LE,
        ISO_2022_JP,
    ];
    for &encoding in not_single_byte.iter() {
        assert!(
            !encoding.is_single_byte(),
            "{:?} must not be reported as single-byte",
            encoding
        );
    }

    let single_byte = [
        IBM866,
        ISO_8859_2,
        ISO_8859_3,
        ISO_8859_4,
        ISO_8859_5,
        ISO_8859_6,
        ISO_8859_7,
        ISO_8859_8,
        ISO_8859_10,
        ISO_8859_13,
        ISO_8859_14,
        ISO_8859_15,
        ISO_8859_16,
        ISO_8859_8_I,
        KOI8_R,
        KOI8_U,
        MACINTOSH,
        WINDOWS_874,
        WINDOWS_1250,
        WINDOWS_1251,
        WINDOWS_1252,
        WINDOWS_1253,
        WINDOWS_1254,
        WINDOWS_1255,
        WINDOWS_1256,
        WINDOWS_1257,
        WINDOWS_1258,
        X_MAC_CYRILLIC,
        X_USER_DEFINED,
    ];
    for &encoding in single_byte.iter() {
        assert!(
            encoding.is_single_byte(),
            "{:?} must be reported as single-byte",
            encoding
        );
    }
}
5867
/// Checks `Decoder::latin1_byte_compatible_up_to` against one fixed buffer.
///
/// The first part runs a fresh decoder without BOM handling for every listed
/// encoding and compares the result with the expected value (`None` for
/// REPLACEMENT, UTF-16BE and UTF-16LE). The second part uses a BOM-sniffing
/// UTF-8 decoder and checks the answer before any input, part-way through a
/// BOM, right after a complete BOM, and after a further 0xEF byte.
#[test]
fn test_latin1_byte_compatible_up_to() {
    // Only the first byte is ASCII; every following byte is 0x80 or above.
    let buffer = b"a\x81\xB6\xF6\xF0\x82\xB4";

    let expectations = [
        (BIG5, Some(1usize)),
        (EUC_JP, Some(1)),
        (EUC_KR, Some(1)),
        (GB18030, Some(1)),
        (GBK, Some(1)),
        (REPLACEMENT, None),
        (SHIFT_JIS, Some(1)),
        (UTF_8, Some(1)),
        (UTF_16BE, None),
        (UTF_16LE, None),
        (ISO_2022_JP, Some(1)),
        (IBM866, Some(1)),
        (ISO_8859_2, Some(2)),
        (ISO_8859_3, Some(2)),
        (ISO_8859_4, Some(2)),
        (ISO_8859_5, Some(2)),
        (ISO_8859_6, Some(2)),
        (ISO_8859_7, Some(2)),
        (ISO_8859_8, Some(3)),
        (ISO_8859_10, Some(2)),
        (ISO_8859_13, Some(4)),
        (ISO_8859_14, Some(4)),
        (ISO_8859_15, Some(6)),
        (ISO_8859_16, Some(4)),
        (ISO_8859_8_I, Some(3)),
        (KOI8_R, Some(1)),
        (KOI8_U, Some(1)),
        (MACINTOSH, Some(1)),
        (WINDOWS_874, Some(2)),
        (WINDOWS_1250, Some(4)),
        (WINDOWS_1251, Some(1)),
        (WINDOWS_1252, Some(5)),
        (WINDOWS_1253, Some(3)),
        (WINDOWS_1254, Some(4)),
        (WINDOWS_1255, Some(3)),
        (WINDOWS_1256, Some(1)),
        (WINDOWS_1257, Some(4)),
        (WINDOWS_1258, Some(4)),
        (X_MAC_CYRILLIC, Some(1)),
        (X_USER_DEFINED, Some(1)),
    ];
    for &(encoding, expected) in expectations.iter() {
        assert_eq!(
            encoding
                .new_decoder_without_bom_handling()
                .latin1_byte_compatible_up_to(buffer),
            expected,
            "unexpected Latin1-compatible prefix length for {:?}",
            encoding
        );
    }

    // A BOM-sniffing decoder that has not seen any input yet has no answer.
    assert!(UTF_8
        .new_decoder()
        .latin1_byte_compatible_up_to(buffer)
        .is_none());

    let mut decoder = UTF_8.new_decoder();
    let mut output = [0u16; 4];
    // Only the first BOM byte so far: still no answer.
    let _ = decoder.decode_to_utf16(b"\xEF", &mut output, false);
    assert!(decoder.latin1_byte_compatible_up_to(buffer).is_none());
    // The BOM is complete: the ASCII prefix of `buffer` is reported.
    let _ = decoder.decode_to_utf16(b"\xBB\xBF", &mut output, false);
    assert_eq!(decoder.latin1_byte_compatible_up_to(buffer), Some(1));
    // After a further 0xEF byte the decoder again reports no answer.
    let _ = decoder.decode_to_utf16(b"\xEF", &mut output, false);
    assert_eq!(decoder.latin1_byte_compatible_up_to(buffer), None);
}
6156	}
6157