1#![deny(
2 missing_copy_implementations,
3 missing_debug_implementations,
4 missing_docs,
5 trivial_casts,
6 trivial_numeric_casts,
7 unsafe_code,
8 unused_import_braces,
9 unused_qualifications,
10)]
11
12#![cfg_attr(feature = "dev", feature(plugin))]
13#![cfg_attr(feature = "dev", plugin(clippy))]
14#![cfg_attr(feature = "dev", deny(clippy))]
15
//! Functions to decode and encode [RFC-3492 Punycode](https://tools.ietf.org/html/rfc3492).
17
// Bootstring parameters used by the encoder and decoder below.
// See [RFC-3492, section 4](https://tools.ietf.org/html/rfc3492#section-4).

/// Number of digit values: `a`-`z` stand for 0-25 and `0`-`9` for 26-35.
const BASE: u32 = 36;
/// Smallest threshold a digit position can have.
const TMIN: u32 = 1;
/// Largest threshold a digit position can have.
const TMAX: u32 = 26;
/// Skew term used in the final step of the bias adaptation.
const SKEW: u32 = 38;
/// Divisor applied to the very first delta during bias adaptation.
const DAMP: u32 = 700;
/// Bias in effect before any delta has been processed.
const INITIAL_BIAS: u32 = 72;
/// First code point that is not ASCII; starting value of `n`.
const INITIAL_N: u32 = 128;
/// Separates the literal ASCII prefix from the encoded digits.
const DELIMITER: char = '-';
27
28/// Decode the string as Punycode. The string should not contain the initial `xn--` and must
29/// contain only ASCII characters.
30/// # Example
31/// ```
32/// assert_eq!(
33/// punycode::decode("acadmie-franaise-npb1a").unwrap(),
34/// "académie-française"
35/// );
36/// ```
37pub fn decode(input: &str) -> Result<String, ()> {
38 if !input.is_ascii() {
39 return Err(());
40 }
41
42 let mut n = INITIAL_N;
43 let mut i = 0;
44 let mut bias = INITIAL_BIAS;
45
46 let (mut output, input) = if let Some(i) = input.rfind(DELIMITER) {
47 (input[0..i].chars().collect(), &input[i+1..])
48 }
49 else {
50 (vec![], &input[..])
51 };
52
53 let mut it = input.chars().peekable();
54 while it.peek() != None {
55 let oldi = i;
56 let mut w = 1;
57
58 for k in 1.. {
59 let c = if let Some(c) = it.next() {
60 c
61 }
62 else {
63 return Err(());
64 };
65
66 let k = k*BASE;
67
68 let digit = decode_digit(c);
69
70 if digit == BASE {
71 return Err(());
72 }
73
74 // overflow check
75 if digit > (std::u32::MAX - i) / w {
76 return Err(());
77 }
78 i += digit * w;
79
80 let t = clamped_sub(TMIN, k, bias, TMAX);
81 if digit < t {
82 break;
83 }
84
85 // overflow check
86 if BASE > (std::u32::MAX - t) / w {
87 return Err(());
88 }
89 w *= BASE - t;
90 }
91
92 let len = (output.len() + 1) as u32;
93 bias = adapt(i - oldi, len, oldi == 0);
94
95 let il = i / len;
96 // overflow check
97 if n > std::u32::MAX - il {
98 return Err(());
99 }
100 n += il;
101 i %= len;
102
103 if let Some(c) = std::char::from_u32(n) {
104 output.insert(i as usize, c);
105 }
106 else {
107 return Err(());
108 }
109
110 i += 1;
111 }
112
113 Ok(output.iter().cloned().collect())
114}
115
116/// Encode a string as punycode. The result string will contain only ASCII characters. The result
117/// string does not start with `xn--`.
118/// # Example
119/// ```
120/// assert_eq!(
121/// punycode::encode("académie-française").unwrap(),
122/// "acadmie-franaise-npb1a"
123/// );
124/// ```
125pub fn encode(input: &str) -> Result<String, ()> {
126 encode_slice(&input.chars().collect::<Vec<char>>())
127}
128
129fn encode_slice(input: &[char]) -> Result<String, ()> {
130 let mut n = INITIAL_N;
131 let mut delta = 0;
132 let mut bias = INITIAL_BIAS;
133
134 let mut output : String = input.iter().filter(|&&c| c.is_ascii()).cloned().collect();
135 let mut h = output.len() as u32;
136 let b = h;
137
138 if b > 0 {
139 output.push(DELIMITER)
140 }
141
142 while h < input.len() as u32 {
143 let m = *input.iter().filter(|&&c| (c as u32) >= n).min().unwrap() as u32;
144
145 if m - n > (std::u32::MAX - delta) / (h + 1) {
146 return Err(());
147 }
148 delta += (m - n) * (h + 1);
149
150 n = m;
151
152 for c in input {
153 let c = *c as u32;
154 if c < n {
155 delta += 1;
156 }
157 else if c == n {
158 let mut q = delta;
159
160 for k in 1.. {
161 let k = k*BASE;
162
163 let t = clamped_sub(TMIN, k, bias, TMAX);
164
165 if q < t {
166 break;
167 }
168
169 output.push(encode_digit(t + (q - t) % (BASE - t)));
170
171 q = (q - t) / (BASE - t);
172 }
173
174 output.push(encode_digit(q));
175
176 bias = adapt(delta, h+1, h == b);
177 delta = 0;
178 h += 1;
179 }
180 }
181
182 delta += 1;
183 n += 1;
184 }
185
186 Ok(output)
187}
188
189fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32 {
190 let mut delta: u32 = if firsttime {
191 delta / DAMP
192 }
193 else {
194 delta / 2
195 };
196
197 delta += delta / numpoint;
198 let mut k: u32 = 0;
199
200 while delta > (BASE - TMIN) * TMAX / 2 {
201 delta /= BASE - TMIN;
202 k += BASE
203 }
204
205 k + (BASE - TMIN + 1) * delta / (delta + SKEW)
206}
207
/// Compute `lhs - rhs`, clamped to `[min, max]`.
///
/// The bounds are compared as `min + rhs` and `max + rhs` against `lhs`, so the subtraction is
/// only performed when its result lies strictly between the two bounds.
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where
    T: Ord + std::ops::Add<Output = T> + std::ops::Sub<Output = T> + Copy,
{
    let lower = min + rhs;
    if lhs <= lower {
        return min;
    }

    let upper = max + rhs;
    if lhs >= upper {
        return max;
    }

    lhs - rhs
}
219
220fn decode_digit(c: char) -> u32 {
221 let cp: u32 = c as u32;
222
223 match c {
224 '0' ... '9' => cp - ('0' as u32) + 26,
225 'A' ... 'Z' => cp - ('A' as u32),
226 'a' ... 'z' => cp - ('a' as u32),
227 _ => BASE,
228 }
229}
230
/// Map a digit value to its lowercase Punycode character: 0-25 become `a`-`z` and 26-35 become
/// `0`-`9`.
///
/// # Panics
///
/// Panics when `d` is 36 or larger. The value is matched against the valid ranges before any
/// narrowing cast, so an out-of-range digit can never alias to a valid character.
fn encode_digit(d: u32) -> char {
    match d {
        0..=25 => (b'a' + d as u8) as char,
        26..=35 => (b'0' + (d - 26) as u8) as char,
        _ => panic!("d = {} is not a valid Punycode digit", d),
    }
}
238
/// Shared fixtures: each entry is `(decoded text, Punycode form)`.
#[cfg(test)]
static TESTS: &[(&str, &str)] = &[
    // examples taken from [RFC-3492, section 7.1](https://tools.ietf.org/html/rfc3492#section-7.1)
    (
        "\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
         \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
        "egbpdaj6bu4bxfgehfvwxn",
    ),
    (
        "\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
        "ihqwcrb4cv8a8dqg056pqjye",
    ),
    (
        "\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
        "ihqwctvzc91f659drss3x8bo0yb",
    ),
    (
        "\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
         \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
         \u{0065}\u{0073}\u{006B}\u{0079}",
        "Proprostnemluvesky-uyb24dma41a",
    ),
    (
        "\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
         \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
         \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
        "4dbcagdahymbxekheh6e0a7fei0b",
    ),
    (
        "\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
         \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
         \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
         \u{0939}\u{0948}\u{0902}",
        "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
    ),
    (
        "\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
         \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
        "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
    ),
    (
        "\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
         \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
         \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
        "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c",
    ),
    (
        "\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
         \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
         \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
         \u{0438}",
        "b1abfaaepdrnnbgefbaDotcwatmq2g4l",
    ),
    (
        "\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
         \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
         \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
         \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
         \u{0061}\u{00F1}\u{006F}\u{006C}",
        "PorqunopuedensimplementehablarenEspaol-fmd56a",
    ),
    (
        "\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
         \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
         \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
         \u{0056}\u{0069}\u{1EC7}\u{0074}",
        "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
    ),
    (
        "\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
        "3B-ww4c5e180e575a65lsy2b",
    ),
    (
        "\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
         \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
         \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
        "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
    ),
    (
        "\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
         \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
         \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
        "Hello-Another-Way--fc4qua05auwb3674vfr0b",
    ),
    (
        "\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
        "2-u9tlzr9756bt3uc0v",
    ),
    (
        "\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
         \u{308B}\u{0035}\u{79D2}\u{524D}",
        "MajiKoi5-783gue6qz075azm5e",
    ),
    (
        "\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
        "de-jg4avhby1noc0d",
    ),
    (
        "\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
        "d9juau41awczczp",
    ),
    (
        "\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
         \u{003C}\u{002D}",
        "-> $1.00 <--",
    ),

    // some real-life examples
    ("académie-française", "acadmie-franaise-npb1a"),
    ("bücher", "bcher-kva"),
    ("république-numérique", "rpublique-numrique-bwbm"),

    // some real-life TLDs
    ("бг", "90ae"),
    ("рф", "p1ai"),
    ("укр", "j1amh"),
    ("السعودية", "mgberp4a5d4ar"),
    ("امارات", "mgbaam7a8h"),
    ("مصر", "wgbh1c"),
    ("中国", "fiqs8s"),
    ("中國", "fiqz9s"),
    ("台湾", "kprw13d"),
    ("台灣", "kpry57d"),
    ("香港", "j6w193g"),

    // other
    ("", ""),
    ("a", "a-"),
    ("0", "0-"),
    ("A", "A-"),
    ("é", "9ca"),
    ("\n", "\n-"),
];
352
/// Every fixture must decode from its Punycode form back to the original text.
#[test]
fn test_decode() {
    for &(plain, encoded) in TESTS {
        assert_eq!(decode(encoded), Ok(plain.to_owned()));
    }
}
359
/// Every fixture must encode to its Punycode form. The comparison is case-insensitive because
/// some fixtures contain uppercase letters in the encoded part.
#[test]
fn test_encode() {
    for &(plain, encoded) in TESTS {
        let actual = encode(plain).unwrap();
        assert_eq!(actual.to_lowercase(), encoded.to_lowercase());
    }
}
366
/// Malformed inputs must be rejected: characters that are not Punycode digits, non-ASCII
/// input, and a digit sequence that does not form a complete in-range integer.
#[test]
fn test_fail_decode() {
    let rejected = ["bcher-kva.ch", "+", "\\", "é", "99999999"];

    for input in rejected.iter() {
        assert_eq!(decode(input), Err(()));
    }
}
375