1 | #![deny ( |
2 | missing_copy_implementations, |
3 | missing_debug_implementations, |
4 | missing_docs, |
5 | trivial_casts, |
6 | trivial_numeric_casts, |
7 | unsafe_code, |
8 | unused_import_braces, |
9 | unused_qualifications, |
10 | )] |
11 | |
12 | #![cfg_attr (feature = "dev" , feature(plugin))] |
13 | #![cfg_attr (feature = "dev" , plugin(clippy))] |
14 | #![cfg_attr (feature = "dev" , deny(clippy))] |
15 | |
//! Functions to decode and encode [RFC-3492 Punycode](https://tools.ietf.org/html/rfc3492).
17 | |
// Bootstring parameters used by Punycode.
// See [RFC-3492, section 4](https://tools.ietf.org/html/rfc3492#section-4) for their meaning and
// [section 5](https://tools.ietf.org/html/rfc3492#section-5) for the values.

// Number of distinct digit values (`a`-`z` and `0`-`9`).
const BASE: u32 = 36;
// Lower clamp for the per-position threshold.
const TMIN: u32 = 1;
// Upper clamp for the per-position threshold.
const TMAX: u32 = 26;
// Skew term used when recomputing the bias.
const SKEW: u32 = 38;
// Divisor applied to the very first delta when recomputing the bias.
const DAMP: u32 = 700;
// Bias in effect before any delta has been processed.
const INITIAL_BIAS: u32 = 72;
// First code point that is not ASCII.
const INITIAL_N: u32 = 0x80;
// Separates the literal ASCII prefix from the encoded deltas.
const DELIMITER: char = '-';
27 | |
28 | /// Decode the string as Punycode. The string should not contain the initial `xn--` and must |
29 | /// contain only ASCII characters. |
30 | /// # Example |
31 | /// ``` |
32 | /// assert_eq!( |
33 | /// punycode::decode("acadmie-franaise-npb1a" ).unwrap(), |
34 | /// "académie-française" |
35 | /// ); |
36 | /// ``` |
37 | pub fn decode(input: &str) -> Result<String, ()> { |
38 | if !input.is_ascii() { |
39 | return Err(()); |
40 | } |
41 | |
42 | let mut n = INITIAL_N; |
43 | let mut i = 0; |
44 | let mut bias = INITIAL_BIAS; |
45 | |
46 | let (mut output, input) = if let Some(i) = input.rfind(DELIMITER) { |
47 | (input[0..i].chars().collect(), &input[i+1..]) |
48 | } |
49 | else { |
50 | (vec![], &input[..]) |
51 | }; |
52 | |
53 | let mut it = input.chars().peekable(); |
54 | while it.peek() != None { |
55 | let oldi = i; |
56 | let mut w = 1; |
57 | |
58 | for k in 1.. { |
59 | let c = if let Some(c) = it.next() { |
60 | c |
61 | } |
62 | else { |
63 | return Err(()); |
64 | }; |
65 | |
66 | let k = k*BASE; |
67 | |
68 | let digit = decode_digit(c); |
69 | |
70 | if digit == BASE { |
71 | return Err(()); |
72 | } |
73 | |
74 | // overflow check |
75 | if digit > (std::u32::MAX - i) / w { |
76 | return Err(()); |
77 | } |
78 | i += digit * w; |
79 | |
80 | let t = clamped_sub(TMIN, k, bias, TMAX); |
81 | if digit < t { |
82 | break; |
83 | } |
84 | |
85 | // overflow check |
86 | if BASE > (std::u32::MAX - t) / w { |
87 | return Err(()); |
88 | } |
89 | w *= BASE - t; |
90 | } |
91 | |
92 | let len = (output.len() + 1) as u32; |
93 | bias = adapt(i - oldi, len, oldi == 0); |
94 | |
95 | let il = i / len; |
96 | // overflow check |
97 | if n > std::u32::MAX - il { |
98 | return Err(()); |
99 | } |
100 | n += il; |
101 | i %= len; |
102 | |
103 | if let Some(c) = std::char::from_u32(n) { |
104 | output.insert(i as usize, c); |
105 | } |
106 | else { |
107 | return Err(()); |
108 | } |
109 | |
110 | i += 1; |
111 | } |
112 | |
113 | Ok(output.iter().cloned().collect()) |
114 | } |
115 | |
116 | /// Encode a string as punycode. The result string will contain only ASCII characters. The result |
117 | /// string does not start with `xn--`. |
118 | /// # Example |
119 | /// ``` |
120 | /// assert_eq!( |
121 | /// punycode::encode("académie-française" ).unwrap(), |
122 | /// "acadmie-franaise-npb1a" |
123 | /// ); |
124 | /// ``` |
125 | pub fn encode(input: &str) -> Result<String, ()> { |
126 | encode_slice(&input.chars().collect::<Vec<char>>()) |
127 | } |
128 | |
129 | fn encode_slice(input: &[char]) -> Result<String, ()> { |
130 | let mut n = INITIAL_N; |
131 | let mut delta = 0; |
132 | let mut bias = INITIAL_BIAS; |
133 | |
134 | let mut output : String = input.iter().filter(|&&c| c.is_ascii()).cloned().collect(); |
135 | let mut h = output.len() as u32; |
136 | let b = h; |
137 | |
138 | if b > 0 { |
139 | output.push(DELIMITER) |
140 | } |
141 | |
142 | while h < input.len() as u32 { |
143 | let m = *input.iter().filter(|&&c| (c as u32) >= n).min().unwrap() as u32; |
144 | |
145 | if m - n > (std::u32::MAX - delta) / (h + 1) { |
146 | return Err(()); |
147 | } |
148 | delta += (m - n) * (h + 1); |
149 | |
150 | n = m; |
151 | |
152 | for c in input { |
153 | let c = *c as u32; |
154 | if c < n { |
155 | delta += 1; |
156 | } |
157 | else if c == n { |
158 | let mut q = delta; |
159 | |
160 | for k in 1.. { |
161 | let k = k*BASE; |
162 | |
163 | let t = clamped_sub(TMIN, k, bias, TMAX); |
164 | |
165 | if q < t { |
166 | break; |
167 | } |
168 | |
169 | output.push(encode_digit(t + (q - t) % (BASE - t))); |
170 | |
171 | q = (q - t) / (BASE - t); |
172 | } |
173 | |
174 | output.push(encode_digit(q)); |
175 | |
176 | bias = adapt(delta, h+1, h == b); |
177 | delta = 0; |
178 | h += 1; |
179 | } |
180 | } |
181 | |
182 | delta += 1; |
183 | n += 1; |
184 | } |
185 | |
186 | Ok(output) |
187 | } |
188 | |
189 | fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32 { |
190 | let mut delta: u32 = if firsttime { |
191 | delta / DAMP |
192 | } |
193 | else { |
194 | delta / 2 |
195 | }; |
196 | |
197 | delta += delta / numpoint; |
198 | let mut k: u32 = 0; |
199 | |
200 | while delta > (BASE - TMIN) * TMAX / 2 { |
201 | delta /= BASE - TMIN; |
202 | k += BASE |
203 | } |
204 | |
205 | k + (BASE - TMIN + 1) * delta / (delta + SKEW) |
206 | } |
207 | |
/// Compute `lhs - rhs`, clamping the result to the range `[min, max]`.
///
/// The comparisons are done on `min + rhs` and `max + rhs`, so the subtraction itself only
/// happens when its result lies strictly between the two bounds.
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where
    T: Ord + std::ops::Add<Output = T> + std::ops::Sub<Output = T> + Copy,
{
    let lower = min + rhs;
    if lhs <= lower {
        return min;
    }

    let upper = max + rhs;
    if lhs >= upper {
        return max;
    }

    lhs - rhs
}
219 | |
220 | fn decode_digit(c: char) -> u32 { |
221 | let cp: u32 = c as u32; |
222 | |
223 | match c { |
224 | '0' ... '9' => cp - ('0' as u32) + 26, |
225 | 'A' ... 'Z' => cp - ('A' as u32), |
226 | 'a' ... 'z' => cp - ('a' as u32), |
227 | _ => BASE, |
228 | } |
229 | } |
230 | |
/// Map a digit value to its Punycode character: 0..=25 become `a`-`z`, 26..=35 become `0`-`9`.
///
/// Panics if the resulting character is neither a lowercase letter nor a decimal digit.
fn encode_digit(d: u32) -> char {
    // 97 is 'a'; 22 + 26 is 48, i.e. '0'.
    let offset = if d < 26 { 97 } else { 22 };
    let r = char::from((d + offset) as u8);

    let is_decimal = r >= '0' && r <= '9';
    let is_lowercase = r >= 'a' && r <= 'z';
    assert!(is_decimal || is_lowercase, "r = {}", r);

    r
}
238 | |
/// Pairs of `(decoded, encoded)` strings shared by the encode and decode tests.
#[cfg(test)]
static TESTS: &[(&str, &str)] = &[
    // examples taken from [RFC-3492, section 7.1](https://tools.ietf.org/html/rfc3492#section-7.1)
    ("\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
      \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
     "egbpdaj6bu4bxfgehfvwxn"),

    ("\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
     "ihqwcrb4cv8a8dqg056pqjye"),

    ("\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
     "ihqwctvzc91f659drss3x8bo0yb"),

    ("\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
      \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
      \u{0065}\u{0073}\u{006B}\u{0079}",
     "Proprostnemluvesky-uyb24dma41a"),

    ("\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
      \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
      \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
     "4dbcagdahymbxekheh6e0a7fei0b"),

    ("\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
      \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
      \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
      \u{0939}\u{0948}\u{0902}",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    ("\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
      \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    ("\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
      \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
      \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"),

    ("\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
      \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
      \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
      \u{0438}",
     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    ("\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
      \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
      \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
      \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
      \u{0061}\u{00F1}\u{006F}\u{006C}",
     "PorqunopuedensimplementehablarenEspaol-fmd56a"),

    ("\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
      \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
      \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
      \u{0056}\u{0069}\u{1EC7}\u{0074}",
     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    ("\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
     "3B-ww4c5e180e575a65lsy2b"),

    ("\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
      \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
      \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    ("\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
      \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
      \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    ("\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
     "2-u9tlzr9756bt3uc0v"),

    ("\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
      \u{308B}\u{0035}\u{79D2}\u{524D}",
     "MajiKoi5-783gue6qz075azm5e"),

    ("\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
     "de-jg4avhby1noc0d"),

    ("\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
     "d9juau41awczczp"),

    ("\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
      \u{003C}\u{002D}",
     "-> $1.00 <--"),

    // some real-life examples
    ("académie-française", "acadmie-franaise-npb1a"),
    ("bücher", "bcher-kva"),
    ("république-numérique", "rpublique-numrique-bwbm"),

    // some real-life TLD
    ("бг", "90ae"),
    ("рф", "p1ai"),
    ("укр", "j1amh"),
    ("السعودية", "mgberp4a5d4ar"),
    ("امارات", "mgbaam7a8h"),
    ("مصر", "wgbh1c"),
    ("中国", "fiqs8s"),
    ("中國", "fiqz9s"),
    ("台湾", "kprw13d"),
    ("台灣", "kpry57d"),
    ("香港", "j6w193g"),

    // other
    ("", ""),
    ("a", "a-"),
    ("0", "0-"),
    ("A", "A-"),
    ("é", "9ca"),
    (" \n", " \n-"),
];
352 | |
/// Every encoded entry of `TESTS` must decode to its paired plain string.
#[test]
fn test_decode() {
    for &(plain, encoded) in TESTS {
        let expected: String = plain.to_owned();
        assert_eq!(decode(encoded), Ok(expected));
    }
}
359 | |
/// Every plain entry of `TESTS` must encode to its paired string, compared case-insensitively.
#[test]
fn test_encode() {
    for &(plain, encoded) in TESTS {
        let actual = encode(plain).unwrap().to_lowercase();
        let expected = encoded.to_lowercase();
        assert_eq!(actual, expected);
    }
}
366 | |
/// Inputs that `decode` must reject.
#[test]
fn test_fail_decode() {
    let rejected = ["bcher-kva.ch", "+", " \\", "é", "99999999"];
    for input in rejected.iter() {
        assert_eq!(decode(input), Err(()));
    }
}
375 | |