1 | //! Convert a string in IBM codepage 437 to UTF-8 |
2 | |
/// Trait to convert data encoded in IBM codepage 437 (CP437) to the target type.
///
/// This file implements it for `&[u8]` (producing a `Cow<str>`) and for
/// `Vec<u8>` (producing a `String`).
pub trait FromCp437 {
    /// Target type: the string-like value produced by the conversion.
    type Target;

    /// Function that does the conversion from cp437.
    ///
    /// Takes `self` by value and returns the decoded text as [`Self::Target`].
    /// Generally allocations will be avoided if all data falls into the ASCII range.
    // Clippy expects `from_*` methods not to take `self`; here the receiver *is*
    // the CP437 input being converted, so the lint is silenced on purpose.
    #[allow (clippy::wrong_self_convention)]
    fn from_cp437(self) -> Self::Target;
}
13 | |
14 | impl<'a> FromCp437 for &'a [u8] { |
15 | type Target = ::std::borrow::Cow<'a, str>; |
16 | |
17 | fn from_cp437(self) -> Self::Target { |
18 | if self.iter().all(|c: &u8| *c < 0x80) { |
19 | ::std::str::from_utf8(self).unwrap().into() |
20 | } else { |
21 | self.iter().map(|c: &u8| to_char(*c)).collect::<String>().into() |
22 | } |
23 | } |
24 | } |
25 | |
26 | impl FromCp437 for Vec<u8> { |
27 | type Target = String; |
28 | |
29 | fn from_cp437(self) -> Self::Target { |
30 | if self.iter().all(|c: &u8| *c < 0x80) { |
31 | String::from_utf8(self).unwrap() |
32 | } else { |
33 | self.into_iter().map(to_char).collect() |
34 | } |
35 | } |
36 | } |
37 | |
/// Maps a single CP437 byte to the Unicode character it represents.
///
/// Bytes `0x00..=0x7f` map to the code point with the same numeric value;
/// bytes `0x80..=0xff` are looked up in a fixed 128-entry table.
fn to_char(input: u8) -> char {
    // Unicode equivalents of CP437 bytes 0x80..=0xff, indexed by `byte - 0x80`.
    // Each row holds eight consecutive entries; the comment gives the first byte.
    const UPPER_HALF: [char; 128] = [
        // 0x80
        '\u{00c7}', '\u{00fc}', '\u{00e9}', '\u{00e2}', '\u{00e4}', '\u{00e0}', '\u{00e5}', '\u{00e7}',
        // 0x88
        '\u{00ea}', '\u{00eb}', '\u{00e8}', '\u{00ef}', '\u{00ee}', '\u{00ec}', '\u{00c4}', '\u{00c5}',
        // 0x90
        '\u{00c9}', '\u{00e6}', '\u{00c6}', '\u{00f4}', '\u{00f6}', '\u{00f2}', '\u{00fb}', '\u{00f9}',
        // 0x98
        '\u{00ff}', '\u{00d6}', '\u{00dc}', '\u{00a2}', '\u{00a3}', '\u{00a5}', '\u{20a7}', '\u{0192}',
        // 0xa0
        '\u{00e1}', '\u{00ed}', '\u{00f3}', '\u{00fa}', '\u{00f1}', '\u{00d1}', '\u{00aa}', '\u{00ba}',
        // 0xa8
        '\u{00bf}', '\u{2310}', '\u{00ac}', '\u{00bd}', '\u{00bc}', '\u{00a1}', '\u{00ab}', '\u{00bb}',
        // 0xb0
        '\u{2591}', '\u{2592}', '\u{2593}', '\u{2502}', '\u{2524}', '\u{2561}', '\u{2562}', '\u{2556}',
        // 0xb8
        '\u{2555}', '\u{2563}', '\u{2551}', '\u{2557}', '\u{255d}', '\u{255c}', '\u{255b}', '\u{2510}',
        // 0xc0
        '\u{2514}', '\u{2534}', '\u{252c}', '\u{251c}', '\u{2500}', '\u{253c}', '\u{255e}', '\u{255f}',
        // 0xc8
        '\u{255a}', '\u{2554}', '\u{2569}', '\u{2566}', '\u{2560}', '\u{2550}', '\u{256c}', '\u{2567}',
        // 0xd0
        '\u{2568}', '\u{2564}', '\u{2565}', '\u{2559}', '\u{2558}', '\u{2552}', '\u{2553}', '\u{256b}',
        // 0xd8
        '\u{256a}', '\u{2518}', '\u{250c}', '\u{2588}', '\u{2584}', '\u{258c}', '\u{2590}', '\u{2580}',
        // 0xe0
        '\u{03b1}', '\u{00df}', '\u{0393}', '\u{03c0}', '\u{03a3}', '\u{03c3}', '\u{00b5}', '\u{03c4}',
        // 0xe8
        '\u{03a6}', '\u{0398}', '\u{03a9}', '\u{03b4}', '\u{221e}', '\u{03c6}', '\u{03b5}', '\u{2229}',
        // 0xf0
        '\u{2261}', '\u{00b1}', '\u{2265}', '\u{2264}', '\u{2320}', '\u{2321}', '\u{00f7}', '\u{2248}',
        // 0xf8
        '\u{00b0}', '\u{2219}', '\u{00b7}', '\u{221a}', '\u{207f}', '\u{00b2}', '\u{25a0}', '\u{00a0}',
    ];

    // `checked_sub` yields `None` exactly for the ASCII half (input < 0x80),
    // where the byte value already is the code point.
    match input.checked_sub(0x80) {
        Some(offset) => UPPER_HALF[usize::from(offset)],
        None => char::from(input),
    }
}
172 | |
#[cfg(test)]
mod test {
    /// Every possible byte value must map to a character without panicking.
    #[test]
    fn to_char_valid() {
        // An inclusive `u8` range covers all 256 inputs without a truncating cast.
        for byte in 0x00..=0xff_u8 {
            super::to_char(byte);
        }
    }

    /// The lower half of CP437 coincides with ASCII.
    #[test]
    fn ascii() {
        for i in 0x00..0x80 {
            assert_eq!(super::to_char(i), i as char);
        }
    }

    /// A borrowed slice containing a non-ASCII CP437 byte is decoded correctly.
    #[test]
    fn example_slice() {
        use super::FromCp437;
        // 0x87 is 'ç' in CP437; on its own it is not valid UTF-8. No space
        // precedes the escape, so the decoded text is exactly "Curaçao".
        let data = b"Cura\x87ao";
        assert!(::std::str::from_utf8(data).is_err());
        assert_eq!(data.from_cp437(), "Curaçao");
    }

    /// An owned vector of CP437 box-drawing bytes is decoded correctly.
    #[test]
    fn example_vec() {
        use super::FromCp437;
        // These bytes are box-drawing characters in CP437 and do not form
        // valid UTF-8.
        let data = vec![0xCC, 0xCD, 0xCD, 0xB9];
        assert!(String::from_utf8(data.clone()).is_err());
        assert_eq!(&data.from_cp437(), "╠══╣");
    }
}
205 | |