1 | use super::protocol::*; |
2 | use std::{ |
3 | io::{BufRead, BufReader, Read}, |
4 | str::FromStr, |
5 | }; |
6 | |
7 | use quick_xml::{ |
8 | events::{attributes::Attributes, Event}, |
9 | Reader, |
10 | }; |
11 | |
/// Reads the next event from `$it` and evaluates `$result` if the event matches `$pattern`.
///
/// Anything else (a different event or a reader error) aborts with a panic that prints the
/// offending value.
macro_rules! extract_from {
    ($it:expr => $pattern:pat => $result:tt) => {
        match $it.read_event_into(&mut Vec::new()) {
            Ok($pattern) => { $result },
            unexpected => panic!("Ill-formed protocol file: {:?}", unexpected),
        }
    };
}
20 | |
/// Consumes the next event from `$it` and checks that it is the closing tag named `$tag`.
///
/// Panics if the next event is not an end tag, or if its name differs from `$tag`.
macro_rules! extract_end_tag {
    ($it:expr => $tag:expr) => {
        extract_from!($it => Event::End(end) => {
            let closing = end.name().into_inner();
            assert!(closing == $tag.as_bytes(), "Ill-formed protocol file");
        });
    };
}
28 | |
29 | pub fn parse<S: Read>(stream: S) -> Protocol { |
30 | let mut reader: Reader> = Reader::from_reader(BufReader::new(inner:stream)); |
31 | let reader_config: &mut Config = reader.config_mut(); |
32 | reader_config.trim_text(trim:true); |
33 | reader_config.expand_empty_elements = true; |
34 | parse_protocol(reader) |
35 | } |
36 | |
/// Converts raw bytes into a `String`, taking ownership of the buffer.
///
/// # Panics
///
/// Panics if `txt` is not valid UTF-8; the message shows a lossy rendering of the bytes.
fn decode_utf8_or_panic(txt: Vec<u8>) -> String {
    String::from_utf8(txt).unwrap_or_else(|err| {
        panic!("Invalid UTF8: ' {}'", String::from_utf8_lossy(err.as_bytes()))
    })
}
43 | |
/// Decodes `txt` as UTF-8 and parses it into `T` via its [`FromStr`] implementation.
///
/// # Panics
///
/// Panics if `txt` is not valid UTF-8 or if `T::from_str` rejects it; the message contains a
/// lossy rendering of the input and the name of the target type.
fn parse_or_panic<T: FromStr>(txt: &[u8]) -> T {
    std::str::from_utf8(txt)
        .ok()
        .and_then(|val| val.parse::<T>().ok())
        .unwrap_or_else(|| {
            panic!(
                "Invalid value ' {}' for parsing type ' {}'",
                String::from_utf8_lossy(txt),
                std::any::type_name::<T>()
            )
        })
}
54 | |
55 | fn init_protocol<R: BufRead>(reader: &mut Reader<R>) -> Protocol { |
56 | // Check two firsts lines for protocol tag |
57 | for _ in 0..3 { |
58 | match reader.read_event_into(&mut Vec::new()) { |
59 | Ok(Event::Decl(_) | Event::DocType(_)) => { |
60 | continue; |
61 | } |
62 | Ok(Event::Start(bytes: BytesStart<'_>)) => { |
63 | assert!(bytes.name().into_inner() == b"protocol" , "Missing protocol toplevel tag" ); |
64 | if let Some(attr: Attribute<'_>) = bytesimpl Iterator- >
|
65 | .attributes() |
66 | .filter_map(|res: Result, AttrError>| res.ok()) |
67 | .find(|attr: &Attribute<'_>| attr.key.into_inner() == b"name" ) |
68 | { |
69 | return Protocol::new(name:decode_utf8_or_panic(txt:attr.value.into_owned())); |
70 | } else { |
71 | panic!("Protocol must have a name" ); |
72 | } |
73 | } |
74 | _ => panic!("Ill-formed protocol file" ), |
75 | } |
76 | } |
77 | panic!("Ill-formed protocol file" ); |
78 | } |
79 | |
80 | fn parse_protocol<R: BufRead>(mut reader: Reader<R>) -> Protocol { |
81 | let mut protocol = init_protocol(&mut reader); |
82 | |
83 | loop { |
84 | match reader.read_event_into(&mut Vec::new()) { |
85 | Ok(Event::Start(bytes)) => { |
86 | match bytes.name().into_inner() { |
87 | b"copyright" => { |
88 | // parse the copyright |
89 | let copyright = match reader.read_event_into(&mut Vec::new()) { |
90 | Ok(Event::Text(copyright)) => { |
91 | copyright.unescape().ok().map(|x| x.to_string()) |
92 | } |
93 | Ok(Event::CData(copyright)) => { |
94 | String::from_utf8(copyright.into_inner().into()).ok() |
95 | } |
96 | e => panic!("Ill-formed protocol file: {:?}" , e), |
97 | }; |
98 | |
99 | extract_end_tag!(reader => "copyright" ); |
100 | protocol.copyright = copyright |
101 | } |
102 | b"interface" => { |
103 | protocol.interfaces.push(parse_interface(&mut reader, bytes.attributes())); |
104 | } |
105 | b"description" => { |
106 | protocol.description = |
107 | Some(parse_description(&mut reader, bytes.attributes())); |
108 | } |
109 | name => panic!( |
110 | "Ill-formed protocol file: unexpected token ` {}` in protocol {}" , |
111 | String::from_utf8_lossy(name), |
112 | protocol.name |
113 | ), |
114 | } |
115 | } |
116 | Ok(Event::End(bytes)) => { |
117 | let name = bytes.name().into_inner(); |
118 | assert!( |
119 | name == b"protocol" , |
120 | "Unexpected closing token ` {}`" , |
121 | String::from_utf8_lossy(name) |
122 | ); |
123 | break; |
124 | } |
125 | // ignore comments |
126 | Ok(Event::Comment(_)) => {} |
127 | e => panic!("Ill-formed protocol file: unexpected token {:?}" , e), |
128 | } |
129 | } |
130 | |
131 | protocol |
132 | } |
133 | |
134 | fn parse_interface<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Interface { |
135 | let mut interface = Interface::new(); |
136 | for attr in attrs.filter_map(|res| res.ok()) { |
137 | match attr.key.into_inner() { |
138 | b"name" => interface.name = decode_utf8_or_panic(attr.value.into_owned()), |
139 | b"version" => interface.version = parse_or_panic(&attr.value), |
140 | _ => {} |
141 | } |
142 | } |
143 | |
144 | loop { |
145 | match reader.read_event_into(&mut Vec::new()) { |
146 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
147 | b"description" => { |
148 | interface.description = Some(parse_description(reader, bytes.attributes())) |
149 | } |
150 | b"request" => interface.requests.push(parse_request(reader, bytes.attributes())), |
151 | b"event" => interface.events.push(parse_event(reader, bytes.attributes())), |
152 | b"enum" => interface.enums.push(parse_enum(reader, bytes.attributes())), |
153 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
154 | }, |
155 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"interface" => break, |
156 | _ => {} |
157 | } |
158 | } |
159 | |
160 | interface |
161 | } |
162 | |
163 | fn parse_description<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> (String, String) { |
164 | let mut summary = String::new(); |
165 | for attr in attrs.filter_map(|res| res.ok()) { |
166 | if attr.key.into_inner() == b"summary" { |
167 | summary = String::from_utf8_lossy(&attr.value) |
168 | .split_whitespace() |
169 | .collect::<Vec<_>>() |
170 | .join(" " ); |
171 | } |
172 | } |
173 | |
174 | let mut description = String::new(); |
175 | // Some protocols have comments inside their descriptions, so we need to parse them in a loop and |
176 | // concatenate the parts into a single block of text |
177 | loop { |
178 | match reader.read_event_into(&mut Vec::new()) { |
179 | Ok(Event::Text(bytes)) => { |
180 | if !description.is_empty() { |
181 | description.push_str(" \n\n" ); |
182 | } |
183 | description.push_str(&bytes.unescape().unwrap_or_default()) |
184 | } |
185 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"description" => break, |
186 | Ok(Event::Comment(_)) => {} |
187 | e => panic!("Ill-formed protocol file: {:?}" , e), |
188 | } |
189 | } |
190 | |
191 | (summary, description) |
192 | } |
193 | |
194 | fn parse_request<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Message { |
195 | let mut request = Message::new(); |
196 | for attr in attrs.filter_map(|res| res.ok()) { |
197 | match attr.key.into_inner() { |
198 | b"name" => request.name = decode_utf8_or_panic(attr.value.into_owned()), |
199 | b"type" => request.typ = Some(parse_type(&attr.value)), |
200 | b"since" => request.since = parse_or_panic(&attr.value), |
201 | _ => {} |
202 | } |
203 | } |
204 | |
205 | loop { |
206 | match reader.read_event_into(&mut Vec::new()) { |
207 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
208 | b"description" => { |
209 | request.description = Some(parse_description(reader, bytes.attributes())) |
210 | } |
211 | b"arg" => request.args.push(parse_arg(reader, bytes.attributes())), |
212 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
213 | }, |
214 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"request" => break, |
215 | _ => {} |
216 | } |
217 | } |
218 | |
219 | request |
220 | } |
221 | |
222 | fn parse_enum<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Enum { |
223 | let mut enu = Enum::new(); |
224 | for attr in attrs.filter_map(|res| res.ok()) { |
225 | match attr.key.into_inner() { |
226 | b"name" => enu.name = decode_utf8_or_panic(attr.value.into_owned()), |
227 | b"since" => enu.since = parse_or_panic(&attr.value), |
228 | b"bitfield" => { |
229 | if &attr.value[..] == b"true" { |
230 | enu.bitfield = true |
231 | } |
232 | } |
233 | _ => {} |
234 | } |
235 | } |
236 | |
237 | loop { |
238 | match reader.read_event_into(&mut Vec::new()) { |
239 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
240 | b"description" => { |
241 | enu.description = Some(parse_description(reader, bytes.attributes())) |
242 | } |
243 | b"entry" => enu.entries.push(parse_entry(reader, bytes.attributes())), |
244 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
245 | }, |
246 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"enum" => break, |
247 | _ => {} |
248 | } |
249 | } |
250 | |
251 | enu |
252 | } |
253 | |
254 | fn parse_event<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Message { |
255 | let mut event = Message::new(); |
256 | for attr in attrs.filter_map(|res| res.ok()) { |
257 | match attr.key.into_inner() { |
258 | b"name" => event.name = decode_utf8_or_panic(attr.value.into_owned()), |
259 | b"type" => event.typ = Some(parse_type(&attr.value)), |
260 | b"since" => event.since = parse_or_panic(&attr.value), |
261 | _ => {} |
262 | } |
263 | } |
264 | |
265 | loop { |
266 | match reader.read_event_into(&mut Vec::new()) { |
267 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
268 | b"description" => { |
269 | event.description = Some(parse_description(reader, bytes.attributes())) |
270 | } |
271 | b"arg" => event.args.push(parse_arg(reader, bytes.attributes())), |
272 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
273 | }, |
274 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"event" => break, |
275 | _ => {} |
276 | } |
277 | } |
278 | |
279 | event |
280 | } |
281 | |
282 | fn parse_arg<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Arg { |
283 | let mut arg = Arg::new(); |
284 | for attr in attrs.filter_map(|res| res.ok()) { |
285 | match attr.key.into_inner() { |
286 | b"name" => arg.name = decode_utf8_or_panic(attr.value.into_owned()), |
287 | b"type" => arg.typ = parse_type(&attr.value), |
288 | b"summary" => { |
289 | arg.summary = Some( |
290 | String::from_utf8_lossy(&attr.value) |
291 | .split_whitespace() |
292 | .collect::<Vec<_>>() |
293 | .join(" " ), |
294 | ) |
295 | } |
296 | b"interface" => arg.interface = Some(parse_or_panic(&attr.value)), |
297 | b"allow-null" => { |
298 | if &*attr.value == b"true" { |
299 | arg.allow_null = true |
300 | } |
301 | } |
302 | b"enum" => arg.enum_ = Some(decode_utf8_or_panic(attr.value.into_owned())), |
303 | _ => {} |
304 | } |
305 | } |
306 | |
307 | loop { |
308 | match reader.read_event_into(&mut Vec::new()) { |
309 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
310 | b"description" => { |
311 | arg.description = Some(parse_description(reader, bytes.attributes())) |
312 | } |
313 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
314 | }, |
315 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"arg" => break, |
316 | _ => {} |
317 | } |
318 | } |
319 | |
320 | arg |
321 | } |
322 | |
323 | fn parse_type(txt: &[u8]) -> Type { |
324 | match txt { |
325 | b"int" => Type::Int, |
326 | b"uint" => Type::Uint, |
327 | b"fixed" => Type::Fixed, |
328 | b"string" => Type::String, |
329 | b"object" => Type::Object, |
330 | b"new_id" => Type::NewId, |
331 | b"array" => Type::Array, |
332 | b"fd" => Type::Fd, |
333 | b"destructor" => Type::Destructor, |
334 | e: &[u8] => panic!("Unexpected type: {}" , String::from_utf8_lossy(e)), |
335 | } |
336 | } |
337 | |
338 | fn parse_entry<R: BufRead>(reader: &mut Reader<R>, attrs: Attributes) -> Entry { |
339 | let mut entry = Entry::new(); |
340 | for attr in attrs.filter_map(|res| res.ok()) { |
341 | match attr.key.into_inner() { |
342 | b"name" => entry.name = decode_utf8_or_panic(attr.value.into_owned()), |
343 | b"value" => { |
344 | entry.value = if attr.value.starts_with(b"0x" ) { |
345 | if let Some(val) = std::str::from_utf8(&attr.value[2..]) |
346 | .ok() |
347 | .and_then(|s| u32::from_str_radix(s, 16).ok()) |
348 | { |
349 | val |
350 | } else { |
351 | panic!("Invalid number: {}" , String::from_utf8_lossy(&attr.value)) |
352 | } |
353 | } else { |
354 | parse_or_panic(&attr.value) |
355 | }; |
356 | } |
357 | b"since" => entry.since = parse_or_panic(&attr.value), |
358 | b"summary" => { |
359 | entry.summary = Some( |
360 | String::from_utf8_lossy(&attr.value) |
361 | .split_whitespace() |
362 | .collect::<Vec<_>>() |
363 | .join(" " ), |
364 | ) |
365 | } |
366 | _ => {} |
367 | } |
368 | } |
369 | |
370 | loop { |
371 | match reader.read_event_into(&mut Vec::new()) { |
372 | Ok(Event::Start(bytes)) => match bytes.name().into_inner() { |
373 | b"description" => { |
374 | entry.description = Some(parse_description(reader, bytes.attributes())) |
375 | } |
376 | name => panic!("Unexpected token: ` {}`" , String::from_utf8_lossy(name)), |
377 | }, |
378 | Ok(Event::End(bytes)) if bytes.name().into_inner() == b"entry" => break, |
379 | _ => {} |
380 | } |
381 | } |
382 | |
383 | entry |
384 | } |
385 | |
#[cfg(test)]
mod tests {
    /// Opens the protocol file at `path` and runs the parser over it.
    ///
    /// The parsed result is discarded: the tests only check that parsing does not panic.
    fn parse_asset(path: &str) {
        let protocol_file = std::fs::File::open(path).unwrap();
        let _ = crate::parse::parse(protocol_file);
    }

    #[test]
    fn xml_parse() {
        parse_asset("./tests/scanner_assets/test-protocol.xml");
    }

    #[test]
    fn headerless_xml_parse() {
        parse_asset("./tests/scanner_assets/test-headerless-protocol.xml");
    }
}
402 | |