expr.rs source code [crates/cexpr-0.6.0/src/expr.rs]

1	// (C) Copyright 2016 Jethro G. Beekman
2	//
3	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6	// option. This file may not be copied, modified, or distributed
7	// except according to those terms.
8	//! Evaluating C expressions from tokens.
9	//!
10	//! Numerical operators are supported. All numerical values are treated as
11	//! `i64` or `f64`. Type casting is not supported. `i64` are converted to
12	//! `f64` when used in conjunction with a `f64`. Right shifts are always
13	//! arithmetic shifts.
14	//!
15	//! The `sizeof` operator is not supported.
16	//!
17	//! String concatenation is supported, but width prefixes are ignored; all
18	//! strings are treated as narrow strings.
19	//!
20	//! Use the `IdentifierParser` to substitute identifiers found in expressions.
21
22	use std::collections::HashMap;
23	use std::num::Wrapping;
24	use std::ops::{
25	AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign,
26	ShrAssign, SubAssign,
27	};
28
29	use crate::literal::{self, CChar};
30	use crate::token::{Kind as TokenKind, Token};
31	use crate::ToCexprResult;
32	use nom::branch::alt;
33	use nom::combinator::{complete, map, map_opt};
34	use nom::multi::{fold_many0, many0, separated_list0};
35	use nom::sequence::{delimited, pair, preceded};
36	use nom::*;
37
38	/// Expression parser/evaluator that supports identifiers.
39	#[derive(Debug)]
40	pub struct IdentifierParser<'ident> {
41	identifiers: &'ident HashMap<Vec<u8>, EvalResult>,
42	}
43	#[derive(Copy, Clone)]
44	struct PRef<'a>(&'a IdentifierParser<'a>);
45
46	/// A shorthand for the type of cexpr expression evaluation results.
47	pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>;
48
49	/// The result of parsing a literal or evaluating an expression.
50	#[derive(Debug, Clone, PartialEq)]
51	#[allow(missing_docs)]
52	pub enum EvalResult {
53	Int(Wrapping<i64>),
54	Float(f64),
55	Char(CChar),
56	Str(Vec<u8>),
57	Invalid,
58	}
59
// Generates a consuming accessor `fn $n(self) -> Option<$t>` that returns the
// payload of `EvalResult::$e` and `None` for every other variant.
macro_rules! result_opt (
    (fn $n:ident: $e:ident -> $t:ty) => (
        #[allow(dead_code)]
        #[allow(clippy::wrong_self_convention)]
        fn $n(self) -> Option<$t> {
            match self {
                EvalResult::$e(v) => Some(v),
                _ => None,
            }
        }
    );
);
73
74	impl EvalResult {
75	result_opt!(fn as_int: Int -> Wrapping<i64>);
76	result_opt!(fn as_float: Float -> f64);
77	result_opt!(fn as_char: Char -> CChar);
78	result_opt!(fn as_str: Str -> Vec<u8>);
79
80	#[allow(clippy::wrong_self_convention)]
81	fn as_numeric(self) -> Option<EvalResult> {
82	match self {
83	EvalResult::Int(_) \| EvalResult::Float(_) => Some(self),
84	_ => None,
85	}
86	}
87	}
88
89	impl From<Vec<u8>> for EvalResult {
90	fn from(s: Vec<u8>) -> EvalResult {
91	EvalResult::Str(s)
92	}
93	}
94
95	// ===========================================
96	// ============= Clang tokens ================
97	// ===========================================
98
// Expands to a closure over a token slice that accepts exactly one token of
// kind `TokenKind::$k` whose raw bytes equal `$c`.
//
// * empty input  -> `Incomplete`, asking for `$c.len()` more
// * match        -> the remaining tokens and the raw bytes of the matched token
// * anything else -> an `ExactToken` error positioned at `input`
macro_rules! exact_token (
    ($k:ident, $c:expr) => ({
        move |input: &[Token]| {
            if input.is_empty() {
                // The annotated binding pins down the closure's result type.
                let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len())));
                res
            } else {
                if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c {
                    Ok((&input[1..], &input[0].raw[..]))
                } else {
                    Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into()))
                }
            }
        }
    });
);
115
116	fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> {
117	if input.is_empty() {
118	let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::new(`1`)));
119	res
120	} else {
121	if input[`0`].kind == TokenKind::Identifier {
122	Ok((&input[`1`..], &input[`0`].raw[..]))
123	} else {
124	Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into()))
125	}
126	}
127	}
128
129	fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
130	exact_token!(Punctuation, c.as_bytes())
131	}
132
133	fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
134	move \|input: &[Token]\| {
135	if input.is_empty() {
136	let min: usize = c
137	.iter()
138	.map(\|opt\| opt.len())
139	.min()
140	.expect(msg:"at least one option");
141	Err(crate::nom::Err::Incomplete(Needed::new(min)))
142	} else if input[`0`].kind == TokenKind::Punctuation
143	&& c.iter().any(\|opt: &&str\| opt.as_bytes() == &input[`0`].raw[..])
144	{
145	Ok((&input[`1`..], &input[`0`].raw[..]))
146	} else {
147	Err(crate::nom::Err::Error(
148	(
149	input,
150	crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c),
151	)
152	.into(),
153	))
154	}
155	}
156	}
157
158	// ==================================================
159	// ============= Numeric expressions ================
160	// ==================================================
161
162	impl<'a> AddAssign<&'a EvalResult> for EvalResult {
163	fn add_assign(&mut self, rhs: &'a EvalResult) {
164	use self::EvalResult::*;
165	self = match* (&*self, rhs) {
166	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a + b),
167	(&Float(a: f64), &Int(b: Wrapping)) => Float(a + (b.0 as f64)),
168	(&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 + b),
169	(&Float(a: f64), &Float(b: f64)) => Float(a + b),
170	_ => Invalid,
171	};
172	}
173	}
174	impl<'a> BitAndAssign<&'a EvalResult> for EvalResult {
175	fn bitand_assign(&mut self, rhs: &'a EvalResult) {
176	use self::EvalResult::*;
177	self = match* (&*self, rhs) {
178	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a & b),
179	_ => Invalid,
180	};
181	}
182	}
183	impl<'a> BitOrAssign<&'a EvalResult> for EvalResult {
184	fn bitor_assign(&mut self, rhs: &'a EvalResult) {
185	use self::EvalResult::*;
186	self = match* (&*self, rhs) {
187	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a \| b),
188	_ => Invalid,
189	};
190	}
191	}
192	impl<'a> BitXorAssign<&'a EvalResult> for EvalResult {
193	fn bitxor_assign(&mut self, rhs: &'a EvalResult) {
194	use self::EvalResult::*;
195	self = match* (&*self, rhs) {
196	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a ^ b),
197	_ => Invalid,
198	};
199	}
200	}
201	impl<'a> DivAssign<&'a EvalResult> for EvalResult {
202	fn div_assign(&mut self, rhs: &'a EvalResult) {
203	use self::EvalResult::*;
204	self = match* (&*self, rhs) {
205	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a / b),
206	(&Float(a: f64), &Int(b: Wrapping)) => Float(a / (b.0 as f64)),
207	(&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 / b),
208	(&Float(a: f64), &Float(b: f64)) => Float(a / b),
209	_ => Invalid,
210	};
211	}
212	}
213	impl<'a> MulAssign<&'a EvalResult> for EvalResult {
214	fn mul_assign(&mut self, rhs: &'a EvalResult) {
215	use self::EvalResult::*;
216	self = match* (&*self, rhs) {
217	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a * b),
218	(&Float(a: f64), &Int(b: Wrapping)) => Float(a * (b.0 as f64)),
219	(&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 * b),
220	(&Float(a: f64), &Float(b: f64)) => Float(a * b),
221	_ => Invalid,
222	};
223	}
224	}
225	impl<'a> RemAssign<&'a EvalResult> for EvalResult {
226	fn rem_assign(&mut self, rhs: &'a EvalResult) {
227	use self::EvalResult::*;
228	self = match* (&*self, rhs) {
229	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a % b),
230	(&Float(a: f64), &Int(b: Wrapping)) => Float(a % (b.0 as f64)),
231	(&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 % b),
232	(&Float(a: f64), &Float(b: f64)) => Float(a % b),
233	_ => Invalid,
234	};
235	}
236	}
237	impl<'a> ShlAssign<&'a EvalResult> for EvalResult {
238	fn shl_assign(&mut self, rhs: &'a EvalResult) {
239	use self::EvalResult::*;
240	self = match* (&*self, rhs) {
241	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a << (b.0 as usize)),
242	_ => Invalid,
243	};
244	}
245	}
246	impl<'a> ShrAssign<&'a EvalResult> for EvalResult {
247	fn shr_assign(&mut self, rhs: &'a EvalResult) {
248	use self::EvalResult::*;
249	self = match* (&*self, rhs) {
250	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a >> (b.0 as usize)),
251	_ => Invalid,
252	};
253	}
254	}
255	impl<'a> SubAssign<&'a EvalResult> for EvalResult {
256	fn sub_assign(&mut self, rhs: &'a EvalResult) {
257	use self::EvalResult::*;
258	self = match* (&*self, rhs) {
259	(&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a - b),
260	(&Float(a: f64), &Int(b: Wrapping)) => Float(a - (b.0 as f64)),
261	(&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 - b),
262	(&Float(a: f64), &Float(b: f64)) => Float(a - b),
263	_ => Invalid,
264	};
265	}
266	}
267
268	fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> {
269	use self::EvalResult::*;
270	assert_eq!(input.0.len(), `1`);
271	match (input.0[`0`], input.1) {
272	(b'+', i: EvalResult) => Some(i),
273	(b'-', Int(i: Wrapping)) => Some(Int(Wrapping(i.0.wrapping_neg()))), // impl Neg for Wrapping not until rust 1.10...
274	(b'-', Float(i: f64)) => Some(Float(-i)),
275	(b'-', _) => unreachable!("non-numeric unary op"),
276	(b'~', Int(i: Wrapping)) => Some(Int(!i)),
277	(b'~', Float(_)) => None,
278	(b'~', _) => unreachable!("non-numeric unary op"),
279	_ => unreachable!("invalid unary op"),
280	}
281	}
282
283	fn numeric<I: Clone, E: nom::error::ParseError<I>, F>(
284	f: F,
285	) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E>
286	where
287	F: FnMut(I) -> nom::IResult<I, EvalResult, E>,
288	{
289	nom::combinator::map_opt(parser:f, f:EvalResult::as_numeric)
290	}
291
292	impl<'a> PRef<'a> {
293	fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
294	alt((
295	delimited(p("("), \|i\| self.numeric_expr(i), p(")")),
296	numeric(\|i\| self.literal(i)),
297	numeric(\|i\| self.identifier(i)),
298	map_opt(
299	pair(one_of_punctuation(&["+", "-", "~"][..]), \|i\| self.unary(i)),
300	unary_op,
301	),
302	))(input)
303	}
304
305	fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
306	let (input, acc) = self.unary(input)?;
307	fold_many0(
308	pair(complete(one_of_punctuation(&["*", "/", "%"][..])), \|i\| {
309	self.unary(i)
310	}),
311	move \|\| acc.clone(),
312	\|mut acc, (op, val): (&[u8], EvalResult)\| {
313	match op[`0`] as char {
314	'' => acc = &val,
315	'/' => acc /= &val,
316	'%' => acc %= &val,
317	_ => unreachable!(),
318	};
319	acc
320	},
321	)(input)
322	}
323
324	fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
325	let (input, acc) = self.mul_div_rem(input)?;
326	fold_many0(
327	pair(complete(one_of_punctuation(&["+", "-"][..])), \|i\| {
328	self.mul_div_rem(i)
329	}),
330	move \|\| acc.clone(),
331	\|mut acc, (op, val): (&[u8], EvalResult)\| {
332	match op[`0`] as char {
333	'+' => acc += &val,
334	'-' => acc -= &val,
335	_ => unreachable!(),
336	};
337	acc
338	},
339	)(input)
340	}
341
342	fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
343	let (input, acc) = self.add_sub(input)?;
344	numeric(fold_many0(
345	pair(complete(one_of_punctuation(&["<<", ">>"][..])), \|i\| {
346	self.add_sub(i)
347	}),
348	move \|\| acc.clone(),
349	\|mut acc, (op, val): (&[u8], EvalResult)\| {
350	match op {
351	b"<<" => acc <<= &val,
352	b">>" => acc >>= &val,
353	_ => unreachable!(),
354	};
355	acc
356	},
357	))(input)
358	}
359
360	fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
361	let (input, acc) = self.shl_shr(input)?;
362	numeric(fold_many0(
363	preceded(complete(p("&")), \|i\| self.shl_shr(i)),
364	move \|\| acc.clone(),
365	\|mut acc, val: EvalResult\| {
366	acc &= &val;
367	acc
368	},
369	))(input)
370	}
371
372	fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
373	let (input, acc) = self.and(input)?;
374	numeric(fold_many0(
375	preceded(complete(p("^")), \|i\| self.and(i)),
376	move \|\| acc.clone(),
377	\|mut acc, val: EvalResult\| {
378	acc ^= &val;
379	acc
380	},
381	))(input)
382	}
383
384	fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
385	let (input, acc) = self.xor(input)?;
386	numeric(fold_many0(
387	preceded(complete(p("\|")), \|i\| self.xor(i)),
388	move \|\| acc.clone(),
389	\|mut acc, val: EvalResult\| {
390	acc \|= &val;
391	acc
392	},
393	))(input)
394	}
395
396	#[inline(always)]
397	fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
398	self.or(input)
399	}
400	}
401
402	// =======================================================
403	// ============= Literals and identifiers ================
404	// =======================================================
405
406	impl<'a> PRef<'a> {
407	fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
408	match input.split_first() {
409	None => Err(Err::Incomplete(Needed::new(`1`))),
410	Some((
411	&Token {
412	kind: TokenKind::Identifier,
413	ref raw,
414	},
415	rest,
416	)) => {
417	if let Some(r) = self.identifiers.get(&raw[..]) {
418	Ok((rest, r.clone()))
419	} else {
420	Err(Err::Error(
421	(input, crate::ErrorKind::UnknownIdentifier).into(),
422	))
423	}
424	}
425	Some(_) => Err(Err::Error(
426	(input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
427	)),
428	}
429	}
430
431	fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
432	match input.split_first() {
433	None => Err(Err::Incomplete(Needed::new(`1`))),
434	Some((
435	&Token {
436	kind: TokenKind::Literal,
437	ref raw,
438	},
439	rest,
440	)) => match literal::parse(raw) {
441	Ok((_, result)) => Ok((rest, result)),
442	_ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())),
443	},
444	Some(_) => Err(Err::Error(
445	(input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(),
446	)),
447	}
448	}
449
450	fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> {
451	alt((
452	map_opt(\|i\| self.literal(i), EvalResult::as_str),
453	map_opt(\|i\| self.identifier(i), EvalResult::as_str),
454	))(input)
455	.to_cexpr_result()
456	}
457
458	// "string1" "string2" etc...
459	fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
460	map(
461	pair(\|i\| self.string(i), many0(complete(\|i\| self.string(i)))),
462	\|(first, v)\| {
463	Vec::into_iter(v)
464	.fold(first, \|mut s, elem\| {
465	Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem));
466	s
467	})
468	.into()
469	},
470	)(input)
471	.to_cexpr_result()
472	}
473
474	fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
475	alt((
476	\|i\| self.numeric_expr(i),
477	delimited(p("("), \|i\| self.expr(i), p(")")),
478	\|i\| self.concat_str(i),
479	\|i\| self.literal(i),
480	\|i\| self.identifier(i),
481	))(input)
482	.to_cexpr_result()
483	}
484
485	fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
486	pair(identifier_token, \|i\| self.expr(i))(input)
487	}
488	}
489
490	impl<'a> ::std::ops::Deref for PRef<'a> {
491	type Target = IdentifierParser<'a>;
492	fn deref(&self) -> &IdentifierParser<'a> {
493	self.0
494	}
495	}
496
497	impl<'ident> IdentifierParser<'ident> {
498	fn as_ref(&self) -> PRef<'_> {
499	PRef(self)
500	}
501
502	/// Create a new `IdentifierParser` with a set of known identifiers. When
503	/// a known identifier is encountered during parsing, it is substituted
504	/// for the value specified.
505	pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> {
506	IdentifierParser { identifiers }
507	}
508
509	/// Parse and evaluate an expression of a list of tokens.
510	///
511	/// Returns an error if the input is not a valid expression or if the token
512	/// stream contains comments, keywords or unknown identifiers.
513	pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> {
514	self.as_ref().expr(input)
515	}
516
517	/// Parse and evaluate a macro definition from a list of tokens.
518	///
519	/// Returns the identifier for the macro and its replacement evaluated as an
520	/// expression. The input should not include `#define`.
521	///
522	/// Returns an error if the replacement is not a valid expression, if called
523	/// on most function-like macros, or if the token stream contains comments,
524	/// keywords or unknown identifiers.
525	///
526	/// N.B. This is intended to fail on function-like macros, but if it the
527	/// macro takes a single argument, the argument name is defined as an
528	/// identifier, and the macro otherwise parses as an expression, it will
529	/// return a result even on function-like macros.
530	///
531	/// ```c
532	/// // will evaluate into IDENTIFIER
533	/// #define DELETE(IDENTIFIER)
534	/// // will evaluate into IDENTIFIER-3
535	/// #define NEGATIVE_THREE(IDENTIFIER) -3
536	/// ```
537	pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
538	crate::assert_full_parse(self.as_ref().macro_definition(input))
539	}
540	}
541
542	/// Parse and evaluate an expression of a list of tokens.
543	///
544	/// Returns an error if the input is not a valid expression or if the token
545	/// stream contains comments, keywords or identifiers.
546	pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> {
547	IdentifierParser::new(&HashMap::new()).expr(input)
548	}
549
550	/// Parse and evaluate a macro definition from a list of tokens.
551	///
552	/// Returns the identifier for the macro and its replacement evaluated as an
553	/// expression. The input should not include `#define`.
554	///
555	/// Returns an error if the replacement is not a valid expression, if called
556	/// on a function-like macro, or if the token stream contains comments,
557	/// keywords or identifiers.
558	pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
559	IdentifierParser::new(&HashMap::new()).macro_definition(input)
560	}
561
562	/// Parse a functional macro declaration from a list of tokens.
563	///
564	/// Returns the identifier for the macro and the argument list (in order). The
565	/// input should not include `#define`. The actual definition is not parsed and
566	/// may be obtained from the unparsed data returned.
567	///
568	/// Returns an error if the input is not a functional macro or if the token
569	/// stream contains comments.
570	///
571	/// # Example
572	/// ```
573	/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration};
574	/// use cexpr::assert_full_parse;
575	/// use cexpr::token::Kind::*;
576	/// use cexpr::token::Token;
577	///
578	/// // #define SUFFIX(arg) arg "suffix"
579	/// let tokens = vec![
580	/// (Identifier, &b"SUFFIX"[..]).into(),
581	/// (Punctuation, &b"("[..]).into(),
582	/// (Identifier, &b"arg"[..]).into(),
583	/// (Punctuation, &b")"[..]).into(),
584	/// (Identifier, &b"arg"[..]).into(),
585	/// (Literal, &br#""suffix""#[..]).into(),
586	/// ];
587	///
588	/// // Try to parse the functional part
589	/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap();
590	/// assert_eq!(ident, b"SUFFIX");
591	///
592	/// // Create dummy arguments
593	/// let idents = args.into_iter().map(\|arg\|
594	/// (arg.to_owned(), EvalResult::Str(b"test".to_vec()))
595	/// ).collect();
596	///
597	/// // Evaluate the macro
598	/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap();
599	/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec()));
600	/// ```
601	pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> {
602	pair(
603	first:identifier_token,
604	second:delimited(
605	first:p("("),
606	second:separated_list0(p(","), identifier_token),
607	third:p(")"),
608	),
609	)(input)
610	}
611