1// (C) Copyright 2016 Jethro G. Beekman
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
//! Evaluating C expressions from tokens.
//!
//! Numerical operators are supported. All numerical values are treated as
//! `i64` or `f64`. Type casting is not supported. `i64` are converted to
//! `f64` when used in conjunction with a `f64`. Right shifts are always
//! arithmetic shifts.
//!
//! The `sizeof` operator is not supported.
//!
//! String concatenation is supported, but width prefixes are ignored; all
//! strings are treated as narrow strings.
//!
//! Use the `IdentifierParser` to substitute identifiers found in expressions.
21
22use std::collections::HashMap;
23use std::num::Wrapping;
24use std::ops::{
25 AddAssign, BitAndAssign, BitOrAssign, BitXorAssign, DivAssign, MulAssign, RemAssign, ShlAssign,
26 ShrAssign, SubAssign,
27};
28
29use crate::literal::{self, CChar};
30use crate::token::{Kind as TokenKind, Token};
31use crate::ToCexprResult;
32use nom::branch::alt;
33use nom::combinator::{complete, map, map_opt};
34use nom::multi::{fold_many0, many0, separated_list0};
35use nom::sequence::{delimited, pair, preceded};
36use nom::*;
37
38/// Expression parser/evaluator that supports identifiers.
39#[derive(Debug)]
40pub struct IdentifierParser<'ident> {
41 identifiers: &'ident HashMap<Vec<u8>, EvalResult>,
42}
43#[derive(Copy, Clone)]
44struct PRef<'a>(&'a IdentifierParser<'a>);
45
46/// A shorthand for the type of cexpr expression evaluation results.
47pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>;
48
49/// The result of parsing a literal or evaluating an expression.
50#[derive(Debug, Clone, PartialEq)]
51#[allow(missing_docs)]
52pub enum EvalResult {
53 Int(Wrapping<i64>),
54 Float(f64),
55 Char(CChar),
56 Str(Vec<u8>),
57 Invalid,
58}
59
// Generates a consuming accessor `fn $n(self) -> Option<$t>` that returns the
// payload of `EvalResult::$e`, or `None` when `self` is any other variant.
macro_rules! result_opt (
    (fn $n:ident: $e:ident -> $t:ty) => (
        #[allow(dead_code)]
        #[allow(clippy::wrong_self_convention)]
        fn $n(self) -> Option<$t> {
            if let EvalResult::$e(v) = self {
                Some(v)
            } else {
                None
            }
        }
    );
);
73
74impl EvalResult {
75 result_opt!(fn as_int: Int -> Wrapping<i64>);
76 result_opt!(fn as_float: Float -> f64);
77 result_opt!(fn as_char: Char -> CChar);
78 result_opt!(fn as_str: Str -> Vec<u8>);
79
80 #[allow(clippy::wrong_self_convention)]
81 fn as_numeric(self) -> Option<EvalResult> {
82 match self {
83 EvalResult::Int(_) | EvalResult::Float(_) => Some(self),
84 _ => None,
85 }
86 }
87}
88
89impl From<Vec<u8>> for EvalResult {
90 fn from(s: Vec<u8>) -> EvalResult {
91 EvalResult::Str(s)
92 }
93}
94
95// ===========================================
96// ============= Clang tokens ================
97// ===========================================
98
// Expands to a parser closure over `&[Token]` that accepts exactly one token
// of kind `TokenKind::$k` whose raw bytes equal `$c`, returning those bytes.
// Empty input yields `Incomplete`; any other token yields an `ExactToken`
// error.
macro_rules! exact_token (
    ($k:ident, $c:expr) => ({
        move |input: &[Token]| {
            if input.is_empty() {
                // The explicit annotation pins the closure's return type.
                let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::new($c.len())));
                res
            } else if input[0].kind == TokenKind::$k && &input[0].raw[..] == $c {
                Ok((&input[1..], &input[0].raw[..]))
            } else {
                Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k, $c)).into()))
            }
        }
    });
);
115
116fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> {
117 if input.is_empty() {
118 let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::new(1)));
119 res
120 } else {
121 if input[0].kind == TokenKind::Identifier {
122 Ok((&input[1..], &input[0].raw[..]))
123 } else {
124 Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into()))
125 }
126 }
127}
128
129fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
130 exact_token!(Punctuation, c.as_bytes())
131}
132
133fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> {
134 move |input: &[Token]| {
135 if input.is_empty() {
136 let min: usize = c
137 .iter()
138 .map(|opt| opt.len())
139 .min()
140 .expect(msg:"at least one option");
141 Err(crate::nom::Err::Incomplete(Needed::new(min)))
142 } else if input[0].kind == TokenKind::Punctuation
143 && c.iter().any(|opt: &&str| opt.as_bytes() == &input[0].raw[..])
144 {
145 Ok((&input[1..], &input[0].raw[..]))
146 } else {
147 Err(crate::nom::Err::Error(
148 (
149 input,
150 crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c),
151 )
152 .into(),
153 ))
154 }
155 }
156}
157
158// ==================================================
159// ============= Numeric expressions ================
160// ==================================================
161
162impl<'a> AddAssign<&'a EvalResult> for EvalResult {
163 fn add_assign(&mut self, rhs: &'a EvalResult) {
164 use self::EvalResult::*;
165 *self = match (&*self, rhs) {
166 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a + b),
167 (&Float(a: f64), &Int(b: Wrapping)) => Float(a + (b.0 as f64)),
168 (&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 + b),
169 (&Float(a: f64), &Float(b: f64)) => Float(a + b),
170 _ => Invalid,
171 };
172 }
173}
174impl<'a> BitAndAssign<&'a EvalResult> for EvalResult {
175 fn bitand_assign(&mut self, rhs: &'a EvalResult) {
176 use self::EvalResult::*;
177 *self = match (&*self, rhs) {
178 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a & b),
179 _ => Invalid,
180 };
181 }
182}
183impl<'a> BitOrAssign<&'a EvalResult> for EvalResult {
184 fn bitor_assign(&mut self, rhs: &'a EvalResult) {
185 use self::EvalResult::*;
186 *self = match (&*self, rhs) {
187 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a | b),
188 _ => Invalid,
189 };
190 }
191}
192impl<'a> BitXorAssign<&'a EvalResult> for EvalResult {
193 fn bitxor_assign(&mut self, rhs: &'a EvalResult) {
194 use self::EvalResult::*;
195 *self = match (&*self, rhs) {
196 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a ^ b),
197 _ => Invalid,
198 };
199 }
200}
201impl<'a> DivAssign<&'a EvalResult> for EvalResult {
202 fn div_assign(&mut self, rhs: &'a EvalResult) {
203 use self::EvalResult::*;
204 *self = match (&*self, rhs) {
205 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a / b),
206 (&Float(a: f64), &Int(b: Wrapping)) => Float(a / (b.0 as f64)),
207 (&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 / b),
208 (&Float(a: f64), &Float(b: f64)) => Float(a / b),
209 _ => Invalid,
210 };
211 }
212}
213impl<'a> MulAssign<&'a EvalResult> for EvalResult {
214 fn mul_assign(&mut self, rhs: &'a EvalResult) {
215 use self::EvalResult::*;
216 *self = match (&*self, rhs) {
217 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a * b),
218 (&Float(a: f64), &Int(b: Wrapping)) => Float(a * (b.0 as f64)),
219 (&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 * b),
220 (&Float(a: f64), &Float(b: f64)) => Float(a * b),
221 _ => Invalid,
222 };
223 }
224}
225impl<'a> RemAssign<&'a EvalResult> for EvalResult {
226 fn rem_assign(&mut self, rhs: &'a EvalResult) {
227 use self::EvalResult::*;
228 *self = match (&*self, rhs) {
229 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a % b),
230 (&Float(a: f64), &Int(b: Wrapping)) => Float(a % (b.0 as f64)),
231 (&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 % b),
232 (&Float(a: f64), &Float(b: f64)) => Float(a % b),
233 _ => Invalid,
234 };
235 }
236}
237impl<'a> ShlAssign<&'a EvalResult> for EvalResult {
238 fn shl_assign(&mut self, rhs: &'a EvalResult) {
239 use self::EvalResult::*;
240 *self = match (&*self, rhs) {
241 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a << (b.0 as usize)),
242 _ => Invalid,
243 };
244 }
245}
246impl<'a> ShrAssign<&'a EvalResult> for EvalResult {
247 fn shr_assign(&mut self, rhs: &'a EvalResult) {
248 use self::EvalResult::*;
249 *self = match (&*self, rhs) {
250 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a >> (b.0 as usize)),
251 _ => Invalid,
252 };
253 }
254}
255impl<'a> SubAssign<&'a EvalResult> for EvalResult {
256 fn sub_assign(&mut self, rhs: &'a EvalResult) {
257 use self::EvalResult::*;
258 *self = match (&*self, rhs) {
259 (&Int(a: Wrapping), &Int(b: Wrapping)) => Int(a - b),
260 (&Float(a: f64), &Int(b: Wrapping)) => Float(a - (b.0 as f64)),
261 (&Int(a: Wrapping), &Float(b: f64)) => Float(a.0 as f64 - b),
262 (&Float(a: f64), &Float(b: f64)) => Float(a - b),
263 _ => Invalid,
264 };
265 }
266}
267
268fn unary_op(input: (&[u8], EvalResult)) -> Option<EvalResult> {
269 use self::EvalResult::*;
270 assert_eq!(input.0.len(), 1);
271 match (input.0[0], input.1) {
272 (b'+', i: EvalResult) => Some(i),
273 (b'-', Int(i: Wrapping)) => Some(Int(Wrapping(i.0.wrapping_neg()))), // impl Neg for Wrapping not until rust 1.10...
274 (b'-', Float(i: f64)) => Some(Float(-i)),
275 (b'-', _) => unreachable!("non-numeric unary op"),
276 (b'~', Int(i: Wrapping)) => Some(Int(!i)),
277 (b'~', Float(_)) => None,
278 (b'~', _) => unreachable!("non-numeric unary op"),
279 _ => unreachable!("invalid unary op"),
280 }
281}
282
283fn numeric<I: Clone, E: nom::error::ParseError<I>, F>(
284 f: F,
285) -> impl FnMut(I) -> nom::IResult<I, EvalResult, E>
286where
287 F: FnMut(I) -> nom::IResult<I, EvalResult, E>,
288{
289 nom::combinator::map_opt(parser:f, f:EvalResult::as_numeric)
290}
291
292impl<'a> PRef<'a> {
293 fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
294 alt((
295 delimited(p("("), |i| self.numeric_expr(i), p(")")),
296 numeric(|i| self.literal(i)),
297 numeric(|i| self.identifier(i)),
298 map_opt(
299 pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)),
300 unary_op,
301 ),
302 ))(input)
303 }
304
305 fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
306 let (input, acc) = self.unary(input)?;
307 fold_many0(
308 pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| {
309 self.unary(i)
310 }),
311 move || acc.clone(),
312 |mut acc, (op, val): (&[u8], EvalResult)| {
313 match op[0] as char {
314 '*' => acc *= &val,
315 '/' => acc /= &val,
316 '%' => acc %= &val,
317 _ => unreachable!(),
318 };
319 acc
320 },
321 )(input)
322 }
323
324 fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
325 let (input, acc) = self.mul_div_rem(input)?;
326 fold_many0(
327 pair(complete(one_of_punctuation(&["+", "-"][..])), |i| {
328 self.mul_div_rem(i)
329 }),
330 move || acc.clone(),
331 |mut acc, (op, val): (&[u8], EvalResult)| {
332 match op[0] as char {
333 '+' => acc += &val,
334 '-' => acc -= &val,
335 _ => unreachable!(),
336 };
337 acc
338 },
339 )(input)
340 }
341
342 fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
343 let (input, acc) = self.add_sub(input)?;
344 numeric(fold_many0(
345 pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| {
346 self.add_sub(i)
347 }),
348 move || acc.clone(),
349 |mut acc, (op, val): (&[u8], EvalResult)| {
350 match op {
351 b"<<" => acc <<= &val,
352 b">>" => acc >>= &val,
353 _ => unreachable!(),
354 };
355 acc
356 },
357 ))(input)
358 }
359
360 fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
361 let (input, acc) = self.shl_shr(input)?;
362 numeric(fold_many0(
363 preceded(complete(p("&")), |i| self.shl_shr(i)),
364 move || acc.clone(),
365 |mut acc, val: EvalResult| {
366 acc &= &val;
367 acc
368 },
369 ))(input)
370 }
371
372 fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
373 let (input, acc) = self.and(input)?;
374 numeric(fold_many0(
375 preceded(complete(p("^")), |i| self.and(i)),
376 move || acc.clone(),
377 |mut acc, val: EvalResult| {
378 acc ^= &val;
379 acc
380 },
381 ))(input)
382 }
383
384 fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
385 let (input, acc) = self.xor(input)?;
386 numeric(fold_many0(
387 preceded(complete(p("|")), |i| self.xor(i)),
388 move || acc.clone(),
389 |mut acc, val: EvalResult| {
390 acc |= &val;
391 acc
392 },
393 ))(input)
394 }
395
396 #[inline(always)]
397 fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
398 self.or(input)
399 }
400}
401
402// =======================================================
403// ============= Literals and identifiers ================
404// =======================================================
405
406impl<'a> PRef<'a> {
407 fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
408 match input.split_first() {
409 None => Err(Err::Incomplete(Needed::new(1))),
410 Some((
411 &Token {
412 kind: TokenKind::Identifier,
413 ref raw,
414 },
415 rest,
416 )) => {
417 if let Some(r) = self.identifiers.get(&raw[..]) {
418 Ok((rest, r.clone()))
419 } else {
420 Err(Err::Error(
421 (input, crate::ErrorKind::UnknownIdentifier).into(),
422 ))
423 }
424 }
425 Some(_) => Err(Err::Error(
426 (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(),
427 )),
428 }
429 }
430
431 fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
432 match input.split_first() {
433 None => Err(Err::Incomplete(Needed::new(1))),
434 Some((
435 &Token {
436 kind: TokenKind::Literal,
437 ref raw,
438 },
439 rest,
440 )) => match literal::parse(raw) {
441 Ok((_, result)) => Ok((rest, result)),
442 _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())),
443 },
444 Some(_) => Err(Err::Error(
445 (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(),
446 )),
447 }
448 }
449
450 fn string(self, input: &'_ [Token]) -> CResult<'_, Vec<u8>> {
451 alt((
452 map_opt(|i| self.literal(i), EvalResult::as_str),
453 map_opt(|i| self.identifier(i), EvalResult::as_str),
454 ))(input)
455 .to_cexpr_result()
456 }
457
458 // "string1" "string2" etc...
459 fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
460 map(
461 pair(|i| self.string(i), many0(complete(|i| self.string(i)))),
462 |(first, v)| {
463 Vec::into_iter(v)
464 .fold(first, |mut s, elem| {
465 Vec::extend_from_slice(&mut s, Vec::<u8>::as_slice(&elem));
466 s
467 })
468 .into()
469 },
470 )(input)
471 .to_cexpr_result()
472 }
473
474 fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> {
475 alt((
476 |i| self.numeric_expr(i),
477 delimited(p("("), |i| self.expr(i), p(")")),
478 |i| self.concat_str(i),
479 |i| self.literal(i),
480 |i| self.identifier(i),
481 ))(input)
482 .to_cexpr_result()
483 }
484
485 fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
486 pair(identifier_token, |i| self.expr(i))(input)
487 }
488}
489
490impl<'a> ::std::ops::Deref for PRef<'a> {
491 type Target = IdentifierParser<'a>;
492 fn deref(&self) -> &IdentifierParser<'a> {
493 self.0
494 }
495}
496
497impl<'ident> IdentifierParser<'ident> {
498 fn as_ref(&self) -> PRef<'_> {
499 PRef(self)
500 }
501
502 /// Create a new `IdentifierParser` with a set of known identifiers. When
503 /// a known identifier is encountered during parsing, it is substituted
504 /// for the value specified.
505 pub fn new(identifiers: &HashMap<Vec<u8>, EvalResult>) -> IdentifierParser<'_> {
506 IdentifierParser { identifiers }
507 }
508
509 /// Parse and evaluate an expression of a list of tokens.
510 ///
511 /// Returns an error if the input is not a valid expression or if the token
512 /// stream contains comments, keywords or unknown identifiers.
513 pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> {
514 self.as_ref().expr(input)
515 }
516
517 /// Parse and evaluate a macro definition from a list of tokens.
518 ///
519 /// Returns the identifier for the macro and its replacement evaluated as an
520 /// expression. The input should not include `#define`.
521 ///
522 /// Returns an error if the replacement is not a valid expression, if called
523 /// on most function-like macros, or if the token stream contains comments,
524 /// keywords or unknown identifiers.
525 ///
526 /// N.B. This is intended to fail on function-like macros, but if it the
527 /// macro takes a single argument, the argument name is defined as an
528 /// identifier, and the macro otherwise parses as an expression, it will
529 /// return a result even on function-like macros.
530 ///
531 /// ```c
532 /// // will evaluate into IDENTIFIER
533 /// #define DELETE(IDENTIFIER)
534 /// // will evaluate into IDENTIFIER-3
535 /// #define NEGATIVE_THREE(IDENTIFIER) -3
536 /// ```
537 pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> {
538 crate::assert_full_parse(self.as_ref().macro_definition(input))
539 }
540}
541
542/// Parse and evaluate an expression of a list of tokens.
543///
544/// Returns an error if the input is not a valid expression or if the token
545/// stream contains comments, keywords or identifiers.
546pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> {
547 IdentifierParser::new(&HashMap::new()).expr(input)
548}
549
550/// Parse and evaluate a macro definition from a list of tokens.
551///
552/// Returns the identifier for the macro and its replacement evaluated as an
553/// expression. The input should not include `#define`.
554///
555/// Returns an error if the replacement is not a valid expression, if called
556/// on a function-like macro, or if the token stream contains comments,
557/// keywords or identifiers.
558pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> {
559 IdentifierParser::new(&HashMap::new()).macro_definition(input)
560}
561
562/// Parse a functional macro declaration from a list of tokens.
563///
564/// Returns the identifier for the macro and the argument list (in order). The
565/// input should not include `#define`. The actual definition is not parsed and
566/// may be obtained from the unparsed data returned.
567///
568/// Returns an error if the input is not a functional macro or if the token
569/// stream contains comments.
570///
571/// # Example
572/// ```
573/// use cexpr::expr::{IdentifierParser, EvalResult, fn_macro_declaration};
574/// use cexpr::assert_full_parse;
575/// use cexpr::token::Kind::*;
576/// use cexpr::token::Token;
577///
578/// // #define SUFFIX(arg) arg "suffix"
579/// let tokens = vec![
580/// (Identifier, &b"SUFFIX"[..]).into(),
581/// (Punctuation, &b"("[..]).into(),
582/// (Identifier, &b"arg"[..]).into(),
583/// (Punctuation, &b")"[..]).into(),
584/// (Identifier, &b"arg"[..]).into(),
585/// (Literal, &br#""suffix""#[..]).into(),
586/// ];
587///
588/// // Try to parse the functional part
589/// let (expr, (ident, args)) = fn_macro_declaration(&tokens).unwrap();
590/// assert_eq!(ident, b"SUFFIX");
591///
592/// // Create dummy arguments
593/// let idents = args.into_iter().map(|arg|
594/// (arg.to_owned(), EvalResult::Str(b"test".to_vec()))
595/// ).collect();
596///
597/// // Evaluate the macro
598/// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap();
599/// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec()));
600/// ```
601pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> {
602 pair(
603 first:identifier_token,
604 second:delimited(
605 first:p("("),
606 second:separated_list0(p(","), identifier_token),
607 third:p(")"),
608 ),
609 )(input)
610}
611