1 | #![allow (clippy::empty_docs)]
|
2 | pub mod errors;
|
3 | mod macros;
|
4 | pub mod model;
|
5 | mod tests;
|
6 |
|
7 | use crate::parser::errors::JsonPathError;
|
8 | use crate::parser::model::{
|
9 | Comparable, Comparison, Filter, FilterAtom, FnArg, JpQuery, Literal, Segment, Selector,
|
10 | SingularQuery, SingularQuerySegment, Test, TestFunction,
|
11 | };
|
12 |
|
13 | use pest::iterators::Pair;
|
14 | use pest::Parser;
|
15 |
|
/// Parser type for JSONPath expressions.
///
/// Its parsing implementation comes from `#[derive(Parser)]` applied to the grammar file
/// named in the `grammar` attribute below; the derive also provides the `Rule` enum that
/// the functions in this module match on.
#[derive(Parser)]
#[grammar = "parser/grammar/json_path_9535.pest"]
pub(super) struct JSPathParser;
|
/// Largest integer accepted in a query: 2^53 - 1, the maximum safe integer value in JavaScript.
const MAX_VAL: i64 = (1 << 53) - 1;
/// Smallest integer accepted in a query: -(2^53 - 1), the minimum safe integer value in JavaScript.
const MIN_VAL: i64 = -MAX_VAL;
|
21 |
|
/// Result alias shared by the parsing functions of this module; the error side is always
/// [`JsonPathError`].
pub(super) type Parsed<T> = Result<T, JsonPathError>;
|
23 |
|
24 | /// Parses a string into a [JsonPath].
|
25 | ///
|
26 | /// # Errors
|
27 | ///
|
28 | /// Returns a variant of [crate::JsonPathParserError] if the parsing operation failed.
|
29 | pub fn parse_json_path(jp_str: &str) -> Parsed<JpQuery> {
|
30 | if jp_str != jp_str.trim() {
|
31 | Err(JsonPathError::InvalidJsonPath(
|
32 | "Leading or trailing whitespaces" .to_string(),
|
33 | ))
|
34 | } else {
|
35 | JSPathParser::parse(Rule::main, jp_str)
|
36 | .map_err(Box::new)?
|
37 | .next()
|
38 | .ok_or(JsonPathError::UnexpectedPestOutput)
|
39 | .and_then(next_down)
|
40 | .and_then(op:jp_query)
|
41 | }
|
42 | }
|
43 |
|
44 | pub fn jp_query(rule: Pair<Rule>) -> Parsed<JpQuery> {
|
45 | Ok(JpQuery::new(segments(rule:next_down(rule)?)?))
|
46 | }
|
47 | pub fn rel_query(rule: Pair<Rule>) -> Parsed<Vec<Segment>> {
|
48 | segments(rule:next_down(rule)?)
|
49 | }
|
50 |
|
51 | pub fn segments(rule: Pair<Rule>) -> Parsed<Vec<Segment>> {
|
52 | let mut segments: Vec = vec![];
|
53 | for r: Pair<'_, {unknown}> in rule.into_inner() {
|
54 | segments.push(segment(child:next_down(rule:r)?)?);
|
55 | }
|
56 | Ok(segments)
|
57 | }
|
58 |
|
59 | pub fn child_segment(rule: Pair<Rule>) -> Parsed<Segment> {
|
60 | match rule.as_rule() {
|
61 | Rule::wildcard_selector => Ok(Segment::Selector(Selector::Wildcard)),
|
62 | Rule::member_name_shorthand => Ok(Segment::name(rule.as_str().trim())),
|
63 | Rule::bracketed_selection => {
|
64 | let mut selectors: Vec = vec![];
|
65 | for r: Pair<'_, {unknown}> in rule.into_inner() {
|
66 | selectors.push(selector(rule:r)?);
|
67 | }
|
68 | if selectors.len() == 1 {
|
69 | Ok(Segment::Selector(
|
70 | selectors
|
71 | .into_iter()
|
72 | .next()
|
73 | .ok_or(err:JsonPathError::empty("selector" ))?,
|
74 | ))
|
75 | } else {
|
76 | Ok(Segment::Selectors(selectors))
|
77 | }
|
78 | }
|
79 | _ => Err(rule.into()),
|
80 | }
|
81 | }
|
82 |
|
83 | pub fn segment(child: Pair<Rule>) -> Parsed<Segment> {
|
84 | match child.as_rule() {
|
85 | Rule::child_segment => {
|
86 | let val = child.as_str().strip_prefix("." ).unwrap_or_default();
|
87 | if val != val.trim_start() {
|
88 | Err(JsonPathError::InvalidJsonPath(format!(
|
89 | "Invalid child segment ` {}`" ,
|
90 | child.as_str()
|
91 | )))
|
92 | } else {
|
93 | child_segment(next_down(child)?)
|
94 | }
|
95 | }
|
96 | Rule::descendant_segment => {
|
97 | if child
|
98 | .as_str()
|
99 | .chars()
|
100 | .nth(2)
|
101 | .ok_or(JsonPathError::empty(child.as_str()))?
|
102 | .is_whitespace()
|
103 | {
|
104 | Err(JsonPathError::InvalidJsonPath(format!(
|
105 | "Invalid descendant segment ` {}`" ,
|
106 | child.as_str()
|
107 | )))
|
108 | } else {
|
109 | Ok(Segment::Descendant(Box::new(child_segment(next_down(
|
110 | child,
|
111 | )?)?)))
|
112 | }
|
113 | }
|
114 | _ => Err(child.into()),
|
115 | }
|
116 | }
|
117 |
|
118 | pub fn selector(rule: Pair<Rule>) -> Parsed<Selector> {
|
119 | let child: Pair<'_, {unknown}> = next_down(rule)?;
|
120 | match child.as_rule() {
|
121 | Rule::name_selector => Ok(Selector::Name(
|
122 | validate_js_str(child.as_str().trim())?.to_string(),
|
123 | )),
|
124 | Rule::wildcard_selector => Ok(Selector::Wildcard),
|
125 | Rule::index_selector => Ok(Selector::Index(validate_range(
|
126 | val:child
|
127 | .as_str()
|
128 | .trim()
|
129 | .parse::<i64>()
|
130 | .map_err(|e: Error<{unknown}>| (e, "wrong integer" ))?,
|
131 | )?)),
|
132 | Rule::slice_selector => {
|
133 | let (start: Option, end: Option, step: Option) = slice_selector(rule:child)?;
|
134 | Ok(Selector::Slice(start, end, step))
|
135 | }
|
136 | Rule::filter_selector => Ok(Selector::Filter(logical_expr(rule:next_down(rule:child)?)?)),
|
137 | _ => Err(child.into()),
|
138 | }
|
139 | }
|
140 |
|
141 | pub fn function_expr(rule: Pair<Rule>) -> Parsed<TestFunction> {
|
142 | let fn_str = rule.as_str();
|
143 | let mut elems = rule.into_inner();
|
144 | let name = elems
|
145 | .next()
|
146 | .map(|e| e.as_str())
|
147 | .ok_or(JsonPathError::empty("function expression" ))?;
|
148 |
|
149 | // Check if the function name is valid namely nothing between the name and the opening parenthesis
|
150 | if fn_str
|
151 | .chars()
|
152 | .nth(name.len())
|
153 | .map(|c| c != '(' )
|
154 | .unwrap_or_default()
|
155 | {
|
156 | Err(JsonPathError::InvalidJsonPath(format!(
|
157 | "Invalid function expression ` {}`" ,
|
158 | fn_str
|
159 | )))
|
160 | } else {
|
161 | let mut args = vec![];
|
162 | for arg in elems {
|
163 | let next = next_down(arg)?;
|
164 | match next.as_rule() {
|
165 | Rule::literal => args.push(FnArg::Literal(literal(next)?)),
|
166 | Rule::test => args.push(FnArg::Test(Box::new(test(next)?))),
|
167 | Rule::logical_expr => args.push(FnArg::Filter(logical_expr(next)?)),
|
168 |
|
169 | _ => return Err(next.into()),
|
170 | }
|
171 | }
|
172 |
|
173 | TestFunction::try_new(name, args)
|
174 | }
|
175 | }
|
176 |
|
177 | pub fn test(rule: Pair<Rule>) -> Parsed<Test> {
|
178 | let child: Pair<'_, {unknown}> = next_down(rule)?;
|
179 | match child.as_rule() {
|
180 | Rule::jp_query => Ok(Test::AbsQuery(jp_query(rule:child)?)),
|
181 | Rule::rel_query => Ok(Test::RelQuery(rel_query(rule:child)?)),
|
182 | Rule::function_expr => Ok(Test::Function(Box::new(function_expr(rule:child)?))),
|
183 | _ => Err(child.into()),
|
184 | }
|
185 | }
|
186 |
|
187 | pub fn logical_expr(rule: Pair<Rule>) -> Parsed<Filter> {
|
188 | let mut ors: Vec = vec![];
|
189 | for r: Pair<'_, {unknown}> in rule.into_inner() {
|
190 | ors.push(logical_expr_and(rule:r)?);
|
191 | }
|
192 | if ors.len() == 1 {
|
193 | Ok(ors
|
194 | .into_iter()
|
195 | .next()
|
196 | .ok_or(err:JsonPathError::empty("logical expression" ))?)
|
197 | } else {
|
198 | Ok(Filter::Or(ors))
|
199 | }
|
200 | }
|
201 |
|
202 | pub fn logical_expr_and(rule: Pair<Rule>) -> Parsed<Filter> {
|
203 | let mut ands: Vec = vec![];
|
204 | for r: Pair<'_, {unknown}> in rule.into_inner() {
|
205 | ands.push(Filter::Atom(filter_atom(pair:r)?));
|
206 | }
|
207 | if ands.len() == 1 {
|
208 | Ok(ands
|
209 | .into_iter()
|
210 | .next()
|
211 | .ok_or(err:JsonPathError::empty("logical expression" ))?)
|
212 | } else {
|
213 | Ok(Filter::And(ands))
|
214 | }
|
215 | }
|
216 |
|
217 | pub fn singular_query_segments(rule: Pair<Rule>) -> Parsed<Vec<SingularQuerySegment>> {
|
218 | let mut segments: Vec = vec![];
|
219 | for r: Pair<'_, {unknown}> in rule.into_inner() {
|
220 | match r.as_rule() {
|
221 | Rule::name_segment => {
|
222 | segments.push(SingularQuerySegment::Name(
|
223 | next_down(rule:r)?.as_str().trim().to_string(),
|
224 | ));
|
225 | }
|
226 | Rule::index_segment => {
|
227 | segments.push(SingularQuerySegment::Index(
|
228 | next_down(r)?
|
229 | .as_str()
|
230 | .trim()
|
231 | .parse::<i64>()
|
232 | .map_err(|e: Error<{unknown}>| (e, "int" ))?,
|
233 | ));
|
234 | }
|
235 | _ => return Err(r.into()),
|
236 | }
|
237 | }
|
238 | Ok(segments)
|
239 | }
|
240 | fn validate_range(val: i64) -> Result<i64, JsonPathError> {
|
241 | if val > MAX_VAL || val < MIN_VAL {
|
242 | Err(JsonPathError::InvalidJsonPath(format!(
|
243 | "Value {} is out of range" ,
|
244 | val
|
245 | )))
|
246 | } else {
|
247 | Ok(val)
|
248 | }
|
249 | }
|
250 | pub fn slice_selector(rule: Pair<Rule>) -> Parsed<(Option<i64>, Option<i64>, Option<i64>)> {
|
251 | let mut start = None;
|
252 | let mut end = None;
|
253 | let mut step = None;
|
254 | let get_int = |r: Pair<Rule>| r.as_str().trim().parse::<i64>().map_err(|e| (e, "int" ));
|
255 |
|
256 | for r in rule.into_inner() {
|
257 | match r.as_rule() {
|
258 | Rule::start => start = Some(validate_range(get_int(r)?)?),
|
259 | Rule::end => end = Some(validate_range(get_int(r)?)?),
|
260 | Rule::step => {
|
261 | step = {
|
262 | if let Some(int) = r.into_inner().next() {
|
263 | Some(validate_range(get_int(int)?)?)
|
264 | } else {
|
265 | None
|
266 | }
|
267 | }
|
268 | }
|
269 |
|
270 | _ => return Err(r.into()),
|
271 | }
|
272 | }
|
273 | Ok((start, end, step))
|
274 | }
|
275 |
|
276 | pub fn singular_query(rule: Pair<Rule>) -> Parsed<SingularQuery> {
|
277 | let query: Pair<'_, {unknown}> = next_down(rule)?;
|
278 | let segments: Vec = singular_query_segments(rule:next_down(rule:query.clone())?)?;
|
279 | match query.as_rule() {
|
280 | Rule::rel_singular_query => Ok(SingularQuery::Current(segments)),
|
281 | Rule::abs_singular_query => Ok(SingularQuery::Root(segments)),
|
282 | _ => Err(query.into()),
|
283 | }
|
284 | }
|
285 |
|
286 | pub fn comp_expr(rule: Pair<Rule>) -> Parsed<Comparison> {
|
287 | let mut children: Pairs<'_, {unknown}> = rule.into_inner();
|
288 |
|
289 | let lhs: Comparable = comparable(rule:children.next().ok_or(err:JsonPathError::empty("comparison" ))?)?;
|
290 | let op: &str = children!
|
291 | .next()
|
292 | .ok_or(JsonPathError::empty("comparison" ))?
|
293 | .as_str();
|
294 | let rhs: Comparable = comparable(rule:children.next().ok_or(JsonPathError::empty("comparison" ))?)?;
|
295 |
|
296 | Comparison::try_new(op, left:lhs, right:rhs)
|
297 | }
|
298 |
|
299 | /// Validates a JSONPath string literal according to RFC 9535
|
300 | /// Control characters (U+0000 through U+001F and U+007F) are not allowed unescaped
|
301 | /// in string literals, whether single-quoted or double-quoted
|
302 | fn validate_js_str(s: &str) -> Parsed<&str> {
|
303 | for (i: usize, c: char) in s.chars().enumerate() {
|
304 | if c <= ' \u{001F}' {
|
305 | return Err(JsonPathError::InvalidJsonPath(format!(
|
306 | "Invalid control character U+ {:04X} at position {} in string literal" ,
|
307 | c as u32, i
|
308 | )));
|
309 | }
|
310 | }
|
311 |
|
312 | Ok(s)
|
313 | }
|
314 |
|
315 | pub fn literal(rule: Pair<Rule>) -> Parsed<Literal> {
|
316 | fn parse_number(num: &str) -> Parsed<Literal> {
|
317 | let num = num.trim();
|
318 |
|
319 | if num.contains('.' ) || num.contains('e' ) || num.contains('E' ) {
|
320 | Ok(Literal::Float(num.parse::<f64>().map_err(|e| (e, num))?))
|
321 | } else {
|
322 | let num = num.trim().parse::<i64>().map_err(|e| (e, num))?;
|
323 | if num > MAX_VAL || num < MIN_VAL {
|
324 | Err(JsonPathError::InvalidNumber(format!(
|
325 | "number out of bounds: {}" ,
|
326 | num
|
327 | )))
|
328 | } else {
|
329 | Ok(Literal::Int(num))
|
330 | }
|
331 | }
|
332 | }
|
333 |
|
334 | fn parse_string(string: &str) -> Parsed<Literal> {
|
335 | let string = validate_js_str(string.trim())?;
|
336 | if string.starts_with(' \'' ) && string.ends_with(' \'' ) {
|
337 | Ok(Literal::String(string[1..string.len() - 1].to_string()))
|
338 | } else if string.starts_with('"' ) && string.ends_with('"' ) {
|
339 | Ok(Literal::String(string[1..string.len() - 1].to_string()))
|
340 | } else {
|
341 | Err(JsonPathError::InvalidJsonPath(format!(
|
342 | "Invalid string literal ` {}`" ,
|
343 | string
|
344 | )))
|
345 | }
|
346 | }
|
347 |
|
348 | let first = next_down(rule)?;
|
349 |
|
350 | match first.as_rule() {
|
351 | Rule::string => parse_string(first.as_str()),
|
352 | Rule::number => parse_number(first.as_str()),
|
353 | Rule::bool => Ok(Literal::Bool(first.as_str().parse::<bool>()?)),
|
354 | Rule::null => Ok(Literal::Null),
|
355 |
|
356 | _ => Err(first.into()),
|
357 | }
|
358 | }
|
359 |
|
360 | pub fn filter_atom(pair: Pair<Rule>) -> Parsed<FilterAtom> {
|
361 | let rule = next_down(pair)?;
|
362 |
|
363 | match rule.as_rule() {
|
364 | Rule::paren_expr => {
|
365 | let mut not = false;
|
366 | let mut logic_expr = None;
|
367 | for r in rule.into_inner() {
|
368 | match r.as_rule() {
|
369 | Rule::not_op => not = true,
|
370 | Rule::logical_expr => logic_expr = Some(logical_expr(r)?),
|
371 | _ => (),
|
372 | }
|
373 | }
|
374 |
|
375 | logic_expr
|
376 | .map(|expr| FilterAtom::filter(expr, not))
|
377 | .ok_or("Logical expression is absent" .into())
|
378 | }
|
379 | Rule::comp_expr => Ok(FilterAtom::cmp(Box::new(comp_expr(rule)?))),
|
380 | Rule::test_expr => {
|
381 | let mut not = false;
|
382 | let mut test_expr = None;
|
383 | for r in rule.into_inner() {
|
384 | match r.as_rule() {
|
385 | Rule::not_op => not = true,
|
386 | Rule::test => test_expr = Some(test(r)?),
|
387 | _ => (),
|
388 | }
|
389 | }
|
390 |
|
391 | test_expr
|
392 | .map(|expr| FilterAtom::test(expr, not))
|
393 | .ok_or("Logical expression is absent" .into())
|
394 | }
|
395 | _ => Err(rule.into()),
|
396 | }
|
397 | }
|
398 |
|
399 | pub fn comparable(rule: Pair<Rule>) -> Parsed<Comparable> {
|
400 | let rule: Pair<'_, {unknown}> = next_down(rule)?;
|
401 | match rule.as_rule() {
|
402 | Rule::literal => Ok(Comparable::Literal(literal(rule)?)),
|
403 | Rule::singular_query => Ok(Comparable::SingularQuery(singular_query(rule)?)),
|
404 | Rule::function_expr => {
|
405 | let tf: TestFunction = function_expr(rule)?;
|
406 | if tf.is_comparable() {
|
407 | Ok(Comparable::Function(tf))
|
408 | } else {
|
409 | Err(JsonPathError::InvalidJsonPath(format!(
|
410 | "Function {} is not comparable" ,
|
411 | tf.to_string()
|
412 | )))
|
413 | }
|
414 | }
|
415 | _ => Err(rule.into()),
|
416 | }
|
417 | }
|
418 |
|
419 | fn next_down(rule: Pair<Rule>) -> Parsed<Pair<Rule>> {
|
420 | let rule_as_str: String = rule.as_str().to_string();
|
421 | rule.into_inner()
|
422 | .next()
|
423 | .ok_or(err:JsonPathError::InvalidJsonPath(rule_as_str))
|
424 | }
|
425 | |