1//! A lightweight Datalog engine in Rust
2//!
3//! The intended design is that one has static `Relation` types that are sets
4//! of tuples, and `Variable` types that represent monotonically increasing
5//! sets of tuples.
6//!
7//! The types are mostly wrappers around `Vec<Tuple>` indicating sorted-ness,
8//! and the intent is that this code can be dropped in the middle of an otherwise
9//! normal Rust program, run to completion, and then the results extracted as
10//! vectors again.
11
12#![forbid(missing_docs)]
13
14use std::cell::RefCell;
15use std::cmp::Ordering;
16use std::iter::FromIterator;
17use std::rc::Rc;
18
19mod join;
20mod map;
21mod test;
22mod treefrog;
23pub use crate::join::JoinInput;
24pub use crate::treefrog::{
25 extend_anti::ExtendAnti,
26 extend_with::ExtendWith,
27 filter_anti::FilterAnti,
28 filter_with::FilterWith,
29 filters::{PrefixFilter, ValueFilter},
30 Leaper, Leapers, RelationLeaper,
31};
32
33/// A static, ordered list of key-value pairs.
34///
35/// A relation represents a fixed set of key-value pairs. In many places in a
36/// Datalog computation we want to be sure that certain relations are not able
37/// to vary (for example, in antijoins).
38#[derive(Clone)]
39pub struct Relation<Tuple: Ord> {
40 /// Sorted list of distinct tuples.
41 pub elements: Vec<Tuple>,
42}
43
44impl<Tuple: Ord> Relation<Tuple> {
45 /// Merges two relations into their union.
46 pub fn merge(self, other: Self) -> Self {
47 let Relation {
48 elements: mut elements1,
49 } = self;
50 let Relation {
51 elements: mut elements2,
52 } = other;
53
54 // If one of the element lists is zero-length, we don't need to do any work
55 if elements1.is_empty() {
56 return Relation {
57 elements: elements2,
58 };
59 }
60
61 if elements2.is_empty() {
62 return Relation {
63 elements: elements1,
64 };
65 }
66
67 // Make sure that elements1 starts with the lower element
68 // Will not panic since both collections must have at least 1 element at this point
69 if elements1[0] > elements2[0] {
70 std::mem::swap(&mut elements1, &mut elements2);
71 }
72
73 // Fast path for when all the new elements are after the exiting ones
74 if elements1[elements1.len() - 1] < elements2[0] {
75 elements1.extend(elements2.into_iter());
76 // println!("fast path");
77 return Relation {
78 elements: elements1,
79 };
80 }
81
82 let mut elements = Vec::with_capacity(elements1.len() + elements2.len());
83 let mut elements1 = elements1.drain(..);
84 let mut elements2 = elements2.drain(..).peekable();
85
86 elements.push(elements1.next().unwrap());
87 if elements.first() == elements2.peek() {
88 elements2.next();
89 }
90
91 for elem in elements1 {
92 while elements2.peek().map(|x| x.cmp(&elem)) == Some(Ordering::Less) {
93 elements.push(elements2.next().unwrap());
94 }
95 if elements2.peek().map(|x| x.cmp(&elem)) == Some(Ordering::Equal) {
96 elements2.next();
97 }
98 elements.push(elem);
99 }
100
101 // Finish draining second list
102 elements.extend(elements2);
103
104 Relation { elements }
105 }
106
107 /// Creates a `Relation` from the elements of the `iterator`.
108 ///
109 /// Same as the `from_iter` method from `std::iter::FromIterator` trait.
110 pub fn from_iter<I>(iterator: I) -> Self
111 where
112 I: IntoIterator<Item = Tuple>,
113 {
114 iterator.into_iter().collect()
115 }
116
117 /// Creates a `Relation` using the `leapjoin` logic;
118 /// see [`Variable::from_leapjoin`]
119 pub fn from_leapjoin<'leap, SourceTuple: Ord, Val: Ord + 'leap>(
120 source: &Relation<SourceTuple>,
121 leapers: impl Leapers<'leap, SourceTuple, Val>,
122 logic: impl FnMut(&SourceTuple, &Val) -> Tuple,
123 ) -> Self {
124 treefrog::leapjoin(&source.elements, leapers, logic)
125 }
126
127 /// Creates a `Relation` by joining the values from `input1` and
128 /// `input2` and then applying `logic`. Like
129 /// [`Variable::from_join`] except for use where the inputs are
130 /// not varying across iterations.
131 pub fn from_join<Key: Ord, Val1: Ord, Val2: Ord>(
132 input1: &Relation<(Key, Val1)>,
133 input2: &Relation<(Key, Val2)>,
134 logic: impl FnMut(&Key, &Val1, &Val2) -> Tuple,
135 ) -> Self {
136 join::join_into_relation(input1, input2, logic)
137 }
138
139 /// Creates a `Relation` by removing all values from `input1` that
140 /// share a key with `input2`, and then transforming the resulting
141 /// tuples with the `logic` closure. Like
142 /// [`Variable::from_antijoin`] except for use where the inputs
143 /// are not varying across iterations.
144 pub fn from_antijoin<Key: Ord, Val1: Ord>(
145 input1: &Relation<(Key, Val1)>,
146 input2: &Relation<Key>,
147 logic: impl FnMut(&Key, &Val1) -> Tuple,
148 ) -> Self {
149 join::antijoin(input1, input2, logic)
150 }
151
152 /// Construct a new relation by mapping another one. Equivalent to
153 /// creating an iterator but perhaps more convenient. Analogous to
154 /// `Variable::from_map`.
155 pub fn from_map<T2: Ord>(input: &Relation<T2>, logic: impl FnMut(&T2) -> Tuple) -> Self {
156 input.iter().map(logic).collect()
157 }
158
159 /// Creates a `Relation` from a vector of tuples.
160 pub fn from_vec(mut elements: Vec<Tuple>) -> Self {
161 elements.sort();
162 elements.dedup();
163 Relation { elements }
164 }
165}
166
167impl<Tuple: Ord> From<Vec<Tuple>> for Relation<Tuple> {
168 fn from(iterator: Vec<Tuple>) -> Self {
169 Self::from_vec(elements:iterator)
170 }
171}
172
173impl<Tuple: Ord> FromIterator<Tuple> for Relation<Tuple> {
174 fn from_iter<I>(iterator: I) -> Self
175 where
176 I: IntoIterator<Item = Tuple>,
177 {
178 Relation::from_vec(elements:iterator.into_iter().collect())
179 }
180}
181
182impl<'tuple, Tuple: 'tuple + Copy + Ord> FromIterator<&'tuple Tuple> for Relation<Tuple> {
183 fn from_iter<I>(iterator: I) -> Self
184 where
185 I: IntoIterator<Item = &'tuple Tuple>,
186 {
187 Relation::from_vec(elements:iterator.into_iter().cloned().collect())
188 }
189}
190
191impl<Tuple: Ord> std::ops::Deref for Relation<Tuple> {
192 type Target = [Tuple];
193 fn deref(&self) -> &Self::Target {
194 &self.elements[..]
195 }
196}
197
198/// An iterative context for recursive evaluation.
199///
200/// An `Iteration` tracks monotonic variables, and monitors their progress.
201/// It can inform the user if they have ceased changing, at which point the
202/// computation should be done.
203pub struct Iteration {
204 variables: Vec<Box<dyn VariableTrait>>,
205}
206
207impl Iteration {
208 /// Create a new iterative context.
209 pub fn new() -> Self {
210 Iteration {
211 variables: Vec::new(),
212 }
213 }
214 /// Reports whether any of the monitored variables have changed since
215 /// the most recent call.
216 pub fn changed(&mut self) -> bool {
217 let mut result = false;
218 for variable in self.variables.iter_mut() {
219 if variable.changed() {
220 result = true;
221 }
222 }
223 result
224 }
225 /// Creates a new named variable associated with the iterative context.
226 pub fn variable<Tuple: Ord + 'static>(&mut self, name: &str) -> Variable<Tuple> {
227 let variable = Variable::new(name);
228 self.variables.push(Box::new(variable.clone()));
229 variable
230 }
231 /// Creates a new named variable associated with the iterative context.
232 ///
233 /// This variable will not be maintained distinctly, and may advertise tuples as
234 /// recent multiple times (perhaps unboundedly many times).
235 pub fn variable_indistinct<Tuple: Ord + 'static>(&mut self, name: &str) -> Variable<Tuple> {
236 let mut variable = Variable::new(name);
237 variable.distinct = false;
238 self.variables.push(Box::new(variable.clone()));
239 variable
240 }
241}
242
243/// A type that can report on whether it has changed.
244trait VariableTrait {
245 /// Reports whether the variable has changed since it was last asked.
246 fn changed(&mut self) -> bool;
247}
248
249/// An monotonically increasing set of `Tuple`s.
250///
251/// There are three stages in the lifecycle of a tuple:
252///
253/// 1. A tuple is added to `self.to_add`, but is not yet visible externally.
254/// 2. Newly added tuples are then promoted to `self.recent` for one iteration.
255/// 3. After one iteration, recent tuples are moved to `self.tuples` for posterity.
256///
257/// Each time `self.changed()` is called, the `recent` relation is folded into `tuples`,
258/// and the `to_add` relations are merged, potentially deduplicated against `tuples`, and
259/// then made `recent`. This way, across calls to `changed()` all added tuples are in
260/// `recent` at least once and eventually all are in `tuples`.
261///
262/// A `Variable` may optionally be instructed not to de-duplicate its tuples, for reasons
263/// of performance. Such a variable cannot be relied on to terminate iterative computation,
264/// and it is important that any cycle of derivations have at least one de-duplicating
265/// variable on it.
266pub struct Variable<Tuple: Ord> {
267 /// Should the variable be maintained distinctly.
268 distinct: bool,
269 /// A useful name for the variable.
270 name: String,
271 /// A list of relations whose union are the accepted tuples.
272 pub stable: Rc<RefCell<Vec<Relation<Tuple>>>>,
273 /// A list of recent tuples, still to be processed.
274 pub recent: Rc<RefCell<Relation<Tuple>>>,
275 /// A list of future tuples, to be introduced.
276 to_add: Rc<RefCell<Vec<Relation<Tuple>>>>,
277}
278
279// Operator implementations.
280impl<Tuple: Ord> Variable<Tuple> {
281 /// Adds tuples that result from joining `input1` and `input2` --
282 /// each of the inputs must be a set of (Key, Value) tuples. Both
283 /// `input1` and `input2` must have the same type of key (`K`) but
284 /// they can have distinct value types (`V1` and `V2`
285 /// respectively). The `logic` closure will be invoked for each
286 /// key that appears in both inputs; it is also given the two
287 /// values, and from those it should construct the resulting
288 /// value.
289 ///
290 /// Note that `input1` must be a variable, but `input2` can be a
291 /// relation or a variable. Therefore, you cannot join two
292 /// relations with this method. This is not because the result
293 /// would be wrong, but because it would be inefficient: the
294 /// result from such a join cannot vary across iterations (as
295 /// relations are fixed), so you should prefer to invoke `insert`
296 /// on a relation created by `Relation::from_join` instead.
297 ///
298 /// # Examples
299 ///
300 /// This example starts a collection with the pairs (x, x+1) and (x+1, x) for x in 0 .. 10.
301 /// It then adds pairs (y, z) for which (x, y) and (x, z) are present. Because the initial
302 /// pairs are symmetric, this should result in all pairs (x, y) for x and y in 0 .. 11.
303 ///
304 /// ```
305 /// use datafrog::{Iteration, Relation};
306 ///
307 /// let mut iteration = Iteration::new();
308 /// let variable = iteration.variable::<(usize, usize)>("source");
309 /// variable.extend((0 .. 10).map(|x| (x, x + 1)));
310 /// variable.extend((0 .. 10).map(|x| (x + 1, x)));
311 ///
312 /// while iteration.changed() {
313 /// variable.from_join(&variable, &variable, |&key, &val1, &val2| (val1, val2));
314 /// }
315 ///
316 /// let result = variable.complete();
317 /// assert_eq!(result.len(), 121);
318 /// ```
319 pub fn from_join<'me, K: Ord, V1: Ord, V2: Ord>(
320 &self,
321 input1: &'me Variable<(K, V1)>,
322 input2: impl JoinInput<'me, (K, V2)>,
323 logic: impl FnMut(&K, &V1, &V2) -> Tuple,
324 ) {
325 join::join_into(input1, input2, self, logic)
326 }
327
328 /// Adds tuples from `input1` whose key is not present in `input2`.
329 ///
330 /// Note that `input1` must be a variable: if you have a relation
331 /// instead, you can use `Relation::from_antijoin` and then
332 /// `Variable::insert`. Note that the result will not vary during
333 /// the iteration.
334 ///
335 /// # Examples
336 ///
337 /// This example starts a collection with the pairs (x, x+1) for x in 0 .. 10. It then
338 /// adds any pairs (x+1,x) for which x is not a multiple of three. That excludes four
339 /// pairs (for 0, 3, 6, and 9) which should leave us with 16 total pairs.
340 ///
341 /// ```
342 /// use datafrog::{Iteration, Relation};
343 ///
344 /// let mut iteration = Iteration::new();
345 /// let variable = iteration.variable::<(usize, usize)>("source");
346 /// variable.extend((0 .. 10).map(|x| (x, x + 1)));
347 ///
348 /// let relation: Relation<_> = (0 .. 10).filter(|x| x % 3 == 0).collect();
349 ///
350 /// while iteration.changed() {
351 /// variable.from_antijoin(&variable, &relation, |&key, &val| (val, key));
352 /// }
353 ///
354 /// let result = variable.complete();
355 /// assert_eq!(result.len(), 16);
356 /// ```
357 pub fn from_antijoin<K: Ord, V: Ord>(
358 &self,
359 input1: &Variable<(K, V)>,
360 input2: &Relation<K>,
361 logic: impl FnMut(&K, &V) -> Tuple,
362 ) {
363 self.insert(join::antijoin(input1, input2, logic))
364 }
365
366 /// Adds tuples that result from mapping `input`.
367 ///
368 /// # Examples
369 ///
370 /// This example starts a collection with the pairs (x, x) for x in 0 .. 10. It then
371 /// repeatedly adds any pairs (x, z) for (x, y) in the collection, where z is the Collatz
372 /// step for y: it is y/2 if y is even, and 3*y + 1 if y is odd. This produces all of the
373 /// pairs (x, y) where x visits y as part of its Collatz journey.
374 ///
375 /// ```
376 /// use datafrog::{Iteration, Relation};
377 ///
378 /// let mut iteration = Iteration::new();
379 /// let variable = iteration.variable::<(usize, usize)>("source");
380 /// variable.extend((0 .. 10).map(|x| (x, x)));
381 ///
382 /// while iteration.changed() {
383 /// variable.from_map(&variable, |&(key, val)|
384 /// if val % 2 == 0 {
385 /// (key, val/2)
386 /// }
387 /// else {
388 /// (key, 3*val + 1)
389 /// });
390 /// }
391 ///
392 /// let result = variable.complete();
393 /// assert_eq!(result.len(), 74);
394 /// ```
395 pub fn from_map<T2: Ord>(&self, input: &Variable<T2>, logic: impl FnMut(&T2) -> Tuple) {
396 map::map_into(input, self, logic)
397 }
398
399 /// Adds tuples that result from combining `source` with the
400 /// relations given in `leapers`. This operation is very flexible
401 /// and can be used to do a combination of joins and anti-joins.
402 /// The main limitation is that the things being combined must
403 /// consist of one dynamic variable (`source`) and then several
404 /// fixed relations (`leapers`).
405 ///
406 /// The idea is as follows:
407 ///
408 /// - You will be inserting new tuples that result from joining (and anti-joining)
409 /// some dynamic variable `source` of source tuples (`SourceTuple`)
410 /// with some set of values (of type `Val`).
411 /// - You provide these values by combining `source` with a set of leapers
412 /// `leapers`, each of which is derived from a fixed relation. The `leapers`
413 /// should be either a single leaper (of suitable type) or else a tuple of leapers.
414 /// You can create a leaper in one of two ways:
415 /// - Extension: In this case, you have a relation of type `(K, Val)` for some
416 /// type `K`. You provide a closure that maps from `SourceTuple` to the key
417 /// `K`. If you use `relation.extend_with`, then any `Val` values the
418 /// relation provides will be added to the set of values; if you use
419 /// `extend_anti`, then the `Val` values will be removed.
420 /// - Filtering: In this case, you have a relation of type `K` for some
421 /// type `K` and you provide a closure that maps from `SourceTuple` to
422 /// the key `K`. Filters don't provide values but they remove source
423 /// tuples.
424 /// - Finally, you get a callback `logic` that accepts each `(SourceTuple, Val)`
425 /// that was successfully joined (and not filtered) and which maps to the
426 /// type of this variable.
427 pub fn from_leapjoin<'leap, SourceTuple: Ord, Val: Ord + 'leap>(
428 &self,
429 source: &Variable<SourceTuple>,
430 leapers: impl Leapers<'leap, SourceTuple, Val>,
431 logic: impl FnMut(&SourceTuple, &Val) -> Tuple,
432 ) {
433 self.insert(treefrog::leapjoin(&source.recent.borrow(), leapers, logic));
434 }
435}
436
437impl<Tuple: Ord> Clone for Variable<Tuple> {
438 fn clone(&self) -> Self {
439 Variable {
440 distinct: self.distinct,
441 name: self.name.clone(),
442 stable: self.stable.clone(),
443 recent: self.recent.clone(),
444 to_add: self.to_add.clone(),
445 }
446 }
447}
448
449impl<Tuple: Ord> Variable<Tuple> {
450 fn new(name: &str) -> Self {
451 Variable {
452 distinct: true,
453 name: name.to_string(),
454 stable: Rc::new(RefCell::new(Vec::new())),
455 recent: Rc::new(RefCell::new(Vec::new().into())),
456 to_add: Rc::new(RefCell::new(Vec::new())),
457 }
458 }
459
460 /// Inserts a relation into the variable.
461 ///
462 /// This is most commonly used to load initial values into a variable.
463 /// it is not obvious that it should be commonly used otherwise, but
464 /// it should not be harmful.
465 pub fn insert(&self, relation: Relation<Tuple>) {
466 if !relation.is_empty() {
467 self.to_add.borrow_mut().push(relation);
468 }
469 }
470
471 /// Extend the variable with values from the iterator.
472 ///
473 /// This is most commonly used to load initial values into a variable.
474 /// it is not obvious that it should be commonly used otherwise, but
475 /// it should not be harmful.
476 pub fn extend<T>(&self, iterator: impl IntoIterator<Item = T>)
477 where
478 Relation<Tuple>: FromIterator<T>,
479 {
480 self.insert(iterator.into_iter().collect());
481 }
482
483 /// Consumes the variable and returns a relation.
484 ///
485 /// This method removes the ability for the variable to develop, and
486 /// flattens all internal tuples down to one relation. The method
487 /// asserts that iteration has completed, in that `self.recent` and
488 /// `self.to_add` should both be empty.
489 pub fn complete(self) -> Relation<Tuple> {
490 assert!(self.recent.borrow().is_empty());
491 assert!(self.to_add.borrow().is_empty());
492 let mut result: Relation<Tuple> = Vec::new().into();
493 while let Some(batch) = self.stable.borrow_mut().pop() {
494 result = result.merge(batch);
495 }
496 result
497 }
498}
499
500impl<Tuple: Ord> VariableTrait for Variable<Tuple> {
501 fn changed(&mut self) -> bool {
502 // 1. Merge self.recent into self.stable.
503 if !self.recent.borrow().is_empty() {
504 let mut recent =
505 ::std::mem::replace(&mut (*self.recent.borrow_mut()), Vec::new().into());
506 while self
507 .stable
508 .borrow()
509 .last()
510 .map(|x| x.len() <= 2 * recent.len())
511 == Some(true)
512 {
513 let last = self.stable.borrow_mut().pop().unwrap();
514 recent = recent.merge(last);
515 }
516 self.stable.borrow_mut().push(recent);
517 }
518
519 // 2. Move self.to_add into self.recent.
520 let to_add = self.to_add.borrow_mut().pop();
521 if let Some(mut to_add) = to_add {
522 while let Some(to_add_more) = self.to_add.borrow_mut().pop() {
523 to_add = to_add.merge(to_add_more);
524 }
525 // 2b. Restrict `to_add` to tuples not in `self.stable`.
526 if self.distinct {
527 for batch in self.stable.borrow().iter() {
528 let mut slice = &batch[..];
529 // Only gallop if the slice is relatively large.
530 if slice.len() > 4 * to_add.elements.len() {
531 to_add.elements.retain(|x| {
532 slice = join::gallop(slice, |y| y < x);
533 slice.is_empty() || &slice[0] != x
534 });
535 } else {
536 to_add.elements.retain(|x| {
537 while !slice.is_empty() && &slice[0] < x {
538 slice = &slice[1..];
539 }
540 slice.is_empty() || &slice[0] != x
541 });
542 }
543 }
544 }
545 *self.recent.borrow_mut() = to_add;
546 }
547
548 // let mut total = 0;
549 // for tuple in self.stable.borrow().iter() {
550 // total += tuple.len();
551 // }
552
553 // println!("Variable\t{}\t{}\t{}", self.name, total, self.recent.borrow().len());
554
555 !self.recent.borrow().is_empty()
556 }
557}
558
559// impl<Tuple: Ord> Drop for Variable<Tuple> {
560// fn drop(&mut self) {
561// let mut total = 0;
562// for batch in self.stable.borrow().iter() {
563// total += batch.len();
564// }
565// println!("FINAL: {:?}\t{:?}", self.name, total);
566// }
567// }
568