lib.rs source code [crates/datafrog/src/lib.rs]

1	//! A lightweight Datalog engine in Rust
2	//!
3	//! The intended design is that one has static `Relation` types that are sets
4	//! of tuples, and `Variable` types that represent monotonically increasing
5	//! sets of tuples.
6	//!
7	//! The types are mostly wrappers around `Vec<Tuple>` indicating sorted-ness,
8	//! and the intent is that this code can be dropped in the middle of an otherwise
9	//! normal Rust program, run to completion, and then the results extracted as
10	//! vectors again.
11
12	#![forbid(missing_docs)]
13
14	use std::cell::RefCell;
15	use std::cmp::Ordering;
16	use std::iter::FromIterator;
17	use std::rc::Rc;
18
19	mod join;
20	mod map;
21	mod test;
22	mod treefrog;
23	pub use crate::join::JoinInput;
24	pub use crate::treefrog::{
25	extend_anti::ExtendAnti,
26	extend_with::ExtendWith,
27	filter_anti::FilterAnti,
28	filter_with::FilterWith,
29	filters::{PrefixFilter, ValueFilter},
30	Leaper, Leapers, RelationLeaper,
31	};
32
33	/// A static, ordered list of key-value pairs.
34	///
35	/// A relation represents a fixed set of key-value pairs. In many places in a
36	/// Datalog computation we want to be sure that certain relations are not able
37	/// to vary (for example, in antijoins).
38	#[derive(Clone)]
39	pub struct Relation<Tuple: Ord> {
40	/// Sorted list of distinct tuples.
41	pub elements: Vec<Tuple>,
42	}
43
44	impl<Tuple: Ord> Relation<Tuple> {
45	/// Merges two relations into their union.
46	pub fn merge(self, other: Self) -> Self {
47	let Relation {
48	elements: mut elements1,
49	} = self;
50	let Relation {
51	elements: mut elements2,
52	} = other;
53
54	// If one of the element lists is zero-length, we don't need to do any work
55	if elements1.is_empty() {
56	return Relation {
57	elements: elements2,
58	};
59	}
60
61	if elements2.is_empty() {
62	return Relation {
63	elements: elements1,
64	};
65	}
66
67	// Make sure that elements1 starts with the lower element
68	// Will not panic since both collections must have at least 1 element at this point
69	if elements1[`0`] > elements2[`0`] {
70	std::mem::swap(&mut elements1, &mut elements2);
71	}
72
73	// Fast path for when all the new elements are after the exiting ones
74	if elements1[elements1.len() - `1`] < elements2[`0`] {
75	elements1.extend(elements2.into_iter());
76	// println!("fast path");
77	return Relation {
78	elements: elements1,
79	};
80	}
81
82	let mut elements = Vec::with_capacity(elements1.len() + elements2.len());
83	let mut elements1 = elements1.drain(..);
84	let mut elements2 = elements2.drain(..).peekable();
85
86	elements.push(elements1.next().unwrap());
87	if elements.first() == elements2.peek() {
88	elements2.next();
89	}
90
91	for elem in elements1 {
92	while elements2.peek().map(\|x\| x.cmp(&elem)) == Some(Ordering::Less) {
93	elements.push(elements2.next().unwrap());
94	}
95	if elements2.peek().map(\|x\| x.cmp(&elem)) == Some(Ordering::Equal) {
96	elements2.next();
97	}
98	elements.push(elem);
99	}
100
101	// Finish draining second list
102	elements.extend(elements2);
103
104	Relation { elements }
105	}
106
107	/// Creates a `Relation` from the elements of the `iterator`.
108	///
109	/// Same as the `from_iter` method from `std::iter::FromIterator` trait.
110	pub fn from_iter<I>(iterator: I) -> Self
111	where
112	I: IntoIterator<Item = Tuple>,
113	{
114	iterator.into_iter().collect()
115	}
116
117	/// Creates a `Relation` using the `leapjoin` logic;
118	/// see [`Variable::from_leapjoin`]
119	pub fn from_leapjoin<'leap, SourceTuple: Ord, Val: Ord + 'leap>(
120	source: &Relation<SourceTuple>,
121	leapers: impl Leapers<'leap, SourceTuple, Val>,
122	logic: impl FnMut(&SourceTuple, &Val) -> Tuple,
123	) -> Self {
124	treefrog::leapjoin(&source.elements, leapers, logic)
125	}
126
127	/// Creates a `Relation` by joining the values from `input1` and
128	/// `input2` and then applying `logic`. Like
129	/// [`Variable::from_join`] except for use where the inputs are
130	/// not varying across iterations.
131	pub fn from_join<Key: Ord, Val1: Ord, Val2: Ord>(
132	input1: &Relation<(Key, Val1)>,
133	input2: &Relation<(Key, Val2)>,
134	logic: impl FnMut(&Key, &Val1, &Val2) -> Tuple,
135	) -> Self {
136	join::join_into_relation(input1, input2, logic)
137	}
138
139	/// Creates a `Relation` by removing all values from `input1` that
140	/// share a key with `input2`, and then transforming the resulting
141	/// tuples with the `logic` closure. Like
142	/// [`Variable::from_antijoin`] except for use where the inputs
143	/// are not varying across iterations.
144	pub fn from_antijoin<Key: Ord, Val1: Ord>(
145	input1: &Relation<(Key, Val1)>,
146	input2: &Relation<Key>,
147	logic: impl FnMut(&Key, &Val1) -> Tuple,
148	) -> Self {
149	join::antijoin(input1, input2, logic)
150	}
151
152	/// Construct a new relation by mapping another one. Equivalent to
153	/// creating an iterator but perhaps more convenient. Analogous to
154	/// `Variable::from_map`.
155	pub fn from_map<T2: Ord>(input: &Relation<T2>, logic: impl FnMut(&T2) -> Tuple) -> Self {
156	input.iter().map(logic).collect()
157	}
158
159	/// Creates a `Relation` from a vector of tuples.
160	pub fn from_vec(mut elements: Vec<Tuple>) -> Self {
161	elements.sort();
162	elements.dedup();
163	Relation { elements }
164	}
165	}
166
167	impl<Tuple: Ord> From<Vec<Tuple>> for Relation<Tuple> {
168	fn from(iterator: Vec<Tuple>) -> Self {
169	Self::from_vec(elements:iterator)
170	}
171	}
172
173	impl<Tuple: Ord> FromIterator<Tuple> for Relation<Tuple> {
174	fn from_iter<I>(iterator: I) -> Self
175	where
176	I: IntoIterator<Item = Tuple>,
177	{
178	Relation::from_vec(elements:iterator.into_iter().collect())
179	}
180	}
181
182	impl<'tuple, Tuple: 'tuple + Copy + Ord> FromIterator<&'tuple Tuple> for Relation<Tuple> {
183	fn from_iter<I>(iterator: I) -> Self
184	where
185	I: IntoIterator<Item = &'tuple Tuple>,
186	{
187	Relation::from_vec(elements:iterator.into_iter().cloned().collect())
188	}
189	}
190
191	impl<Tuple: Ord> std::ops::Deref for Relation<Tuple> {
192	type Target = [Tuple];
193	fn deref(&self) -> &Self::Target {
194	&self.elements[..]
195	}
196	}
197
198	/// An iterative context for recursive evaluation.
199	///
200	/// An `Iteration` tracks monotonic variables, and monitors their progress.
201	/// It can inform the user if they have ceased changing, at which point the
202	/// computation should be done.
203	pub struct Iteration {
204	variables: Vec<Box<dyn VariableTrait>>,
205	}
206
207	impl Iteration {
208	/// Create a new iterative context.
209	pub fn new() -> Self {
210	Iteration {
211	variables: Vec::new(),
212	}
213	}
214	/// Reports whether any of the monitored variables have changed since
215	/// the most recent call.
216	pub fn changed(&mut self) -> bool {
217	let mut result = `false`;
218	for variable in self.variables.iter_mut() {
219	if variable.changed() {
220	result = `true`;
221	}
222	}
223	result
224	}
225	/// Creates a new named variable associated with the iterative context.
226	pub fn variable<Tuple: Ord + 'static>(&mut self, name: &str) -> Variable<Tuple> {
227	let variable = Variable::new(name);
228	self.variables.push(Box::new(variable.clone()));
229	variable
230	}
231	/// Creates a new named variable associated with the iterative context.
232	///
233	/// This variable will not be maintained distinctly, and may advertise tuples as
234	/// recent multiple times (perhaps unboundedly many times).
235	pub fn variable_indistinct<Tuple: Ord + 'static>(&mut self, name: &str) -> Variable<Tuple> {
236	let mut variable = Variable::new(name);
237	variable.distinct = `false`;
238	self.variables.push(Box::new(variable.clone()));
239	variable
240	}
241	}
242
243	/// A type that can report on whether it has changed.
244	trait VariableTrait {
245	/// Reports whether the variable has changed since it was last asked.
246	fn changed(&mut self) -> bool;
247	}
248
249	/// An monotonically increasing set of `Tuple`s.
250	///
251	/// There are three stages in the lifecycle of a tuple:
252	///
253	/// 1. A tuple is added to `self.to_add`, but is not yet visible externally.
254	/// 2. Newly added tuples are then promoted to `self.recent` for one iteration.
255	/// 3. After one iteration, recent tuples are moved to `self.tuples` for posterity.
256	///
257	/// Each time `self.changed()` is called, the `recent` relation is folded into `tuples`,
258	/// and the `to_add` relations are merged, potentially deduplicated against `tuples`, and
259	/// then made `recent`. This way, across calls to `changed()` all added tuples are in
260	/// `recent` at least once and eventually all are in `tuples`.
261	///
262	/// A `Variable` may optionally be instructed not to de-duplicate its tuples, for reasons
263	/// of performance. Such a variable cannot be relied on to terminate iterative computation,
264	/// and it is important that any cycle of derivations have at least one de-duplicating
265	/// variable on it.
266	pub struct Variable<Tuple: Ord> {
267	/// Should the variable be maintained distinctly.
268	distinct: bool,
269	/// A useful name for the variable.
270	name: String,
271	/// A list of relations whose union are the accepted tuples.
272	pub stable: Rc<RefCell<Vec<Relation<Tuple>>>>,
273	/// A list of recent tuples, still to be processed.
274	pub recent: Rc<RefCell<Relation<Tuple>>>,
275	/// A list of future tuples, to be introduced.
276	to_add: Rc<RefCell<Vec<Relation<Tuple>>>>,
277	}
278
279	// Operator implementations.
280	impl<Tuple: Ord> Variable<Tuple> {
281	/// Adds tuples that result from joining `input1` and `input2` --
282	/// each of the inputs must be a set of (Key, Value) tuples. Both
283	/// `input1` and `input2` must have the same type of key (`K`) but
284	/// they can have distinct value types (`V1` and `V2`
285	/// respectively). The `logic` closure will be invoked for each
286	/// key that appears in both inputs; it is also given the two
287	/// values, and from those it should construct the resulting
288	/// value.
289	///
290	/// Note that `input1` must be a variable, but `input2` can be a
291	/// relation or a variable. Therefore, you cannot join two
292	/// relations with this method. This is not because the result
293	/// would be wrong, but because it would be inefficient: the
294	/// result from such a join cannot vary across iterations (as
295	/// relations are fixed), so you should prefer to invoke `insert`
296	/// on a relation created by `Relation::from_join` instead.
297	///
298	/// # Examples
299	///
300	/// This example starts a collection with the pairs (x, x+1) and (x+1, x) for x in 0 .. 10.
301	/// It then adds pairs (y, z) for which (x, y) and (x, z) are present. Because the initial
302	/// pairs are symmetric, this should result in all pairs (x, y) for x and y in 0 .. 11.
303	///
304	/// ```
305	/// use datafrog::{Iteration, Relation};
306	///
307	/// let mut iteration = Iteration::new();
308	/// let variable = iteration.variable::<(usize, usize)>("source");
309	/// variable.extend((`0` .. `10`).map(\|x\| (x, x + `1`)));
310	/// variable.extend((`0` .. `10`).map(\|x\| (x + `1`, x)));
311	///
312	/// while iteration.changed() {
313	/// variable.from_join(&variable, &variable, \|&key, &val1, &val2\| (val1, val2));
314	/// }
315	///
316	/// let result = variable.complete();
317	/// assert_eq!(result.len(), `121`);
318	/// ```
319	pub fn from_join<'me, K: Ord, V1: Ord, V2: Ord>(
320	&self,
321	input1: &'me Variable<(K, V1)>,
322	input2: impl JoinInput<'me, (K, V2)>,
323	logic: impl FnMut(&K, &V1, &V2) -> Tuple,
324	) {
325	join::join_into(input1, input2, self, logic)
326	}
327
328	/// Adds tuples from `input1` whose key is not present in `input2`.
329	///
330	/// Note that `input1` must be a variable: if you have a relation
331	/// instead, you can use `Relation::from_antijoin` and then
332	/// `Variable::insert`. Note that the result will not vary during
333	/// the iteration.
334	///
335	/// # Examples
336	///
337	/// This example starts a collection with the pairs (x, x+1) for x in 0 .. 10. It then
338	/// adds any pairs (x+1,x) for which x is not a multiple of three. That excludes four
339	/// pairs (for 0, 3, 6, and 9) which should leave us with 16 total pairs.
340	///
341	/// ```
342	/// use datafrog::{Iteration, Relation};
343	///
344	/// let mut iteration = Iteration::new();
345	/// let variable = iteration.variable::<(usize, usize)>("source");
346	/// variable.extend((`0` .. `10`).map(\|x\| (x, x + `1`)));
347	///
348	/// let relation: Relation<_> = (`0` .. `10`).filter(\|x\| x % `3` == `0`).collect();
349	///
350	/// while iteration.changed() {
351	/// variable.from_antijoin(&variable, &relation, \|&key, &val\| (val, key));
352	/// }
353	///
354	/// let result = variable.complete();
355	/// assert_eq!(result.len(), `16`);
356	/// ```
357	pub fn from_antijoin<K: Ord, V: Ord>(
358	&self,
359	input1: &Variable<(K, V)>,
360	input2: &Relation<K>,
361	logic: impl FnMut(&K, &V) -> Tuple,
362	) {
363	self.insert(join::antijoin(input1, input2, logic))
364	}
365
366	/// Adds tuples that result from mapping `input`.
367	///
368	/// # Examples
369	///
370	/// This example starts a collection with the pairs (x, x) for x in 0 .. 10. It then
371	/// repeatedly adds any pairs (x, z) for (x, y) in the collection, where z is the Collatz
372	/// step for y: it is y/2 if y is even, and 3y + 1 if y is odd. This produces all of the*
373	/// pairs (x, y) where x visits y as part of its Collatz journey.
374	///
375	/// ```
376	/// use datafrog::{Iteration, Relation};
377	///
378	/// let mut iteration = Iteration::new();
379	/// let variable = iteration.variable::<(usize, usize)>("source");
380	/// variable.extend((`0` .. `10`).map(\|x\| (x, x)));
381	///
382	/// while iteration.changed() {
383	/// variable.from_map(&variable, \|&(key, val)\|
384	/// if val % `2` == `0` {
385	/// (key, val/`2`)
386	/// }
387	/// else {
388	/// (key, `3`*val + `1`)
389	/// });
390	/// }
391	///
392	/// let result = variable.complete();
393	/// assert_eq!(result.len(), `74`);
394	/// ```
395	pub fn from_map<T2: Ord>(&self, input: &Variable<T2>, logic: impl FnMut(&T2) -> Tuple) {
396	map::map_into(input, self, logic)
397	}
398
399	/// Adds tuples that result from combining `source` with the
400	/// relations given in `leapers`. This operation is very flexible
401	/// and can be used to do a combination of joins and anti-joins.
402	/// The main limitation is that the things being combined must
403	/// consist of one dynamic variable (`source`) and then several
404	/// fixed relations (`leapers`).
405	///
406	/// The idea is as follows:
407	///
408	/// - You will be inserting new tuples that result from joining (and anti-joining)
409	/// some dynamic variable `source` of source tuples (`SourceTuple`)
410	/// with some set of values (of type `Val`).
411	/// - You provide these values by combining `source` with a set of leapers
412	/// `leapers`, each of which is derived from a fixed relation. The `leapers`
413	/// should be either a single leaper (of suitable type) or else a tuple of leapers.
414	/// You can create a leaper in one of two ways:
415	/// - Extension: In this case, you have a relation of type `(K, Val)` for some
416	/// type `K`. You provide a closure that maps from `SourceTuple` to the key
417	/// `K`. If you use `relation.extend_with`, then any `Val` values the
418	/// relation provides will be added to the set of values; if you use
419	/// `extend_anti`, then the `Val` values will be removed.
420	/// - Filtering: In this case, you have a relation of type `K` for some
421	/// type `K` and you provide a closure that maps from `SourceTuple` to
422	/// the key `K`. Filters don't provide values but they remove source
423	/// tuples.
424	/// - Finally, you get a callback `logic` that accepts each `(SourceTuple, Val)`
425	/// that was successfully joined (and not filtered) and which maps to the
426	/// type of this variable.
427	pub fn from_leapjoin<'leap, SourceTuple: Ord, Val: Ord + 'leap>(
428	&self,
429	source: &Variable<SourceTuple>,
430	leapers: impl Leapers<'leap, SourceTuple, Val>,
431	logic: impl FnMut(&SourceTuple, &Val) -> Tuple,
432	) {
433	self.insert(treefrog::leapjoin(&source.recent.borrow(), leapers, logic));
434	}
435	}
436
437	impl<Tuple: Ord> Clone for Variable<Tuple> {
438	fn clone(&self) -> Self {
439	Variable {
440	distinct: self.distinct,
441	name: self.name.clone(),
442	stable: self.stable.clone(),
443	recent: self.recent.clone(),
444	to_add: self.to_add.clone(),
445	}
446	}
447	}
448
449	impl<Tuple: Ord> Variable<Tuple> {
450	fn new(name: &str) -> Self {
451	Variable {
452	distinct: `true`,
453	name: name.to_string(),
454	stable: Rc::new(RefCell::new(Vec::new())),
455	recent: Rc::new(RefCell::new(Vec::new().into())),
456	to_add: Rc::new(RefCell::new(Vec::new())),
457	}
458	}
459
460	/// Inserts a relation into the variable.
461	///
462	/// This is most commonly used to load initial values into a variable.
463	/// it is not obvious that it should be commonly used otherwise, but
464	/// it should not be harmful.
465	pub fn insert(&self, relation: Relation<Tuple>) {
466	if !relation.is_empty() {
467	self.to_add.borrow_mut().push(relation);
468	}
469	}
470
471	/// Extend the variable with values from the iterator.
472	///
473	/// This is most commonly used to load initial values into a variable.
474	/// it is not obvious that it should be commonly used otherwise, but
475	/// it should not be harmful.
476	pub fn extend<T>(&self, iterator: impl IntoIterator<Item = T>)
477	where
478	Relation<Tuple>: FromIterator<T>,
479	{
480	self.insert(iterator.into_iter().collect());
481	}
482
483	/// Consumes the variable and returns a relation.
484	///
485	/// This method removes the ability for the variable to develop, and
486	/// flattens all internal tuples down to one relation. The method
487	/// asserts that iteration has completed, in that `self.recent` and
488	/// `self.to_add` should both be empty.
489	pub fn complete(self) -> Relation<Tuple> {
490	assert!(self.recent.borrow().is_empty());
491	assert!(self.to_add.borrow().is_empty());
492	let mut result: Relation<Tuple> = Vec::new().into();
493	while let Some(batch) = self.stable.borrow_mut().pop() {
494	result = result.merge(batch);
495	}
496	result
497	}
498	}
499
500	impl<Tuple: Ord> VariableTrait for Variable<Tuple> {
501	fn changed(&mut self) -> bool {
502	// 1. Merge self.recent into self.stable.
503	if !self.recent.borrow().is_empty() {
504	let mut recent =
505	::std::mem::replace(&mut (*self.recent.borrow_mut()), Vec::new().into());
506	while self
507	.stable
508	.borrow()
509	.last()
510	.map(\|x\| x.len() <= `2` * recent.len())
511	== Some(`true`)
512	{
513	let last = self.stable.borrow_mut().pop().unwrap();
514	recent = recent.merge(last);
515	}
516	self.stable.borrow_mut().push(recent);
517	}
518
519	// 2. Move self.to_add into self.recent.
520	let to_add = self.to_add.borrow_mut().pop();
521	if let Some(mut to_add) = to_add {
522	while let Some(to_add_more) = self.to_add.borrow_mut().pop() {
523	to_add = to_add.merge(to_add_more);
524	}
525	// 2b. Restrict `to_add` to tuples not in `self.stable`.
526	if self.distinct {
527	for batch in self.stable.borrow().iter() {
528	let mut slice = &batch[..];
529	// Only gallop if the slice is relatively large.
530	if slice.len() > `4` * to_add.elements.len() {
531	to_add.elements.retain(\|x\| {
532	slice = join::gallop(slice, \|y\| y < x);
533	slice.is_empty() \|\| &slice[`0`] != x
534	});
535	} else {
536	to_add.elements.retain(\|x\| {
537	while !slice.is_empty() && &slice[`0`] < x {
538	slice = &slice[`1`..];
539	}
540	slice.is_empty() \|\| &slice[`0`] != x
541	});
542	}
543	}
544	}
545	*self.recent.borrow_mut() = to_add;
546	}
547
548	// let mut total = 0;
549	// for tuple in self.stable.borrow().iter() {
550	// total += tuple.len();
551	// }
552
553	// println!("Variable\t{}\t{}\t{}", self.name, total, self.recent.borrow().len());
554
555	!self.recent.borrow().is_empty()
556	}
557	}
558
559	// impl<Tuple: Ord> Drop for Variable<Tuple> {
560	// fn drop(&mut self) {
561	// let mut total = 0;
562	// for batch in self.stable.borrow().iter() {
563	// total += batch.len();
564	// }
565	// println!("FINAL: {:?}\t{:?}", self.name, total);
566	// }
567	// }
568

Provided by KDAB

Definitions