| 1 | use crate::ast::{Id, Span}; |
| 2 | use anyhow::Result; |
| 3 | use indexmap::IndexMap; |
| 4 | use std::collections::BinaryHeap; |
| 5 | use std::fmt; |
| 6 | use std::mem; |
| 7 | |
| 8 | #[derive (Default, Clone)] |
| 9 | struct State { |
| 10 | /// Number of outbound edges from this node which have still not been |
| 11 | /// processed into the topological ordering. |
| 12 | outbound_remaining: usize, |
| 13 | |
| 14 | /// Indices of nodes that depend on this one, used when this node is added |
| 15 | /// to the binary heap to decrement `outbound_remaining`. |
| 16 | reverse_deps: Vec<usize>, |
| 17 | } |
| 18 | |
| 19 | /// Performs a topological sort of the `deps` provided, returning the order in |
| 20 | /// which to visit the nodes in reverse-dep order. |
| 21 | /// |
| 22 | /// This sort goes one level further as well to produce a stable ordering |
| 23 | /// regardless of the input edges so long as the structure of the graph has |
| 24 | /// changed. Notably the nodes are sorted, by name, in the output in addition to |
| 25 | /// being sorted in dependency order. This is done to assist with round-tripping |
| 26 | /// documents where new edges are discovered during world elaboration that |
| 27 | /// doesn't change the dependency graph but can change the dependency listings |
| 28 | /// between serializations. |
| 29 | /// |
| 30 | /// The algorithm chosen here to do this is: |
| 31 | /// |
| 32 | /// * Build some metadata about all nodes including their count of outbound |
| 33 | /// edges remaining to be added to the order and a reverse dependency list. |
| 34 | /// * Collect all nodes with 0 outbound edges into a binary heap. |
| 35 | /// * Pop from the binary heap and decrement outbound edges that depend on |
| 36 | /// this node. |
| 37 | /// * Iterate until the dependency ordering is the same size as the dependency |
| 38 | /// array. |
| 39 | /// |
| 40 | /// This sort will also detect when dependencies are missing or when cycles are |
| 41 | /// present and return an error. |
| 42 | pub fn toposort<'a>( |
| 43 | kind: &str, |
| 44 | deps: &IndexMap<&'a str, Vec<Id<'a>>>, |
| 45 | ) -> Result<Vec<&'a str>, Error> { |
| 46 | // Initialize a `State` per-node with the number of outbound edges and |
| 47 | // additionally filling out the `reverse_deps` array. |
| 48 | let mut states = vec![State::default(); deps.len()]; |
| 49 | for (i, (_, edges)) in deps.iter().enumerate() { |
| 50 | states[i].outbound_remaining = edges.len(); |
| 51 | for edge in edges { |
| 52 | let (j, _, _) = deps |
| 53 | .get_full(edge.name) |
| 54 | .ok_or_else(|| Error::NonexistentDep { |
| 55 | span: edge.span, |
| 56 | name: edge.name.to_string(), |
| 57 | kind: kind.to_string(), |
| 58 | highlighted: None, |
| 59 | })?; |
| 60 | states[j].reverse_deps.push(i); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | let mut order = Vec::new(); |
| 65 | let mut heap = BinaryHeap::new(); |
| 66 | |
| 67 | // Seed the `heap` with edges that have no outbound edges |
| 68 | // |
| 69 | // The heap here is keyed by `(usize, &str, usize)` where the first `usize` |
| 70 | // is unique which is what determines the order of the heap. The other two |
| 71 | // fields are metadata used when pulling from the heap. The first `usize` is |
| 72 | // the index of the item within the original dependency map which should |
| 73 | // reflect the original source order of the item. Note that this is stored |
| 74 | // in reverse order to ensure that when there are multiple items in the heap |
| 75 | // the first item in the original order is popped first. |
| 76 | for (i, dep) in deps.keys().enumerate() { |
| 77 | if states[i].outbound_remaining == 0 { |
| 78 | heap.push((deps.len() - i, *dep, i)); |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | // Drain the binary heap which represents all nodes that have had all their |
| 83 | // dependencies processed. Iteratively add to the heap as well as nodes are |
| 84 | // removed. |
| 85 | while let Some((_order, node, i)) = heap.pop() { |
| 86 | order.push(node); |
| 87 | for i in mem::take(&mut states[i].reverse_deps) { |
| 88 | states[i].outbound_remaining -= 1; |
| 89 | if states[i].outbound_remaining == 0 { |
| 90 | let (dep, _) = deps.get_index(i).unwrap(); |
| 91 | heap.push((deps.len() - i, *dep, i)); |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | // If all nodes are present in order then a topological ordering was |
| 97 | // achieved and it can be returned. |
| 98 | if order.len() == deps.len() { |
| 99 | return Ok(order); |
| 100 | } |
| 101 | |
| 102 | // ... otherwise there are still dependencies with remaining edges which |
| 103 | // means that a cycle must be present, so find the cycle and report the |
| 104 | // error. |
| 105 | for (i, state) in states.iter().enumerate() { |
| 106 | if state.outbound_remaining == 0 { |
| 107 | continue; |
| 108 | } |
| 109 | let (_, edges) = deps.get_index(i).unwrap(); |
| 110 | for dep in edges { |
| 111 | let (j, _, _) = deps.get_full(dep.name).unwrap(); |
| 112 | if states[j].outbound_remaining == 0 { |
| 113 | continue; |
| 114 | } |
| 115 | return Err(Error::Cycle { |
| 116 | span: dep.span, |
| 117 | name: dep.name.to_string(), |
| 118 | kind: kind.to_string(), |
| 119 | highlighted: None, |
| 120 | }); |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | unreachable!() |
| 125 | } |
| 126 | |
| 127 | #[derive (Debug)] |
| 128 | pub enum Error { |
| 129 | NonexistentDep { |
| 130 | span: Span, |
| 131 | name: String, |
| 132 | kind: String, |
| 133 | highlighted: Option<String>, |
| 134 | }, |
| 135 | Cycle { |
| 136 | span: Span, |
| 137 | name: String, |
| 138 | kind: String, |
| 139 | highlighted: Option<String>, |
| 140 | }, |
| 141 | } |
| 142 | |
| 143 | impl Error { |
| 144 | pub(crate) fn highlighted(&self) -> Option<&str> { |
| 145 | match self { |
| 146 | Error::NonexistentDep { highlighted: &Option, .. } | Error::Cycle { highlighted: &Option, .. } => { |
| 147 | highlighted.as_deref() |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | pub(crate) fn set_highlighted(&mut self, string: String) { |
| 152 | match self { |
| 153 | Error::NonexistentDep { highlighted: &mut Option, .. } | Error::Cycle { highlighted: &mut Option, .. } => { |
| 154 | *highlighted = Some(string); |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | impl fmt::Display for Error { |
| 161 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 162 | if let Some(s: &str) = self.highlighted() { |
| 163 | return f.write_str(data:s); |
| 164 | } |
| 165 | match self { |
| 166 | Error::NonexistentDep { kind: &String, name: &String, .. } => { |
| 167 | write!(f, " {kind} ` {name}` does not exist" ) |
| 168 | } |
| 169 | Error::Cycle { kind: &String, name: &String, .. } => { |
| 170 | write!(f, " {kind} ` {name}` depends on itself" ) |
| 171 | } |
| 172 | } |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | impl std::error::Error for Error {} |
| 177 | |
| 178 | #[cfg (test)] |
| 179 | mod tests { |
| 180 | use super::*; |
| 181 | |
| 182 | fn id(name: &str) -> Id<'_> { |
| 183 | Id { |
| 184 | name, |
| 185 | span: Span { start: 0, end: 0 }, |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | #[test ] |
| 190 | fn smoke() { |
| 191 | let empty: Vec<&str> = Vec::new(); |
| 192 | assert_eq!(toposort("" , &IndexMap::new()).unwrap(), empty); |
| 193 | |
| 194 | let mut nonexistent = IndexMap::new(); |
| 195 | nonexistent.insert("a" , vec![id("b" )]); |
| 196 | assert!(matches!( |
| 197 | toposort("" , &nonexistent), |
| 198 | Err(Error::NonexistentDep { .. }) |
| 199 | )); |
| 200 | |
| 201 | let mut one = IndexMap::new(); |
| 202 | one.insert("a" , vec![]); |
| 203 | assert_eq!(toposort("" , &one).unwrap(), ["a" ]); |
| 204 | |
| 205 | let mut two = IndexMap::new(); |
| 206 | two.insert("a" , vec![]); |
| 207 | two.insert("b" , vec![id("a" )]); |
| 208 | assert_eq!(toposort("" , &two).unwrap(), ["a" , "b" ]); |
| 209 | |
| 210 | let mut two = IndexMap::new(); |
| 211 | two.insert("a" , vec![id("b" )]); |
| 212 | two.insert("b" , vec![]); |
| 213 | assert_eq!(toposort("" , &two).unwrap(), ["b" , "a" ]); |
| 214 | } |
| 215 | |
| 216 | #[test ] |
| 217 | fn cycles() { |
| 218 | let mut cycle = IndexMap::new(); |
| 219 | cycle.insert("a" , vec![id("a" )]); |
| 220 | assert!(matches!(toposort("" , &cycle), Err(Error::Cycle { .. }))); |
| 221 | |
| 222 | let mut cycle = IndexMap::new(); |
| 223 | cycle.insert("a" , vec![id("b" )]); |
| 224 | cycle.insert("b" , vec![id("c" )]); |
| 225 | cycle.insert("c" , vec![id("a" )]); |
| 226 | assert!(matches!(toposort("" , &cycle), Err(Error::Cycle { .. }))); |
| 227 | } |
| 228 | |
| 229 | #[test ] |
| 230 | fn depend_twice() { |
| 231 | let mut two = IndexMap::new(); |
| 232 | two.insert("b" , vec![id("a" ), id("a" )]); |
| 233 | two.insert("a" , vec![]); |
| 234 | assert_eq!(toposort("" , &two).unwrap(), ["a" , "b" ]); |
| 235 | } |
| 236 | |
| 237 | #[test ] |
| 238 | fn preserve_order() { |
| 239 | let mut order = IndexMap::new(); |
| 240 | order.insert("a" , vec![]); |
| 241 | order.insert("b" , vec![]); |
| 242 | assert_eq!(toposort("" , &order).unwrap(), ["a" , "b" ]); |
| 243 | |
| 244 | let mut order = IndexMap::new(); |
| 245 | order.insert("b" , vec![]); |
| 246 | order.insert("a" , vec![]); |
| 247 | assert_eq!(toposort("" , &order).unwrap(), ["b" , "a" ]); |
| 248 | } |
| 249 | } |
| 250 | |