vcs-graph-3.5.0/.cargo_vcs_info.json0000644000000001521046102023000127530ustar { "git": { "sha1": "6434cceb45adcc37bc7ce63311017faa7e65e899" }, "path_in_vcs": "crates/graph" }vcs-graph-3.5.0/Cargo.lock0000644000000076741046102023000107460ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "portable-atomic" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "proc-macro2" version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" dependencies = [ "libc", "once_cell", "portable-atomic", "pyo3-build-config", 
"pyo3-ffi", "pyo3-macros", ] [[package]] name = "pyo3-build-config" version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" dependencies = [ "libc", "pyo3-build-config", ] [[package]] name = "pyo3-macros" version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", "syn", ] [[package]] name = "pyo3-macros-backend" version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", "syn", ] [[package]] name = "quote" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "rustc-hash" version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "syn" version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "target-lexicon" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "unicode-ident" version = "1.0.24" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "vcs-graph" version = "3.5.0" dependencies = [ "lazy_static", "maplit", "pyo3", "rustc-hash", ] vcs-graph-3.5.0/Cargo.toml0000644000000026151046102023000107570ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "vcs-graph" version = "3.5.0" authors = [ "Martin Packman ", "Jelmer Vernooij ", ] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Graph algorithms for version control systems: topological sort, merge-aware sort, parent maps, and ancestry queries." 
homepage = "https://www.breezy-vcs.org/" readme = "README.md" keywords = [ "vcs", "graph", "toposort", "dag", "breezy", ] categories = [ "algorithms", "data-structures", ] license = "GPL-2.0-or-later" repository = "https://github.com/breezy-team/vcsgraph" [features] pyo3 = ["dep:pyo3"] [lib] name = "vcs_graph" path = "src/lib.rs" [dependencies.lazy_static] version = "1.4.0" [dependencies.pyo3] version = "=0.28" optional = true [dependencies.rustc-hash] version = "2" [dev-dependencies.maplit] version = "1.0.2" vcs-graph-3.5.0/Cargo.toml.orig000064400000000000000000000013131046102023000144100ustar 00000000000000[package] name = "vcs-graph" version = "3.5.0" authors = [ "Martin Packman ", "Jelmer Vernooij "] edition = "2021" description = "Graph algorithms for version control systems: topological sort, merge-aware sort, parent maps, and ancestry queries." license = "GPL-2.0-or-later" homepage = "https://www.breezy-vcs.org/" repository = "https://github.com/breezy-team/vcsgraph" readme = "README.md" keywords = ["vcs", "graph", "toposort", "dag", "breezy"] categories = ["algorithms", "data-structures"] [lib] [dependencies] lazy_static = "1.4.0" pyo3 = { workspace = true, optional = true } rustc-hash = "2" [dev-dependencies] maplit = "1.0.2" [features] pyo3 = ["dep:pyo3"] vcs-graph-3.5.0/README.md000064400000000000000000000026031046102023000130030ustar 00000000000000# vcs-graph Graph algorithms for version control systems. `vcs-graph` provides building blocks used by version control tools: topological sorting (including merge-aware sorting that preserves branch structure), parent-map manipulation, least common ancestor queries, and related operations. It is the Rust core behind the [`vcsgraph`][vcsgraph-py] Python package, originally extracted from the Breezy version control system. ## Features - `TopoSorter` — iterative topological sort of a parent graph. 
- `MergeSorter` — merge-aware topological sort that assigns revision numbers and tracks merge depth, suitable for rendering commit history. - `ParentMap` / `ChildMap` — parent/child map types with utilities like `invert_parent_map` and `collapse_linear_regions`. - `ParentsProvider` trait with `DictParentsProvider` and `StackedParentsProvider` implementations. - Optional `pyo3` feature for exposing these types to Python. ## Example ```rust use std::collections::HashMap; use vcs_graph::tsort::TopoSorter; let graph: HashMap<&str, Vec<&str>> = HashMap::from([ ("A", vec![]), ("B", vec!["A"]), ("C", vec!["A"]), ("D", vec!["B", "C"]), ]); let sorted = TopoSorter::new(graph.into_iter()).sorted().unwrap(); // Parents always come before children. ``` ## License GPL-2.0-or-later. See `COPYING.txt` in the repository root. [vcsgraph-py]: https://pypi.org/project/vcsgraph/ vcs-graph-3.5.0/src/bfs.rs000064400000000000000000000722211046102023000134360ustar 00000000000000//! Breadth-first ancestry search. //! //! Ported from the `_BreadthFirstSearcher` class in `vcsgraph/graph.py`. //! The searcher walks the ancestry of a set of revisions, optionally with //! ghosts split out, and supports mid-walk modifications via //! [`BfsState::start_searching`] and [`BfsState::stop_searching_any`]. //! //! The state is decoupled from the parents provider: each advance method //! takes a `&impl ParentsProvider` explicitly. This lets Python bindings //! keep the provider adapter and the state as sibling fields in the same //! pyclass without running into self-reference problems. use crate::{ParentMap, Parents, ParentsProvider}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::HashSet; use std::hash::Hash; /// Which kind of result the searcher returned on its most recent call. /// /// Callers can interleave `next` and `next_with_ghosts` calls; the searcher /// transparently advances the underlying state when the mode flips. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReturnMode {
    /// Most recent return was a plain `next()` — revisions yielded before
    /// their parents were queried, so ghosts are mixed in with real nodes.
    Next,
    /// Most recent return was `next_with_ghosts()` — revisions yielded after
    /// their parents were queried, so ghosts are split out.
    NextWithGhosts,
}

/// Outcome of a `_do_query` step.
///
/// `K` is the revision key type.
struct QueryResult<K> {
    /// Nodes present in the provider's response.
    found: FxHashSet<K>,
    /// Nodes not found (ghosts).
    ghosts: FxHashSet<K>,
    /// Parents of the found nodes that we haven't seen before.
    next: FxHashSet<K>,
    /// Parent lists of the found nodes, copied from the provider's response.
    /// Found nodes whose parents are reported as `Parents::Ghost` have no
    /// entry here.
    parents: FxHashMap<K, Vec<K>>,
}

/// Mutable state of a breadth-first ancestry search.
///
/// Constructed via [`BfsState::new`]; advanced with [`next`](Self::next) or
/// [`next_with_ghosts`](Self::next_with_ghosts), which both take a reference
/// to a parents provider. Mid-walk mutations go through
/// [`start_searching`](Self::start_searching) and
/// [`stop_searching_any`](Self::stop_searching_any).
///
/// `K` is the revision key type. The struct itself carries no bounds; the
/// `impl` block requires what the hash sets need.
pub struct BfsState<K> {
    /// All revisions the searcher has ever visited (seen or about to visit).
    pub seen: FxHashSet<K>,
    /// Revisions the caller originally asked to search from, plus any added
    /// via `start_searching`.
    pub started_keys: FxHashSet<K>,
    /// Revisions the caller explicitly asked to not descend through, plus
    /// any ghosts encountered. Ghosts are implicit stop points so the search
    /// can be repeated after ghosts are filled in.
    pub stopped_keys: FxHashSet<K>,
    /// Current frontier: the keys that the next provider query will ask about.
    next_query: FxHashSet<K>,
    /// Keys the most recent query found in the provider.
    current_present: FxHashSet<K>,
    /// Keys the most recent query did not find in the provider.
    current_ghosts: FxHashSet<K>,
    /// Parent lists recorded by the most recent query, keyed by found node.
    current_parents: FxHashMap<K, Vec<K>>,
    /// Which advance method produced the most recent return value.
    returning: ReturnMode,
    /// Number of iterations performed so far.
    iterations: usize,
}

// NOTE(review): the bounds below are the minimum the hash-set operations in
// this impl need; confirm against the bounds `ParentsProvider` places on its
// key type.
impl<K: Hash + Eq + Clone> BfsState<K> {
    /// Start a new search from `revisions`.
pub fn new>(revisions: I) -> Self { let next_query: FxHashSet = revisions.into_iter().collect(); let started_keys: FxHashSet = next_query.iter().cloned().collect(); BfsState { seen: FxHashSet::default(), started_keys, stopped_keys: FxHashSet::default(), next_query, current_present: FxHashSet::default(), current_ghosts: FxHashSet::default(), current_parents: FxHashMap::default(), returning: ReturnMode::NextWithGhosts, iterations: 0, } } /// Return the number of iterations performed so far. pub fn iterations(&self) -> usize { self.iterations } /// Borrow the current frontier (next query set). /// /// Exposed so bindings can reflect Python's `_next_query` attribute, /// which existing callers in `graph.py` read (but do not mutate). pub fn next_query(&self) -> &FxHashSet { &self.next_query } /// Snapshot of `(started_keys, excludes, included_keys)` describing what /// the searcher has reached. Matches Python's `get_state` return shape. /// /// This method intentionally calls the provider if the searcher is in /// `Next` mode, since we need the current query's children in order to /// list their parents as excludes. The subsequent iteration advances /// normally; the preview read is backed out of `seen`. pub fn get_state>( &mut self, provider: &P, ) -> (FxHashSet, FxHashSet, FxHashSet) { let next_query = if self.returning == ReturnMode::Next { let result = Self::do_query(&mut self.seen, &self.next_query, provider); // Undo the `seen` updates the preview made. for k in &result.next { self.seen.remove(k); } let mut nq = result.next; nq.extend(result.ghosts); nq } else { self.next_query.clone() }; let mut excludes = self.stopped_keys.clone(); excludes.extend(next_query); let included: FxHashSet = self.seen.difference(&excludes).cloned().collect(); (self.started_keys.clone(), excludes, included) } /// Advance the searcher and return the set yielded by Python's /// `__next__` / `next`. 
/// /// Each call yields the current query before its parents are queried, /// so ghosts are mixed in with present revisions. /// /// Returns `None` when there is nothing left to search. pub fn next_set>(&mut self, provider: &P) -> Option> { if self.returning != ReturnMode::Next { self.returning = ReturnMode::Next; self.iterations += 1; } else { self.advance(provider); } if self.next_query.is_empty() { return None; } self.seen.extend(self.next_query.iter().cloned()); Some(self.next_query.clone()) } /// Advance the searcher and return `(present, ghosts)` the way Python's /// `next_with_ghosts` does. /// /// Returns `None` when there is nothing left to search. pub fn next_with_ghosts>( &mut self, provider: &P, ) -> Option<(FxHashSet, FxHashSet)> { if self.returning != ReturnMode::NextWithGhosts { self.returning = ReturnMode::NextWithGhosts; self.advance(provider); } if self.next_query.is_empty() { return None; } self.advance(provider); Some((self.current_present.clone(), self.current_ghosts.clone())) } fn advance>(&mut self, provider: &P) { self.iterations += 1; // Split borrow: `do_query` only needs to read `next_query` and // write `seen`, so we pass them as separate references and avoid // cloning `next_query` on every advance. let result = Self::do_query(&mut self.seen, &self.next_query, provider); self.current_present = result.found; self.current_ghosts = result.ghosts; self.next_query = result.next; self.current_parents = result.parents; // Ghosts become implicit stop points. self.stopped_keys .extend(self.current_ghosts.iter().cloned()); } fn do_query>( seen: &mut FxHashSet, revisions: &FxHashSet, provider: &P, ) -> QueryResult { seen.extend(revisions.iter().cloned()); // ParentsProvider takes a std HashSet by reference. 
let mut std_set: HashSet = HashSet::with_capacity(revisions.len()); for k in revisions { std_set.insert(k.clone()); } let parent_map: ParentMap = provider.get_parent_map(&std_set); let mut found: FxHashSet = FxHashSet::default(); let mut parents_of_found: FxHashSet = FxHashSet::default(); let mut parents_owned: FxHashMap> = FxHashMap::default(); for (rev_id, parents) in parent_map.iter() { found.insert(rev_id.clone()); match parents { Parents::Known(ps) => { parents_owned.insert(rev_id.clone(), ps.clone()); for p in ps { if !seen.contains(p) { parents_of_found.insert(p.clone()); } } } // Python treats None parents as "continue" — no new parents // contributed. Parents::Ghost => {} } } let ghosts: FxHashSet = revisions.difference(&found).cloned().collect(); QueryResult { found, ghosts, next: parents_of_found, parents: parents_owned, } } /// Find already-seen ancestors of `revisions`. /// /// This walks backwards from `revisions` through `seen` keys only, /// querying the provider for parents. It matches the Python behavior: /// nodes not yet searched (in `next_query` when we're in `Next` mode) /// are skipped so we don't probe ahead of the search frontier. pub fn find_seen_ancestors(&self, revisions: I, provider: &P) -> FxHashSet where I: IntoIterator, P: ParentsProvider, { let mut pending: FxHashSet = revisions .into_iter() .filter(|r| self.seen.contains(r)) .collect(); let mut seen_ancestors: FxHashSet = pending.iter().cloned().collect(); // In `Next` mode `seen` contains nodes that have been *returned* but // whose parents haven't been queried yet. Skip those so we don't // probe ahead of the search frontier. 
let empty: FxHashSet = FxHashSet::default(); let not_searched_yet: &FxHashSet = if self.returning == ReturnMode::Next { &self.next_query } else { &empty }; pending.retain(|k| !not_searched_yet.contains(k)); while !pending.is_empty() { let mut std_set: HashSet = HashSet::with_capacity(pending.len()); for k in &pending { std_set.insert(k.clone()); } let parent_map = provider.get_parent_map(&std_set); let mut all_parents: Vec = Vec::new(); for (_, parents) in parent_map.iter() { if let Parents::Known(ps) = parents { all_parents.extend(ps.iter().cloned()); } } let mut next_pending: FxHashSet = FxHashSet::default(); for p in all_parents { if self.seen.contains(&p) && !seen_ancestors.contains(&p) { next_pending.insert(p); } } seen_ancestors.extend(next_pending.iter().cloned()); next_pending.retain(|k| !not_searched_yet.contains(k)); pending = next_pending; } seen_ancestors } /// Stop searching any of `revisions`. Returns the set of revisions /// actually removed from the current search frontier (not the ones that /// had already passed). pub fn stop_searching_any>(&mut self, revisions: I) -> FxHashSet { let revisions: FxHashSet = revisions.into_iter().collect(); let stopped: FxHashSet = if self.returning == ReturnMode::Next { let stopped: FxHashSet = self.next_query.intersection(&revisions).cloned().collect(); self.next_query.retain(|k| !revisions.contains(k)); stopped } else { let stopped_present: FxHashSet = self .current_present .intersection(&revisions) .cloned() .collect(); let stopped_ghosts: FxHashSet = self .current_ghosts .intersection(&revisions) .cloned() .collect(); let stopped: FxHashSet = stopped_present.union(&stopped_ghosts).cloned().collect(); self.current_present.retain(|k| !revisions.contains(k)); self.current_ghosts.retain(|k| !revisions.contains(k)); // Stopping X should stop returning parents of X — but only if no // other current node still references the same parent. 
Count // references to each parent from stopped_present, then decrement // for each non-stopped reference. let mut stop_rev_references: FxHashMap = FxHashMap::default(); for rev in &stopped_present { if let Some(parents) = self.current_parents.get(rev) { for parent_id in parents { *stop_rev_references.entry(parent_id.clone()).or_insert(0) += 1; } } } for parents in self.current_parents.values() { for parent_id in parents { if let Some(count) = stop_rev_references.get_mut(parent_id) { *count -= 1; } } } let stop_parents: FxHashSet = stop_rev_references .into_iter() .filter_map(|(k, refs)| if refs == 0 { Some(k) } else { None }) .collect(); self.next_query.retain(|k| !stop_parents.contains(k)); stopped }; self.stopped_keys.extend(stopped.iter().cloned()); self.stopped_keys.extend(revisions); stopped } /// Add more revisions to the search. /// /// In `NextWithGhosts` mode this performs an immediate query on the new /// revisions and returns `Some((present, ghosts))`. In `Next` mode the /// new revisions join the current query without a provider call and the /// function returns `None`. 
pub fn start_searching<I, P>(
        &mut self,
        revisions: I,
        provider: &P,
    ) -> Option<(FxHashSet<K>, FxHashSet<K>)>
    where
        I: IntoIterator<Item = K>,
        P: ParentsProvider<K>,
    {
        let revisions: FxHashSet<K> = revisions.into_iter().collect();
        self.started_keys.extend(revisions.iter().cloned());

        match self.returning {
            ReturnMode::Next => {
                // Only keys not seen before join the frontier; in this mode
                // the frontier counts as seen as soon as it is queued.
                let new_revisions: FxHashSet<K> =
                    revisions.difference(&self.seen).cloned().collect();
                self.next_query.extend(new_revisions.iter().cloned());
                self.seen.extend(new_revisions);
                None
            }
            ReturnMode::NextWithGhosts => {
                // Query all requested keys immediately and merge the result
                // into the current state.
                let QueryResult {
                    found,
                    ghosts,
                    next,
                    parents,
                } = Self::do_query(&mut self.seen, &revisions, provider);
                // Ghosts become implicit stop points, as in `advance`.
                self.stopped_keys.extend(ghosts.iter().cloned());
                self.current_present.extend(found.iter().cloned());
                self.current_ghosts.extend(ghosts.iter().cloned());
                self.next_query.extend(next);
                // Newer parent lists overwrite any existing entry for a key.
                self.current_parents.extend(parents);
                Some((found, ghosts))
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::DictParentsProvider;
    use std::collections::HashMap;

    /// Build a provider from `(key, parents)` pairs.
    fn provider(edges: &[(&'static str, &[&'static str])]) -> DictParentsProvider<&'static str> {
        let map: HashMap<&'static str, Vec<&'static str>> =
            edges.iter().map(|(k, ps)| (*k, ps.to_vec())).collect();
        DictParentsProvider::from(map)
    }

    /// Collect a fixed-size array of keys into the set type the searcher returns.
    fn as_set<const N: usize>(xs: [&'static str; N]) -> FxHashSet<&'static str> {
        xs.into_iter().collect()
    }

    #[test]
    fn next_walks_linear() {
        // a <- b <- c
        let p = provider(&[("a", &[]), ("b", &["a"]), ("c", &["b"])]);
        let mut s = BfsState::new(["c"]);
        assert_eq!(s.next_set(&p), Some(as_set(["c"])));
        assert_eq!(s.next_set(&p), Some(as_set(["b"])));
        assert_eq!(s.next_set(&p), Some(as_set(["a"])));
        assert_eq!(s.next_set(&p), None);
    }

    #[test]
    fn next_with_ghosts_splits() {
        // head -> present -> (child, ghost); child has no parents; ghost missing
        let p = provider(&[
            ("head", &["present"]),
            ("present", &["child", "ghost"]),
            ("child", &[]),
        ]);
        let mut s = BfsState::new(["head"]);
        assert_eq!(s.next_with_ghosts(&p), Some((as_set(["head"]), as_set([]))));
        assert_eq!(
            s.next_with_ghosts(&p),
Some((as_set(["present"]), as_set([]))) ); assert_eq!( s.next_with_ghosts(&p), Some((as_set(["child"]), as_set(["ghost"]))) ); assert_eq!(s.next_with_ghosts(&p), None); } #[test] fn next_mode_mixes_ghosts_in_with_present() { // Same graph as above, but via next() — ghost should appear alongside child. let p = provider(&[ ("head", &["present"]), ("present", &["child", "ghost"]), ("child", &[]), ]); let mut s = BfsState::new(["head"]); assert_eq!(s.next_set(&p), Some(as_set(["head"]))); assert_eq!(s.next_set(&p), Some(as_set(["present"]))); assert_eq!(s.next_set(&p), Some(as_set(["child", "ghost"]))); assert_eq!(s.next_set(&p), None); } #[test] fn stop_searching_any_next_mode() { // In Next mode, `next_query` holds the set just returned by `next()` // (since the caller is given the query, not the results). So stopping // the set that was just yielded removes it from the frontier. let p = provider(&[ ("head", &["present"]), ("present", &["stopped"]), ("stopped", &[]), ]); let mut s = BfsState::new(["head"]); assert_eq!(s.next_set(&p), Some(as_set(["head"]))); assert_eq!(s.next_set(&p), Some(as_set(["present"]))); let stopped = s.stop_searching_any(["present"]); assert_eq!(stopped, as_set(["present"])); // With `present` stopped before its parents are queried, the search // is now exhausted. assert_eq!(s.next_set(&p), None); } #[test] fn start_searching_next_with_ghosts_queries_immediately() { let p = provider(&[("new_root", &["its_parent"]), ("its_parent", &[])]); let mut s: BfsState<&'static str> = BfsState::new([] as [&'static str; 0]); let (found, ghosts) = s.start_searching(["new_root", "ghost"], &p).unwrap(); assert!(found.contains(&"new_root")); assert!(ghosts.contains(&"ghost")); } /// Translated from `test_breadth_first_search_start_ghosts` in /// `vcsgraph/tests/test_graph.py`: starting with only a ghost, the first /// step yields just the ghost and then the search is exhausted. 
#[test] fn start_with_only_a_ghost() { let p = provider(&[("a-ghost", &[])]); let mut s = BfsState::new(["a-ghost"]); assert_eq!(s.next_set(&p), Some(as_set(["a-ghost"]))); assert_eq!(s.next_set(&p), None); } /// Translated from `test_breadth_first_change_search`: stop the current /// frontier, start a new search from an unrelated revision, and /// verify the BFS picks up the new revision's ancestors. #[test] fn change_search_via_stop_and_start() { let p = provider(&[ ("head", &["present"]), ("present", &["stopped"]), ("stopped", &[]), ("other", &["other_2"]), ("other_2", &[]), ]); let mut s = BfsState::new(["head"]); assert_eq!(s.next_with_ghosts(&p), Some((as_set(["head"]), as_set([])))); assert_eq!( s.next_with_ghosts(&p), Some((as_set(["present"]), as_set([]))) ); assert_eq!(s.stop_searching_any(["present"]), as_set(["present"])); let (present, ghosts) = s.start_searching(["other", "other_ghost"], &p).unwrap(); assert_eq!(present, as_set(["other"])); assert_eq!(ghosts, as_set(["other_ghost"])); assert_eq!( s.next_with_ghosts(&p), Some((as_set(["other_2"]), as_set([]))) ); assert_eq!(s.next_with_ghosts(&p), None); } const NULL: &str = "null:"; /// Mirrors `test_breadth_first_search_change_next_to_next_with_ghosts`: /// interleave `next()` and `next_with_ghosts()` on the same searcher /// and verify both modes produce sensible values. #[test] fn change_next_to_next_with_ghosts() { let p = provider(&[ ("head", &["present"]), ("present", &["child", "ghost"]), ("child", &[]), ]); let mut s = BfsState::new(["head"]); assert_eq!(s.next_with_ghosts(&p), Some((as_set(["head"]), as_set([])))); assert_eq!(s.next_set(&p), Some(as_set(["present"]))); assert_eq!( s.next_with_ghosts(&p), Some((as_set(["child"]), as_set(["ghost"]))) ); assert_eq!(s.next_set(&p), None); // Symmetric: start with next(), switch to next_with_ghosts(). 
let mut s = BfsState::new(["head"]); assert_eq!(s.next_set(&p), Some(as_set(["head"]))); assert_eq!( s.next_with_ghosts(&p), Some((as_set(["present"]), as_set([]))) ); assert_eq!(s.next_set(&p), Some(as_set(["child", "ghost"]))); assert_eq!(s.next_with_ghosts(&p), None); } /// Mirrors `test_breadth_first_get_result_excludes_current_pending`: /// at the start, nothing is seen; after each advance, `get_state()` /// reports the started keys, the excluded set, and the included /// (fully explored) set. #[test] fn get_state_excludes_current_pending() { let p = provider(&[("head", &["child"]), ("child", &[NULL]), (NULL, &[])]); let mut s = BfsState::new(["head"]); let (started, excludes, included) = s.get_state(&p); assert_eq!(started, as_set(["head"])); assert_eq!(excludes, as_set(["head"])); assert_eq!(included, as_set([])); assert_eq!(s.seen, as_set([])); // After next: head is yielded, still excluded because child is // the next frontier. s.next_set(&p); let (_, excludes, included) = s.get_state(&p); assert_eq!(excludes, as_set(["child"])); assert_eq!(included, as_set(["head"])); assert_eq!(s.seen, as_set(["head"])); // After child: null is the next frontier. s.next_set(&p); let (_, excludes, included) = s.get_state(&p); assert_eq!(excludes, as_set([NULL])); assert_eq!(included, as_set(["head", "child"])); // After null: nothing left in the frontier. s.next_set(&p); let (_, excludes, included) = s.get_state(&p); assert_eq!(excludes, as_set([])); assert_eq!(included, as_set(["head", "child", NULL])); } /// Mirrors `test_breadth_first_stop_searching_not_queried`: a client /// may tell the searcher to stop a key, and stopped_keys records it /// for later exclusion from the result's included-set. 
#[test] fn stop_searching_records_stops() { let p = provider(&[ ("head", &["child", "ghost1"]), ("child", &[NULL]), (NULL, &[]), ]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head s.stop_searching_any([NULL, "ghost1"]); // The stopped keys are in stopped_keys regardless of whether // they've been visited yet. assert!(s.stopped_keys.contains(&NULL)); assert!(s.stopped_keys.contains(&"ghost1")); // get_state() should exclude the stopped keys from the // "included" snapshot. let (_, excludes, included) = s.get_state(&p); assert!(excludes.contains(&NULL)); assert!(excludes.contains(&"ghost1")); assert!(!included.contains(&NULL)); assert!(!included.contains(&"ghost1")); } /// Mirrors `test_breadth_first_stop_searching_late`: stopping a key /// from an older iteration should still exclude it from the result. #[test] fn stop_searching_late() { let p = provider(&[ ("head", &["middle"]), ("middle", &["child"]), ("child", &[NULL]), (NULL, &[]), ]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head s.next_set(&p); // yields middle s.next_set(&p); // yields child // Now stop both middle and child retroactively. s.stop_searching_any(["middle", "child"]); assert!(s.stopped_keys.contains(&"middle")); assert!(s.stopped_keys.contains(&"child")); // After the stop, the remaining state reflects that only the // original head is included. let (_, excludes, included) = s.get_state(&p); assert!(excludes.contains(&"middle")); assert!(excludes.contains(&"child")); assert_eq!(included, as_set(["head"])); } /// Mirrors `test_breadth_first_get_result_starting_a_ghost_ghost_is_excluded`: /// start_searching a ghost key mid-walk. The ghost is recorded in seen /// but gets filed under stopped_keys so it is excluded from included(). 
#[test] fn start_searching_a_ghost_excludes_it() { let p = provider(&[("head", &["child"]), ("child", &[NULL]), (NULL, &[])]); let mut s = BfsState::new(["head"]); // Start-searching a ghost while in next_with_ghosts mode (the // default after construction). This returns (present, ghosts). let (present, ghosts) = s.start_searching(["ghost"], &p).unwrap(); assert_eq!(present, as_set([])); assert_eq!(ghosts, as_set(["ghost"])); // ghost is now in stopped_keys so included() doesn't report it. assert!(s.stopped_keys.contains(&"ghost")); } /// Mirrors `test_breadth_first_revision_count_includes_NULL_REVISION`: /// walking to the sentinel should count it as part of `seen`. #[test] fn walk_includes_null_revision() { let p = provider(&[("head", &[NULL]), (NULL, &[])]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head s.next_set(&p); // yields null assert_eq!(s.seen, as_set(["head", NULL])); assert_eq!(s.next_set(&p), None); } /// Mirrors `test_breadth_first_search_get_result_after_StopIteration`: /// hitting StopIteration should not invalidate the searcher; a /// subsequent get_state() still works. #[test] fn get_state_after_stop_iteration() { let p = provider(&[("head", &[NULL]), (NULL, &[])]); let mut s = BfsState::new(["head"]); while s.next_set(&p).is_some() {} // No more to yield. assert_eq!(s.next_set(&p), None); let (started, _excludes, included) = s.get_state(&p); assert_eq!(started, as_set(["head"])); assert!(included.contains(&"head")); assert!(included.contains(&NULL)); } /// find_seen_ancestors should walk the parent chain and collect all /// ancestors already in `seen` — not new ones. #[test] fn find_seen_ancestors_walks_seen_chain() { let p = provider(&[ ("head", &["middle"]), ("middle", &["child"]), ("child", &[NULL]), (NULL, &[]), ]); let mut s = BfsState::new(["head"]); // Walk the whole thing. while s.next_set(&p).is_some() {} // Ask for ancestors of "middle" — should find middle, child, null. 
let anc = s.find_seen_ancestors(["middle"], &p); assert!(anc.contains(&"middle")); assert!(anc.contains(&"child")); assert!(anc.contains(&NULL)); } /// find_seen_ancestors should filter out keys not in seen. #[test] fn find_seen_ancestors_skips_unseen() { let p = provider(&[("head", &[NULL]), (NULL, &[]), ("unrelated", &[])]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head let anc = s.find_seen_ancestors(["unrelated"], &p); // "unrelated" isn't in seen, so find_seen_ancestors returns an // empty set for it. assert!(!anc.contains(&"unrelated")); } /// stop_searching_any should return only the keys that were actually /// removed from the current frontier (not keys that had already been /// processed). #[test] fn stop_searching_any_returns_only_effective_stops() { let p = provider(&[("head", &["a"]), ("a", &["b"]), ("b", &[])]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head s.next_set(&p); // yields a // `head` is already returned; stopping it should report it as // stopped but a no longer in frontier means only keys that are in // next_query at stop time get returned. let stopped = s.stop_searching_any(["a"]); assert_eq!(stopped, as_set(["a"])); // After stopping a, the search is exhausted. assert_eq!(s.next_set(&p), None); } /// Starting an already-seen key should be a no-op on `seen` and the /// key should not be re-queried. #[test] fn start_searching_already_seen_is_noop() { let p = provider(&[("head", &["child"]), ("child", &[])]); let mut s = BfsState::new(["head"]); s.next_set(&p); // yields head, frontier now contains "child" let pre_seen = s.seen.clone(); s.start_searching(["head"], &p); // seen should be unchanged (head was already there). assert_eq!(s.seen, pre_seen); } } vcs-graph-3.5.0/src/graph.rs000064400000000000000000002446021046102023000137710ustar 00000000000000//! Incremental graph queries backed by a [`ParentsProvider`]. //! //! Ported incrementally from `vcsgraph/graph.py`. Phase 1 covers the trivial //! 
methods that don't need a BFS searcher: parent/child map queries, //! topological ordering (delegated to [`crate::tsort::TopoSorter`]), and the //! left-hand ancestry walks. use crate::bfs::BfsState; use crate::parents_provider::{DictParentsProvider, ParentsProvider}; use crate::tsort::TopoSorter; use crate::{Error, ParentMap, Parents}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::{BTreeMap, HashMap}; use std::hash::Hash; /// A revision graph backed by an arbitrary [`ParentsProvider`]. /// /// Unlike [`crate::KnownGraph`] this type does not own the full ancestry — /// queries are dispatched to the provider on demand. pub struct Graph where K: Hash + Eq + Clone, P: ParentsProvider, { provider: P, _marker: std::marker::PhantomData, } /// An error returned from one of `Graph`'s traversal methods. #[derive(Debug, Clone, PartialEq, Eq)] pub enum GraphError { /// A revision reachable via the ancestry walk turned out to be a ghost, /// so we cannot compute a revno for it. GhostRevision { target: K, ghost: K }, /// A revision was not known to the provider at all. RevisionNotPresent(K), /// A cycle was detected during a topological walk. Cycle(Vec), } impl std::fmt::Display for GraphError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { GraphError::GhostRevision { target, ghost } => write!( f, "ghost revision {ghost} reached while finding revno for {target}" ), GraphError::RevisionNotPresent(key) => { write!(f, "revision {key} not present in graph") } GraphError::Cycle(nodes) => { write!(f, "cycle detected: ")?; for (i, n) in nodes.iter().enumerate() { if i > 0 { write!(f, " -> ")?; } write!(f, "{n}")?; } Ok(()) } } } } impl std::error::Error for GraphError {} impl Graph where K: Hash + Eq + Clone, P: ParentsProvider, { /// Construct a new `Graph` backed by `provider`. pub fn new(provider: P) -> Self { Graph { provider, _marker: std::marker::PhantomData, } } /// Borrow the underlying parents provider. 
pub fn parents_provider(&self) -> &P {
    &self.provider
}

/// Look up the parents of `keys` through the provider.
///
/// Keys the provider does not know are left out of the result; ghosts are
/// reported as [`Parents::Ghost`].
pub fn get_parent_map<I>(&self, keys: I) -> ParentMap<K>
where
    I: IntoIterator<Item = K>,
{
    // The provider expects a std `HashSet`, so build exactly that instead
    // of collecting into an `FxHashSet` and copying.
    let requested = keys.into_iter().collect::<std::collections::HashSet<K>>();
    self.provider.get_parent_map(&requested)
}

/// Invert [`get_parent_map`](Self::get_parent_map) for `keys`, giving a
/// parent → children mapping.
///
/// Only the supplied `keys` are considered as children, and ghosts
/// contribute nothing. Children are visited in sorted key order, so each
/// child list comes out sorted, mirroring Python's `sorted()` behavior.
pub fn get_child_map<I>(&self, keys: I) -> BTreeMap<K, Vec<K>>
where
    K: Ord,
    I: IntoIterator<Item = K>,
{
    // Re-key the parent map through a BTreeMap so children are walked in
    // sorted order.
    let by_child: BTreeMap<K, Parents<K>> = self.get_parent_map(keys).into_iter().collect();

    let mut children_of: BTreeMap<K, Vec<K>> = BTreeMap::new();
    for (child, parents) in by_child {
        let Parents::Known(known) = parents else {
            continue;
        };
        for parent in known {
            children_of.entry(parent).or_default().push(child.clone());
        }
    }
    children_of
}

/// Iterate over the ancestry of `revision_ids` in topological order.
///
/// This delegates to [`TopoSorter`]. The topological order only ensures
/// that parents come before children within the ancestry that is
/// reachable from the input revisions.
pub fn iter_topo_order(&self, revisions: I) -> Result, Error> where K: std::fmt::Debug, I: IntoIterator, { let pm = self.get_parent_map(revisions); let iter = pm.into_iter().filter_map(|(k, parents)| match parents { Parents::Known(ps) => Some((k, ps)), Parents::Ghost => None, }); TopoSorter::new(iter).sorted() } /// Walk the left-hand ancestry of `start_key`, stopping when a key in /// `stop_keys` is encountered. Yields `start_key` first, then its /// left-most parent, and so on. /// /// Errors with [`GraphError::RevisionNotPresent`] if a key in the walk is /// missing from the provider. pub fn iter_lefthand_ancestry( &self, start_key: K, stop_keys: S, ) -> Result, GraphError> where S: IntoIterator, { let stop_keys: FxHashSet = stop_keys.into_iter().collect(); let mut result = Vec::new(); let mut next_key = start_key; loop { if stop_keys.contains(&next_key) { return Ok(result); } let pm = self.get_parent_map(std::iter::once(next_key.clone())); let parents = match pm.get(&next_key) { Some(Parents::Known(ps)) => ps.clone(), Some(Parents::Ghost) => { return Err(GraphError::RevisionNotPresent(next_key)); } None => return Err(GraphError::RevisionNotPresent(next_key)), }; result.push(next_key.clone()); if parents.is_empty() { return Ok(result); } next_key = parents.into_iter().next().unwrap(); } } /// Iterate over the ancestry reachable from `revision_ids`, yielding /// `(key, parents)` pairs in a BFS order. Ghosts are yielded with /// `Parents::Ghost`. 
pub fn iter_ancestry(&self, revision_ids: I) -> Vec<(K, Parents)> where I: IntoIterator, { let mut pending: FxHashSet = revision_ids.into_iter().collect(); let mut processed: FxHashSet = FxHashSet::default(); let mut out: Vec<(K, Parents)> = Vec::new(); while !pending.is_empty() { processed.extend(pending.iter().cloned()); let next_map = self.get_parent_map(pending.iter().cloned()); let mut next_pending: FxHashSet = FxHashSet::default(); let mut seen_in_map: FxHashSet = FxHashSet::default(); for (k, parents) in next_map.iter() { seen_in_map.insert(k.clone()); if let Parents::Known(ps) = parents { for p in ps { if !processed.contains(p) { next_pending.insert(p.clone()); } } } out.push((k.clone(), parents.clone())); } // Keys in `pending` that the provider didn't return are ghosts. for ghost in pending.difference(&seen_in_map) { out.push((ghost.clone(), Parents::Ghost)); } pending = next_pending; } out } /// Find the left-hand distance from `target_revision_id` to the origin. /// /// `known_distances` is an iterable of `(revision_id, distance)` pairs /// that seed the search. The origin sentinel (any key equal to `null`, /// supplied by the caller) should be included with distance 0. /// /// This mirrors Python's `find_distance_to_null`, which hard-codes the /// sentinel `NULL_REVISION = b"null:"`. Keeping the sentinel Python-side /// lets the Rust core stay string-typed without baking in bytes. 
pub fn find_distance_to_null( &self, target_revision_id: K, known_distances: impl IntoIterator, null: K, ) -> Result> { let mut known_revnos: FxHashMap = known_distances.into_iter().collect(); let mut cur_tip = target_revision_id.clone(); let mut num_steps: i64 = 0; known_revnos.insert(null.clone(), 0); let mut searching_known_tips: Vec = known_revnos.keys().cloned().collect(); let mut unknown_searched: FxHashMap = FxHashMap::default(); while !known_revnos.contains_key(&cur_tip) { unknown_searched.insert(cur_tip.clone(), num_steps); num_steps += 1; let mut to_search: FxHashSet = searching_known_tips.iter().cloned().collect(); to_search.insert(cur_tip.clone()); let parent_map = self.get_parent_map(to_search); let parents = match parent_map.get(&cur_tip) { Some(Parents::Known(ps)) if !ps.is_empty() => ps, _ => { return Err(GraphError::GhostRevision { target: target_revision_id, ghost: cur_tip, }); } }; let next_tip = parents[0].clone(); let mut next_known_tips: Vec = Vec::new(); for revision_id in &searching_known_tips { let parents = match parent_map.get(revision_id) { Some(Parents::Known(ps)) if !ps.is_empty() => ps, _ => continue, }; let next = parents[0].clone(); let next_revno = known_revnos[revision_id] - 1; if let Some(unknown_steps) = unknown_searched.get(&next) { return Ok(next_revno + unknown_steps); } if known_revnos.contains_key(&next) { continue; } known_revnos.insert(next.clone(), next_revno); next_known_tips.push(next); } searching_known_tips = next_known_tips; cur_tip = next_tip; } Ok(known_revnos[&cur_tip] + num_steps) } /// Find left-hand distances for every key in `keys`. /// /// Ghosts are reported as distance `-1`, matching the Python contract. 
pub fn find_lefthand_distances( &self, keys: impl IntoIterator, null: K, ) -> FxHashMap { let mut result: FxHashMap = FxHashMap::default(); let mut known: Vec<(K, i64)> = Vec::new(); let mut ghosts: Vec = Vec::new(); for key in keys { match self.find_distance_to_null(key.clone(), known.iter().cloned(), null.clone()) { Ok(d) => { known.push((key.clone(), d)); result.insert(key, d); } Err(GraphError::GhostRevision { .. }) => ghosts.push(key), Err(_) => { // Other errors are unreachable from find_distance_to_null // in practice. Match Python by skipping. } } } for ghost in ghosts { result.insert(ghost, -1); } result } /// Find the first lefthand ancestor of `tip_key` that merged `merged_key`. /// /// Walks the lefthand ancestry of `tip_key` one step at a time, stopping /// as soon as a candidate is not a descendant of `merged_key`. Returns /// the last candidate that *was* a descendant — or `None` if none is. pub fn find_lefthand_merger(&self, merged_key: K, tip_key: K) -> Option where K: Ord, { let descendants = self.find_descendants(merged_key, tip_key.clone()); let mut last_candidate: Option = None; let mut next_key = tip_key; loop { if !descendants.contains(&next_key) { return last_candidate; } let pm = self.get_parent_map(std::iter::once(next_key.clone())); let parents = match pm.get(&next_key) { Some(Parents::Known(ps)) => ps.clone(), _ => { // Missing entry or ghost — treat as end of walk. return Some(next_key); } }; last_candidate = Some(next_key); if parents.is_empty() { return last_candidate; } next_key = parents.into_iter().next().unwrap(); } } /// Compute `(left_only, right_only)` — the set difference between the /// ancestries of `left` and `right`. pub fn find_difference(&self, left: K, right: K) -> (FxHashSet, FxHashSet) where K: Ord, { let (_border, common, searchers) = self.find_border_ancestors([left, right]); // find_border_ancestors always returns one searcher per input // revision, so for a 2-element input we know we get exactly two. 
let mut pair: [BfsState; 2] = match <[BfsState; 2]>::try_from(searchers) { Ok(pair) => pair, Err(_) => unreachable!("find_border_ancestors returned a non-2 pair"), }; self.search_for_extra_common(&common, &mut pair); let [left_searcher, right_searcher] = pair; let left_seen = &left_searcher.seen; let right_seen = &right_searcher.seen; ( left_seen.difference(right_seen).cloned().collect(), right_seen.difference(left_seen).cloned().collect(), ) } /// Run the "extra common" reconvergence pass on a pair of searchers /// left in the state they finished `find_border_ancestors` in. Mirrors /// Python's `_search_for_extra_common`. /// /// Takes a fixed-size `[BfsState; 2]` so the two-searcher restriction /// is enforced at compile time instead of via a runtime assertion. fn search_for_extra_common(&self, _common: &FxHashSet, searchers: &mut [BfsState; 2]) where K: Ord, { let unique: FxHashSet = searchers[0] .seen .symmetric_difference(&searchers[1].seen) .cloned() .collect(); if unique.is_empty() { return; } let parent_map = self.get_parent_map(unique.iter().cloned()); let unique = Self::remove_simple_descendants(&unique, &parent_map); // Build unique-searchers: one per unique revision. let mut unique_searchers: Vec> = Vec::new(); for revision_id in unique.iter() { let revs_to_search: FxHashSet = { let parent_idx = if searchers[0].seen.contains(revision_id) { 0 } else { 1 }; let seed = [revision_id.clone()]; let anc = searchers[parent_idx].find_seen_ancestors(seed, &self.provider); if anc.is_empty() { [revision_id.clone()].into_iter().collect() } else { anc } }; let mut s = BfsState::new(revs_to_search); s.next_set(&self.provider); unique_searchers.push(s); } // Compute initial ancestor_all_unique: intersection of all seen sets. 
let mut ancestor_all_unique: FxHashSet = FxHashSet::default(); for (i, s) in unique_searchers.iter().enumerate() { if i == 0 { ancestor_all_unique = s.seen.clone(); } else { ancestor_all_unique = ancestor_all_unique.intersection(&s.seen).cloned().collect(); } } loop { let mut newly_seen_common: FxHashSet = FxHashSet::default(); for s in searchers.iter_mut() { if let Some(set) = s.next_set(&self.provider) { newly_seen_common.extend(set); } } let mut newly_seen_unique: FxHashSet = FxHashSet::default(); for s in unique_searchers.iter_mut() { if let Some(set) = s.next_set(&self.provider) { newly_seen_unique.extend(set); } } let mut new_common_unique: FxHashSet = FxHashSet::default(); for revision in &newly_seen_unique { if unique_searchers.iter().all(|s| s.seen.contains(revision)) { new_common_unique.insert(revision.clone()); } } if !newly_seen_common.is_empty() { // Merge newly_seen_common seen-ancestors from each common searcher. let mut expanded = newly_seen_common.clone(); for s in searchers.iter() { expanded .extend(s.find_seen_ancestors(expanded.iter().cloned(), &self.provider)); } let expanded_frozen = expanded; for s in searchers.iter_mut() { s.start_searching(expanded_frozen.iter().cloned(), &self.provider); } let stop_searching_common: FxHashSet = ancestor_all_unique .intersection(&expanded_frozen) .cloned() .collect(); if !stop_searching_common.is_empty() { for s in searchers.iter_mut() { s.stop_searching_any(stop_searching_common.iter().cloned()); } } } if !new_common_unique.is_empty() { let mut expanded = new_common_unique.clone(); for s in unique_searchers.iter() { expanded .extend(s.find_seen_ancestors(expanded.iter().cloned(), &self.provider)); } for s in searchers.iter() { expanded .extend(s.find_seen_ancestors(expanded.iter().cloned(), &self.provider)); } for s in unique_searchers.iter_mut() { s.start_searching(expanded.iter().cloned(), &self.provider); } for s in searchers.iter_mut() { s.stop_searching_any(expanded.iter().cloned()); } 
ancestor_all_unique.extend(expanded); // Collapse unique searchers that ended up with the same frontier. let mut seen_frontiers: std::collections::HashSet> = std::collections::HashSet::new(); let mut next_unique: Vec> = Vec::new(); for searcher in unique_searchers { let mut key: Vec = searcher.next_query().iter().cloned().collect(); key.sort_by_key(|k| { use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; let mut h = DefaultHasher::new(); k.hash(&mut h); h.finish() }); if seen_frontiers.insert(key) { next_unique.push(searcher); } } unique_searchers = next_unique; } let any_common_active = searchers.iter().any(|s| !s.next_query().is_empty()); if !any_common_active { return; } } } /// Find a unique lowest common ancestor by iterating `find_lca`. /// /// If there are multiple LCAs, recursively find the LCA of that set /// until exactly one remains. Returns `None` if there is no common /// ancestor. If `count_steps` is true, also returns the number of /// iterations. pub fn find_unique_lca(&self, left: K, right: K, null: &K) -> Option<(K, usize)> { let mut revisions: Vec = vec![left, right]; let mut steps: usize = 0; loop { steps += 1; let lca = self.find_lca(revisions.iter().cloned(), null); match lca.len() { 1 => return lca.into_iter().next().map(|k| (k, steps)), 0 => return None, _ => revisions = lca.into_iter().collect(), } } } /// Find the unique ancestors of `unique_revision` relative to /// `common_revisions`. /// /// Returns the set of revisions that are ancestors of `unique_revision` /// but not of any of `common_revisions`. If `unique_revision` is itself /// in `common_revisions`, returns an empty set. /// /// Algorithm description: /// /// 1. Walk backwards from the unique node and all common nodes. /// 2. When a node is seen by both sides, stop searching it in the unique /// walker, include it in the common walker. /// 3. Stop searching when there are no nodes left for the unique walker. 
/// At this point, you have a maximal set of unique nodes. Some of /// them may actually be common, and you haven't reached them yet. /// 4. Start new searchers for the unique nodes, seeded with the /// information you have so far. /// 5. Continue searching, stopping the common searches when the search /// tip is an ancestor of all unique nodes. /// 6. Aggregate together unique searchers when they are searching the /// same tips. When all unique searchers are searching the same node, /// stop move it to a single 'all_unique_searcher'. /// 7. The 'all_unique_searcher' represents the very 'tip' of searching. /// Most of the time this produces very little important information. /// So don't step it as quickly as the other searchers. /// 8. Search is done when all common searchers have completed. pub fn find_unique_ancestors( &self, unique_revision: K, common_revisions: impl IntoIterator, ) -> FxHashSet where K: Ord, { let common_revisions: Vec = common_revisions.into_iter().collect(); if common_revisions.contains(&unique_revision) { return FxHashSet::default(); } // Phase 1: find maximal unique set. let (mut unique_searcher, mut common_searcher) = self.find_initial_unique_nodes([unique_revision], common_revisions); let unique_nodes: FxHashSet = unique_searcher .seen .difference(&common_searcher.seen) .cloned() .collect(); if unique_nodes.is_empty() { return unique_nodes; } // Phase 2: refine via unique-tip searchers. let (mut all_unique_searcher, mut unique_tip_searchers) = self.make_unique_searchers(&unique_nodes, &mut unique_searcher, &mut common_searcher); self.refine_unique_nodes( &mut unique_searcher, &mut all_unique_searcher, &mut unique_tip_searchers, &mut common_searcher, ); unique_nodes .difference(&common_searcher.seen) .cloned() .collect() } /// Phase 1 of find_unique_ancestors: find the maximal unique set. 
fn find_initial_unique_nodes( &self, unique_revisions: impl IntoIterator, common_revisions: impl IntoIterator, ) -> (BfsState, BfsState) { let mut unique_searcher = BfsState::new(unique_revisions); // Skip past the starting unique revisions themselves. unique_searcher.next_set(&self.provider); let mut common_searcher = BfsState::new(common_revisions); while !unique_searcher.next_query().is_empty() { let next_unique_nodes: FxHashSet = unique_searcher.next_set(&self.provider).unwrap_or_default(); let next_common_nodes: FxHashSet = common_searcher.next_set(&self.provider).unwrap_or_default(); let mut unique_are_common_nodes: FxHashSet = next_unique_nodes .intersection(&common_searcher.seen) .cloned() .collect(); unique_are_common_nodes.extend( next_common_nodes .intersection(&unique_searcher.seen) .cloned(), ); if !unique_are_common_nodes.is_empty() { let mut ancestors = unique_searcher.find_seen_ancestors(unique_are_common_nodes, &self.provider); let more = common_searcher.find_seen_ancestors(ancestors.clone(), &self.provider); ancestors.extend(more); unique_searcher.stop_searching_any(ancestors.iter().cloned()); common_searcher.start_searching(ancestors, &self.provider); } } (unique_searcher, common_searcher) } /// Phase 2 setup: create a searcher for each unique-node tip plus an /// `all_unique_searcher` covering ancestry shared by every unique tip. 
fn make_unique_searchers( &self, unique_nodes: &FxHashSet, unique_searcher: &mut BfsState, common_searcher: &mut BfsState, ) -> (BfsState, Vec>) where K: Ord, { let parent_map = self.get_parent_map(unique_nodes.iter().cloned()); let unique_tips = Self::remove_simple_descendants(unique_nodes, &parent_map); let mut unique_tip_searchers: Vec> = Vec::new(); let ancestor_all_unique: FxHashSet = if unique_tips.len() == 1 { unique_searcher.find_seen_ancestors(unique_tips, &self.provider) } else { for tip in unique_tips { let mut revs_to_search = unique_searcher.find_seen_ancestors([tip.clone()], &self.provider); let more = common_searcher.find_seen_ancestors(revs_to_search.clone(), &self.provider); revs_to_search.extend(more); let mut searcher = BfsState::new(revs_to_search); // Skip past the starting nodes — we don't care about them. searcher.next_set(&self.provider); unique_tip_searchers.push(searcher); } // Fold the intersection from the borrowed `seen` sets so we // don't have to snapshot each searcher's seen set as we go. unique_tip_searchers .iter() .map(|s| &s.seen) .fold(None::>, |acc, s| match acc { None => Some(s.clone()), Some(a) => Some(a.intersection(s).cloned().collect()), }) .unwrap_or_default() }; // Collapse all common nodes into a single searcher covering the // `ancestor_all_unique` set, then advance it once. let mut all_unique_searcher = BfsState::new(ancestor_all_unique.iter().cloned()); if !ancestor_all_unique.is_empty() { all_unique_searcher.next_set(&self.provider); // Stop common-searcher tips that are already ancestors of all uniques. 
let to_stop = common_searcher .find_seen_ancestors(ancestor_all_unique.iter().cloned(), &self.provider); common_searcher.stop_searching_any(to_stop); for searcher in unique_tip_searchers.iter_mut() { let to_stop = searcher .find_seen_ancestors(ancestor_all_unique.iter().cloned(), &self.provider); searcher.stop_searching_any(to_stop); } } (all_unique_searcher, unique_tip_searchers) } /// Remove revisions which are descendants (via the parent_map) of other /// revisions in the set. This is a cheap O(E) pass that doesn't walk /// ancestry — it just drops keys whose parents are already in `revisions`. fn remove_simple_descendants( revisions: &FxHashSet, parent_map: &ParentMap, ) -> FxHashSet { let mut simple = revisions.clone(); for (revision, parents) in parent_map.iter() { if let Parents::Known(ps) = parents { for parent_id in ps { if revisions.contains(parent_id) { simple.remove(revision); break; } } } } simple } /// One BFS step across unique tip searchers, the unique_searcher, and /// the common_searcher, propagating find_seen_ancestors cross-checks. fn step_unique_and_common_searchers( &self, common_searcher: &mut BfsState, unique_tip_searchers: &mut [BfsState], unique_searcher: &BfsState, ) -> (FxHashSet, FxHashSet) { let newly_seen_common: FxHashSet = common_searcher.next_set(&self.provider).unwrap_or_default(); let mut newly_seen_unique: FxHashSet = FxHashSet::default(); // Snapshot seen sets of all tip searchers so we can cross-reference // without re-borrowing mid-loop. let tip_count = unique_tip_searchers.len(); // Collect (index, next_step) pairs first. let mut per_tip_next: Vec<(usize, FxHashSet)> = Vec::with_capacity(tip_count); for (i, s) in unique_tip_searchers.iter_mut().enumerate() { let mut next_set = s.next_set(&self.provider).unwrap_or_default(); // Include ancestors already known to the main unique_searcher. next_set.extend(unique_searcher.find_seen_ancestors(next_set.clone(), &self.provider)); // And to the common_searcher. 
next_set.extend(common_searcher.find_seen_ancestors(next_set.clone(), &self.provider)); per_tip_next.push((i, next_set)); } // Cross-check: each tip pulls in seen ancestors from every other tip. // We need to compute additions per tip from the current (pre-start) // state of the other tips, so snapshot their seen sets first. let seen_per_tip: Vec> = unique_tip_searchers .iter() .map(|s| s.seen.clone()) .collect(); for (i, next_set) in per_tip_next.iter_mut() { for (j, seen_j) in seen_per_tip.iter().enumerate() { if *i == j { continue; } // We can't call `alt_searcher.find_seen_ancestors` here // because that would re-borrow the slice we're already // iterating. Use the free-standing equivalent that takes // the other searcher's seen set by reference. let additions = Self::find_seen_ancestors_against(next_set.clone(), seen_j, &self.provider); next_set.extend(additions); } } // Apply start_searching and accumulate newly_seen_unique. for (i, next_set) in per_tip_next { unique_tip_searchers[i].start_searching(next_set.iter().cloned(), &self.provider); newly_seen_unique.extend(next_set); } (newly_seen_common, newly_seen_unique) } /// Free-standing equivalent of `BfsState::find_seen_ancestors` that /// walks the provider restricted to a given `seen` set. Used by /// `step_unique_and_common_searchers` to cross-check tip searchers /// without mid-loop re-borrows of the slice. 
fn find_seen_ancestors_against( revisions: FxHashSet, seen: &FxHashSet, provider: &P, ) -> FxHashSet { let mut pending: FxHashSet = revisions.into_iter().filter(|r| seen.contains(r)).collect(); let mut seen_ancestors: FxHashSet = pending.iter().cloned().collect(); while !pending.is_empty() { let mut std_set: std::collections::HashSet = std::collections::HashSet::with_capacity(pending.len()); for k in &pending { std_set.insert(k.clone()); } let parent_map = provider.get_parent_map(&std_set); let mut all_parents: Vec = Vec::new(); for (_, parents) in parent_map.iter() { if let Parents::Known(ps) = parents { all_parents.extend(ps.iter().cloned()); } } let mut next_pending: FxHashSet = FxHashSet::default(); for p in all_parents { if seen.contains(&p) && !seen_ancestors.contains(&p) { next_pending.insert(p); } } seen_ancestors.extend(next_pending.iter().cloned()); pending = next_pending; } seen_ancestors } /// Find nodes common to all unique tip searchers (and optionally step /// the `all_unique_searcher`). fn find_nodes_common_to_all_unique( &self, unique_tip_searchers: &[BfsState], all_unique_searcher: &mut BfsState, newly_seen_unique: &FxHashSet, step_all_unique: bool, ) -> FxHashSet { let mut common: FxHashSet = newly_seen_unique.clone(); for searcher in unique_tip_searchers { common = common.intersection(&searcher.seen).cloned().collect(); } common = common .intersection(&all_unique_searcher.seen) .cloned() .collect(); if step_all_unique { if let Some(nodes) = all_unique_searcher.next_set(&self.provider) { common.extend(nodes); } } common } /// Combine unique tip searchers that are searching the same frontier. fn collapse_unique_searchers( &self, unique_tip_searchers: Vec>, common_to_all_unique_nodes: &FxHashSet, ) -> Vec> { // First pass: stop searching the common-to-all set on each searcher // and bucket by resulting frontier. 
let mut buckets: FxHashMap, Vec>> = FxHashMap::default(); let mut empty_bucket: Vec> = Vec::new(); for mut searcher in unique_tip_searchers { searcher.stop_searching_any(common_to_all_unique_nodes.iter().cloned()); let nq = searcher.next_query().clone(); if nq.is_empty() { empty_bucket.push(searcher); continue; } // Sort the frontier by hash for a deterministic bucket key. let mut key: Vec = nq.into_iter().collect(); key.sort_by_key(|k| { use std::collections::hash_map::DefaultHasher; use std::hash::Hasher; let mut h = DefaultHasher::new(); k.hash(&mut h); h.finish() }); buckets.entry(key).or_default().push(searcher); } let _ = empty_bucket; // drop empties — those searchers are done. let mut next_unique_searchers: Vec> = Vec::new(); for (_key, mut searchers) in buckets { if searchers.len() == 1 { next_unique_searchers.push(searchers.pop().unwrap()); } else { // Combine: intersect all their seen sets into the first. let mut first = searchers.remove(0); for s in searchers { first.seen = first.seen.intersection(&s.seen).cloned().collect(); } next_unique_searchers.push(first); } } next_unique_searchers } /// Phase 2 main loop: refine unique-vs-common by stepping searchers /// until `common_searcher` has nothing left to search. fn refine_unique_nodes( &self, unique_searcher: &mut BfsState, all_unique_searcher: &mut BfsState, unique_tip_searchers: &mut Vec>, common_searcher: &mut BfsState, ) { // Step the all_unique_searcher every N steps (Python's // STEP_UNIQUE_SEARCHER_EVERY = 5). 
const STEP_ALL_UNIQUE_EVERY: usize = 5; let mut step_all_unique_counter: usize = 0; while !common_searcher.next_query().is_empty() { let (newly_seen_common, newly_seen_unique) = self.step_unique_and_common_searchers( common_searcher, unique_tip_searchers, unique_searcher, ); let common_to_all_unique_nodes = self.find_nodes_common_to_all_unique( unique_tip_searchers, all_unique_searcher, &newly_seen_unique, step_all_unique_counter == 0, ); step_all_unique_counter = (step_all_unique_counter + 1) % STEP_ALL_UNIQUE_EVERY; if !newly_seen_common.is_empty() { let stop: FxHashSet = all_unique_searcher .seen .intersection(&newly_seen_common) .cloned() .collect(); common_searcher.stop_searching_any(stop); } if !common_to_all_unique_nodes.is_empty() { let mut expanded = common_to_all_unique_nodes.clone(); expanded.extend(common_searcher.find_seen_ancestors( common_to_all_unique_nodes.iter().cloned(), &self.provider, )); all_unique_searcher.start_searching(expanded.iter().cloned(), &self.provider); common_searcher.stop_searching_any(expanded); } let old_searchers = std::mem::take(unique_tip_searchers); *unique_tip_searchers = self.collapse_unique_searchers(old_searchers, &common_to_all_unique_nodes); } } /// Find ancestors of `new_key` that may be descendants of `old_key`. /// /// Drives two parallel searchers: `stop` walks up from `old_key` and /// `descendants` walks up from `new_key`. For each iteration, prune /// nodes already seen by `stop` from `descendants`, then advance `stop` /// and prune any nodes in the newly-visited stop set that `descendants` /// has already reached (via `find_seen_ancestors`). /// /// Returns the set of keys reached by `descendants` but not by `stop`. pub fn find_descendant_ancestors(&self, old_key: K, new_key: K) -> FxHashSet { let mut stop = BfsState::new([old_key]); let mut descendants = BfsState::new([new_key]); // Python's `for revisions in descendants:` iterates `next()` until // StopIteration. Our next_set returns None to signal the end. 
while let Some(revisions) = descendants.next_set(&self.provider) { let old_stop: FxHashSet = stop.seen.intersection(&revisions).cloned().collect(); descendants.stop_searching_any(old_stop); let step = stop.next_set(&self.provider).unwrap_or_default(); let seen_stop = descendants.find_seen_ancestors(step, &self.provider); descendants.stop_searching_any(seen_stop); } descendants.seen.difference(&stop.seen).cloned().collect() } /// Find border ancestors of a set of revisions via a concurrent BFS. /// /// Returns `(border_ancestors, common_ancestors, searchers)`. The /// searchers are left in the state they finished in so callers can /// inspect `seen` for graph-difference calculations. pub fn find_border_ancestors( &self, revisions: impl IntoIterator, ) -> (FxHashSet, FxHashSet, Vec>) { let revisions: Vec = revisions.into_iter().collect(); let mut searchers: Vec> = revisions .iter() .map(|r| BfsState::new([r.clone()])) .collect(); let mut common_ancestors: FxHashSet = FxHashSet::default(); let mut border_ancestors: FxHashSet = FxHashSet::default(); loop { let mut newly_seen: FxHashSet = FxHashSet::default(); for searcher in searchers.iter_mut() { if let Some(new_ancestors) = searcher.next_set(&self.provider) { newly_seen.extend(new_ancestors); } } let mut new_common: FxHashSet = FxHashSet::default(); for revision in &newly_seen { if common_ancestors.contains(revision) { new_common.insert(revision.clone()); continue; } if searchers.iter().all(|s| s.seen.contains(revision)) { border_ancestors.insert(revision.clone()); new_common.insert(revision.clone()); } } if !new_common.is_empty() { // Pull in ancestors that are already seen by each searcher. // We can't borrow searchers twice in one pass, so snapshot // each searcher's contribution and merge. 
let mut expanded = new_common.clone(); for searcher in searchers.iter() { let seen_anc = searcher.find_seen_ancestors(new_common.clone(), &self.provider); expanded.extend(seen_anc); } let new_common = expanded; for searcher in searchers.iter_mut() { searcher.start_searching(new_common.iter().cloned(), &self.provider); } common_ancestors.extend(new_common); } // Convergence check: if all searchers have the same next query, // we've merged into a single common line and can stop. let first_frontier: FxHashSet = searchers .first() .map(|s| s.next_query().clone()) .unwrap_or_default(); let all_same = searchers.iter().all(|s| s.next_query() == &first_frontier); if all_same { let uncommon: FxHashSet = first_frontier .difference(&common_ancestors) .cloned() .collect(); if !uncommon.is_empty() { // Shouldn't happen in well-formed graphs, but instead of // panicking we just continue — matches Python's // AssertionError shape without crashing. // (Callers of find_difference etc. will see this as an // empty difference; tests never exercise this path.) } break; } } (border_ancestors, common_ancestors, searchers) } /// Return the heads from amongst `keys`. /// /// This walks each candidate's ancestry and prunes any key reachable /// from another. The `null` parameter is the sentinel the caller uses /// for the origin (`b"null:"` in the Python layer); passing it lets the /// Rust core stay string-typed without baking bytes into the API. pub fn heads_with_null(&self, keys: impl IntoIterator, null: &K) -> FxHashSet { let mut candidate_heads: FxHashSet = keys.into_iter().collect(); if candidate_heads.contains(null) { candidate_heads.remove(null); if candidate_heads.is_empty() { let mut r = FxHashSet::default(); r.insert(null.clone()); return r; } } if candidate_heads.len() < 2 { return candidate_heads; } // One searcher per candidate, keyed by the candidate revision. 
let mut searchers: FxHashMap> = candidate_heads .iter() .map(|c| (c.clone(), BfsState::new([c.clone()]))) .collect(); let mut active: FxHashSet = candidate_heads.iter().cloned().collect(); // Skip the first yield (the candidate itself). for (_, searcher) in searchers.iter_mut() { searcher.next_set(&self.provider); } // Common walker: tracks nodes known to be common across all // searchers, so that a searcher hitting one can stop early. let mut common_walker: BfsState = BfsState::new([] as [K; 0]); while !active.is_empty() { let mut ancestors: FxHashSet = FxHashSet::default(); // Advance the common walker one step if there's anything to advance. common_walker.next_set(&self.provider); // Advance each active searcher one step. let active_list: Vec = active.iter().cloned().collect(); for candidate in active_list { let finished = { let searcher = searchers.get_mut(&candidate).unwrap(); match searcher.next_set(&self.provider) { Some(set) => { ancestors.extend(set); false } None => true, } }; if finished { active.remove(&candidate); } } // Process found ancestors. let mut new_common: FxHashSet = FxHashSet::default(); for ancestor in ancestors { if candidate_heads.contains(&ancestor) { candidate_heads.remove(&ancestor); searchers.remove(&ancestor); active.remove(&ancestor); } if common_walker.seen.contains(&ancestor) { // Known common: tell every searcher to stop on it. let stop_set: FxHashSet = [ancestor].into_iter().collect(); for searcher in searchers.values_mut() { searcher.stop_searching_any(stop_set.iter().cloned()); } } else if searchers.values().all(|s| s.seen.contains(&ancestor)) { // All searchers have reached this node — it's newly // common. Stop any of its seen ancestors in each searcher. new_common.insert(ancestor.clone()); // Collect seen ancestors per searcher, then apply stops. 
let seen_per_searcher: Vec> = searchers .values() .map(|s| s.find_seen_ancestors([ancestor.clone()], &self.provider)) .collect(); for (searcher, seen_anc) in searchers.values_mut().zip(seen_per_searcher.into_iter()) { searcher.stop_searching_any(seen_anc); } } } common_walker.start_searching(new_common, &self.provider); } candidate_heads } /// Find the lowest common ancestors of `revisions`. pub fn find_lca(&self, revisions: impl IntoIterator, null: &K) -> FxHashSet { let (border_common, _common, _searchers) = self.find_border_ancestors(revisions); self.heads_with_null(border_common, null) } /// Return whether `candidate_ancestor` is an ancestor of `candidate_descendant`. pub fn is_ancestor(&self, candidate_ancestor: K, candidate_descendant: K, null: &K) -> bool { let heads = self.heads_with_null( [candidate_ancestor.clone(), candidate_descendant.clone()], null, ); heads.len() == 1 && heads.contains(&candidate_descendant) } /// Return whether `revid` is between `lower_bound_revid` and /// `upper_bound_revid` (inclusive). `None` bounds are skipped. pub fn is_between( &self, revid: K, lower_bound_revid: Option, upper_bound_revid: Option, null: &K, ) -> bool { let upper_ok = match upper_bound_revid { None => true, Some(upper) => self.is_ancestor(revid.clone(), upper, null), }; if !upper_ok { return false; } match lower_bound_revid { None => true, Some(lower) => self.is_ancestor(lower, revid, null), } } /// Find the order in which `lca_revision_ids` were merged into `tip`. /// /// Walks backwards from `tip` with a stack, left-first, collecting the /// LCA revisions in the order they are encountered. 
pub fn find_merge_order( &self, tip: K, lca_revision_ids: impl IntoIterator, ) -> Vec { let mut looking_for: FxHashSet = lca_revision_ids.into_iter().collect(); if looking_for.len() == 1 { return looking_for.into_iter().collect(); } let mut stack: Vec = vec![tip]; let mut found: Vec = Vec::new(); let mut stop: FxHashSet = FxHashSet::default(); while !stack.is_empty() && !looking_for.is_empty() { let next_key = stack.pop().unwrap(); stop.insert(next_key.clone()); if looking_for.remove(&next_key) { found.push(next_key); if looking_for.len() == 1 { // Only one LCA left — add it and break without walking. let last = looking_for.iter().next().cloned().unwrap(); looking_for.clear(); found.push(last); break; } continue; } let pm = self.get_parent_map(std::iter::once(next_key.clone())); let parents = match pm.get(&next_key) { Some(Parents::Known(ps)) if !ps.is_empty() => ps.clone(), _ => continue, }; // Walk parents in reverse so the left-most parent is popped first. for parent_id in parents.into_iter().rev() { if !stop.contains(&parent_id) { stack.push(parent_id.clone()); } stop.insert(parent_id); } } found } /// Find descendants of `old_key` that are ancestors of `new_key`. /// /// Uses [`find_descendant_ancestors`](Self::find_descendant_ancestors) /// to narrow down candidates, then walks forwards through the child /// relationships by running a BFS over a [`DictParentsProvider`] built /// from the inverted parent map. pub fn find_descendants(&self, old_key: K, new_key: K) -> FxHashSet where K: Ord, { let candidates = self.find_descendant_ancestors(old_key.clone(), new_key); let child_map = self.get_child_map(candidates); // Walk forwards via a DictParentsProvider built from the child map. 
let dict: HashMap> = child_map.into_iter().collect(); let provider = DictParentsProvider::from(dict); let mut searcher = BfsState::new([old_key]); while searcher.next_set(&provider).is_some() {} searcher.seen } } #[cfg(test)] mod tests { use super::*; use crate::DictParentsProvider; use std::collections::HashMap; const NULL: &str = "null:"; fn make( edges: &[(&'static str, &[&'static str])], ) -> Graph<&'static str, DictParentsProvider<&'static str>> { let map: HashMap<&'static str, Vec<&'static str>> = edges.iter().map(|(k, ps)| (*k, ps.to_vec())).collect(); Graph::new(DictParentsProvider::from(map)) } #[test] fn get_parent_map_basic() { let g = make(&[("a", &[]), ("b", &["a"])]); let pm = g.get_parent_map(vec!["a", "b", "missing"]); assert_eq!(pm.get(&"a"), Some(&Parents::Known(vec![]))); assert_eq!(pm.get(&"b"), Some(&Parents::Known(vec!["a"]))); assert_eq!(pm.get(&"missing"), None); } #[test] fn get_child_map_inverts() { let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let cm = g.get_child_map(vec!["a", "b", "c", "d"]); assert_eq!(cm.get(&"a"), Some(&vec!["b", "c"])); assert_eq!(cm.get(&"b"), Some(&vec!["d"])); assert_eq!(cm.get(&"c"), Some(&vec!["d"])); assert_eq!(cm.get(&"d"), None); } #[test] fn iter_lefthand_ancestry_linear() { // null <- a <- b <- c let g = make(&[("a", &[NULL]), ("b", &["a"]), ("c", &["b"])]); let out = g.iter_lefthand_ancestry("c", [NULL]).unwrap(); assert_eq!(out, vec!["c", "b", "a"]); } #[test] fn find_distance_to_null_linear() { // null <- a (1) <- b (2) <- c (3) let g = make(&[("a", &[NULL]), ("b", &["a"]), ("c", &["b"])]); assert_eq!( g.find_distance_to_null("c", std::iter::empty(), NULL) .unwrap(), 3 ); assert_eq!( g.find_distance_to_null("a", std::iter::empty(), NULL) .unwrap(), 1 ); } #[test] fn find_distance_to_null_with_known_seed() { // null <- a (1) <- b (2) <- c (3) <- d (4) let g = make(&[("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"])]); assert_eq!( g.find_distance_to_null("d", 
std::iter::once(("b", 2)), NULL) .unwrap(), 4 ); } #[test] fn find_lefthand_distances_all() { let g = make(&[("a", &[NULL]), ("b", &["a"]), ("c", &["b"])]); let d = g.find_lefthand_distances(vec!["a", "b", "c"], NULL); assert_eq!(d.get(&"a"), Some(&1)); assert_eq!(d.get(&"b"), Some(&2)); assert_eq!(d.get(&"c"), Some(&3)); } #[test] fn iter_topo_order_parents_first() { let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let order = g.iter_topo_order(vec!["a", "b", "c", "d"]).unwrap(); let pos = |x: &&str| order.iter().position(|n| n == x).unwrap(); assert!(pos(&"a") < pos(&"b")); assert!(pos(&"a") < pos(&"c")); assert!(pos(&"b") < pos(&"d")); assert!(pos(&"c") < pos(&"d")); } #[test] fn iter_ancestry_reaches_all() { let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let anc = g.iter_ancestry(vec!["d"]); let keys: FxHashSet<&'static str> = anc.iter().map(|(k, _)| *k).collect(); let expected: FxHashSet<&'static str> = ["a", "b", "c", "d"].into_iter().collect(); assert_eq!(keys, expected); } #[test] fn find_descendants_diamond() { // a // / \ // b c // \ / // d let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let descendants = g.find_descendants("a", "d"); let expected: FxHashSet<&'static str> = ["a", "b", "c", "d"].into_iter().collect(); assert_eq!(descendants, expected); } #[test] fn find_descendants_linear() { // a <- b <- c <- d let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"])]); let descendants = g.find_descendants("b", "d"); let expected: FxHashSet<&'static str> = ["b", "c", "d"].into_iter().collect(); assert_eq!(descendants, expected); } #[test] fn heads_single_candidate() { let g = make(&[("a", &[]), ("b", &["a"])]); let h = g.heads_with_null(vec!["b"], &NULL); assert_eq!(h, ["b"].into_iter().collect()); } #[test] fn heads_prunes_ancestors() { // a <- b <- c let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["b"])]); let h = g.heads_with_null(vec!["a", 
"c"], &NULL); assert_eq!(h, ["c"].into_iter().collect()); } #[test] fn heads_diamond_returns_both() { // a // / \ // b c // \ / // d let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let h = g.heads_with_null(vec!["b", "c"], &NULL); let expected: FxHashSet<_> = ["b", "c"].into_iter().collect(); assert_eq!(h, expected); } #[test] fn heads_null_alone() { let g = make(&[("a", &[])]); let h = g.heads_with_null(vec![NULL], &NULL); assert_eq!(h, [NULL].into_iter().collect()); } #[test] fn find_lca_diamond() { // a // / \ // b c // \ / // d let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let lca = g.find_lca(vec!["b", "c"], &NULL); assert_eq!(lca, ["a"].into_iter().collect()); } #[test] fn is_ancestor_true_and_false() { // a <- b <- c let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["b"])]); assert!(g.is_ancestor("a", "c", &NULL)); assert!(g.is_ancestor("b", "c", &NULL)); assert!(!g.is_ancestor("c", "a", &NULL)); } #[test] fn find_merge_order_single() { let g = make(&[("a", &[]), ("b", &["a"])]); let order = g.find_merge_order("b", vec!["a"]); assert_eq!(order, vec!["a"]); } #[test] fn find_descendants_unrelated() { // new_key is not a descendant of old_key. let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"])]); let descendants = g.find_descendants("b", "c"); // b is not reachable from c, so no descendants of b among c's ancestry. assert!(descendants.is_empty() || descendants == ["b"].into_iter().collect()); } /// Build a set literal from an array of strings. 
fn set(xs: [&'static str; N]) -> FxHashSet<&'static str> { xs.into_iter().collect() } fn ancestry_1() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("rev1", &[NULL]), ("rev2a", &["rev1"]), ("rev2b", &["rev1"]), ("rev3", &["rev2a"]), ("rev4", &["rev3", "rev2b"]), ]) } fn ancestry_2() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("rev1a", &[NULL]), ("rev2a", &["rev1a"]), ("rev1b", &[NULL]), ("rev3a", &["rev2a"]), ("rev4a", &["rev3a"]), ]) } fn criss_cross() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("rev1", &[NULL]), ("rev2a", &["rev1"]), ("rev2b", &["rev1"]), ("rev3a", &["rev2a", "rev2b"]), ("rev3b", &["rev2b", "rev2a"]), ]) } fn criss_cross2() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("rev1a", &[NULL]), ("rev1b", &[NULL]), ("rev2a", &["rev1a", "rev1b"]), ("rev2b", &["rev1b", "rev1a"]), ]) } fn history_shortcut() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("rev1", &[NULL]), ("rev2a", &["rev1"]), ("rev2b", &["rev1"]), ("rev2c", &["rev1"]), ("rev3a", &["rev2a", "rev2b"]), ("rev3b", &["rev2b", "rev2c"]), ]) } fn extended_history_shortcut() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["a", "d"]), ]) } fn double_shortcut_fixture() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["c"]), ("f", &["a", "d"]), ("g", &["a", "e"]), ]) } fn complex_shortcut() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["d"]), ("g", &["f"]), ("h", &["f"]), ("i", &["e", "g"]), ("j", &["g"]), ("k", &["j"]), ("l", &["k"]), ("m", &["i", "l"]), ("n", &["l", "h"]), ]) } fn complex_shortcut2() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", 
&["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["e"]), ("g", &["f"]), ("h", &["d"]), ("i", &["g"]), ("j", &["h"]), ("k", &["h", "i"]), ("l", &["k"]), ("m", &["l"]), ("n", &["m"]), ("o", &["n"]), ("p", &["o"]), ("q", &["p"]), ("r", &["q"]), ("s", &["r"]), ("t", &["i", "s"]), ("u", &["s", "j"]), ]) } fn multiple_interesting_unique() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["d"]), ("g", &["e"]), ("h", &["e"]), ("i", &["f"]), ("j", &["g"]), ("k", &["g"]), ("l", &["h"]), ("m", &["i"]), ("n", &["k", "l"]), ("o", &["m"]), ("p", &["m", "l"]), ("q", &["n", "o"]), ("r", &["q"]), ("s", &["r"]), ("t", &["s"]), ("u", &["t"]), ("v", &["u"]), ("w", &["v"]), ("x", &["w"]), ("y", &["j", "x"]), ("z", &["x", "p"]), ]) } fn shortcut_extra_root() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["a", "d", "g"]), ("g", &[NULL]), ]) } fn boundary() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &["b"]), ("c", &["b", "d"]), ("b", &["e"]), ("d", &["e"]), ("e", &["f"]), ("f", &[NULL]), ]) } #[test] fn test_lca_ancestry_1() { let g = ancestry_1(); assert_eq!(g.find_lca([NULL, NULL], &NULL), set([NULL])); assert_eq!(g.find_lca([NULL, "rev1"], &NULL), set([NULL])); assert_eq!(g.find_lca(["rev1", "rev1"], &NULL), set(["rev1"])); assert_eq!(g.find_lca(["rev2a", "rev2b"], &NULL), set(["rev1"])); } #[test] fn test_lca_criss_cross() { let g = criss_cross(); assert_eq!( g.find_lca(["rev3a", "rev3b"], &NULL), set(["rev2a", "rev2b"]) ); } #[test] fn test_lca_shortcut() { let g = history_shortcut(); assert_eq!(g.find_lca(["rev3a", "rev3b"], &NULL), set(["rev2b"])); } #[test] fn test_lca_double_shortcut() { let g = double_shortcut_fixture(); assert_eq!(g.find_lca(["f", "g"], &NULL), set(["c"])); } #[test] fn test_unique_lca_ancestry_1() { let g = 
ancestry_1(); assert_eq!(g.find_unique_lca(NULL, NULL, &NULL), Some((NULL, 1))); assert_eq!(g.find_unique_lca(NULL, "rev1", &NULL), Some((NULL, 1))); assert_eq!(g.find_unique_lca("rev1", "rev1", &NULL), Some(("rev1", 1))); assert_eq!( g.find_unique_lca("rev2a", "rev2b", &NULL), Some(("rev1", 1)) ); } #[test] fn test_unique_lca_criss_cross() { let g = criss_cross(); assert_eq!( g.find_unique_lca("rev3a", "rev3b", &NULL), Some(("rev1", 2)) ); } #[test] fn test_unique_lca_null_revision_criss_cross2() { let g = criss_cross2(); assert_eq!( g.find_unique_lca("rev2a", "rev1b", &NULL).map(|(k, _)| k), Some("rev1b") ); assert_eq!( g.find_unique_lca("rev2a", "rev2b", &NULL).map(|(k, _)| k), Some(NULL) ); } #[test] fn test_unique_lca_separate_ancestry() { let g = ancestry_2(); assert_eq!( g.find_unique_lca("rev4a", "rev1b", &NULL).map(|(k, _)| k), Some(NULL) ); } #[test] fn test_heads_null() { let g = ancestry_1(); assert_eq!(g.heads_with_null([NULL], &NULL), set([NULL])); assert_eq!(g.heads_with_null([NULL, "rev1"], &NULL), set(["rev1"])); assert_eq!(g.heads_with_null(["rev1", NULL], &NULL), set(["rev1"])); } #[test] fn test_heads_one() { let g = ancestry_1(); for key in [NULL, "rev1", "rev2a", "rev2b", "rev3", "rev4"] { assert_eq!(g.heads_with_null([key], &NULL), set([key])); } } #[test] fn test_heads_single_from_pair() { let g = ancestry_1(); assert_eq!(g.heads_with_null([NULL, "rev4"], &NULL), set(["rev4"])); assert_eq!(g.heads_with_null(["rev1", "rev2a"], &NULL), set(["rev2a"])); assert_eq!(g.heads_with_null(["rev1", "rev2b"], &NULL), set(["rev2b"])); assert_eq!(g.heads_with_null(["rev1", "rev3"], &NULL), set(["rev3"])); assert_eq!(g.heads_with_null(["rev1", "rev4"], &NULL), set(["rev4"])); assert_eq!(g.heads_with_null(["rev2a", "rev4"], &NULL), set(["rev4"])); assert_eq!(g.heads_with_null(["rev2b", "rev4"], &NULL), set(["rev4"])); assert_eq!(g.heads_with_null(["rev3", "rev4"], &NULL), set(["rev4"])); } #[test] fn test_heads_two_heads() { let g = ancestry_1(); 
assert_eq!( g.heads_with_null(["rev2a", "rev2b"], &NULL), set(["rev2a", "rev2b"]) ); assert_eq!( g.heads_with_null(["rev3", "rev2b"], &NULL), set(["rev3", "rev2b"]) ); } #[test] fn test_heads_criss_cross() { let g = criss_cross(); assert_eq!(g.heads_with_null(["rev2a", "rev1"], &NULL), set(["rev2a"])); assert_eq!(g.heads_with_null(["rev2b", "rev1"], &NULL), set(["rev2b"])); assert_eq!(g.heads_with_null(["rev3a", "rev1"], &NULL), set(["rev3a"])); assert_eq!(g.heads_with_null(["rev3b", "rev1"], &NULL), set(["rev3b"])); assert_eq!( g.heads_with_null(["rev2a", "rev2b"], &NULL), set(["rev2a", "rev2b"]) ); assert_eq!(g.heads_with_null(["rev3a", "rev2a"], &NULL), set(["rev3a"])); assert_eq!(g.heads_with_null(["rev3a", "rev2b"], &NULL), set(["rev3a"])); assert_eq!( g.heads_with_null(["rev3a", "rev2a", "rev2b"], &NULL), set(["rev3a"]) ); assert_eq!(g.heads_with_null(["rev3b", "rev2a"], &NULL), set(["rev3b"])); assert_eq!(g.heads_with_null(["rev3b", "rev2b"], &NULL), set(["rev3b"])); assert_eq!( g.heads_with_null(["rev3b", "rev2a", "rev2b"], &NULL), set(["rev3b"]) ); assert_eq!( g.heads_with_null(["rev3a", "rev3b"], &NULL), set(["rev3a", "rev3b"]) ); assert_eq!( g.heads_with_null(["rev3a", "rev3b", "rev2a", "rev2b"], &NULL), set(["rev3a", "rev3b"]) ); } #[test] fn test_heads_shortcut() { let g = history_shortcut(); assert_eq!( g.heads_with_null(["rev2a", "rev2b", "rev2c"], &NULL), set(["rev2a", "rev2b", "rev2c"]) ); assert_eq!( g.heads_with_null(["rev3a", "rev3b"], &NULL), set(["rev3a", "rev3b"]) ); assert_eq!( g.heads_with_null(["rev2a", "rev3a", "rev3b"], &NULL), set(["rev3a", "rev3b"]) ); assert_eq!( g.heads_with_null(["rev2a", "rev3b"], &NULL), set(["rev2a", "rev3b"]) ); assert_eq!( g.heads_with_null(["rev2c", "rev3a"], &NULL), set(["rev2c", "rev3a"]) ); } #[test] fn test_graph_difference_ancestry_1() { let g = ancestry_1(); assert_eq!( g.find_difference("rev1", "rev1"), (FxHashSet::default(), FxHashSet::default()) ); assert_eq!( g.find_difference(NULL, "rev1"), 
(FxHashSet::default(), set(["rev1"])) ); assert_eq!( g.find_difference("rev1", NULL), (set(["rev1"]), FxHashSet::default()) ); assert_eq!( g.find_difference("rev3", "rev2b"), (set(["rev2a", "rev3"]), set(["rev2b"])) ); assert_eq!( g.find_difference("rev4", "rev2b"), (set(["rev4", "rev3", "rev2a"]), FxHashSet::default()) ); } #[test] fn test_graph_difference_separate_ancestry() { let g = ancestry_2(); assert_eq!( g.find_difference("rev1a", "rev1b"), (set(["rev1a"]), set(["rev1b"])) ); assert_eq!( g.find_difference("rev4a", "rev1b"), (set(["rev1a", "rev2a", "rev3a", "rev4a"]), set(["rev1b"])) ); } #[test] fn test_graph_difference_criss_cross() { let g = criss_cross(); assert_eq!( g.find_difference("rev3a", "rev3b"), (set(["rev3a"]), set(["rev3b"])) ); assert_eq!( g.find_difference("rev2a", "rev3b"), (FxHashSet::default(), set(["rev3b", "rev2b"])) ); } #[test] fn test_graph_difference_extended_history() { let g = extended_history_shortcut(); assert_eq!(g.find_difference("e", "f"), (set(["e"]), set(["f"]))); assert_eq!(g.find_difference("f", "e"), (set(["f"]), set(["e"]))); } #[test] fn test_graph_difference_double_shortcut() { let g = double_shortcut_fixture(); assert_eq!( g.find_difference("f", "g"), (set(["d", "f"]), set(["e", "g"])) ); } #[test] fn test_graph_difference_complex_shortcut() { let g = complex_shortcut(); assert_eq!( g.find_difference("m", "n"), (set(["m", "i", "e"]), set(["n", "h"])) ); } #[test] fn test_graph_difference_complex_shortcut2() { let g = complex_shortcut2(); assert_eq!(g.find_difference("t", "u"), (set(["t"]), set(["j", "u"]))); } #[test] fn test_graph_difference_shortcut_extra_root() { let g = shortcut_extra_root(); assert_eq!(g.find_difference("e", "f"), (set(["e"]), set(["f", "g"]))); } #[test] fn test_unique_ancestors_empty_set() { let g = ancestry_1(); assert_eq!( g.find_unique_ancestors("rev1", ["rev1"]), FxHashSet::default() ); assert_eq!( g.find_unique_ancestors("rev2b", ["rev2b"]), FxHashSet::default() ); assert_eq!( 
g.find_unique_ancestors("rev3", ["rev1", "rev3"]), FxHashSet::default() ); } #[test] fn test_unique_ancestors_single_node() { let g = ancestry_1(); assert_eq!(g.find_unique_ancestors("rev2a", ["rev1"]), set(["rev2a"])); assert_eq!(g.find_unique_ancestors("rev2b", ["rev1"]), set(["rev2b"])); assert_eq!(g.find_unique_ancestors("rev3", ["rev2a"]), set(["rev3"])); } #[test] fn test_unique_ancestors_in_ancestry() { let g = ancestry_1(); assert_eq!( g.find_unique_ancestors("rev1", ["rev3"]), FxHashSet::default() ); assert_eq!( g.find_unique_ancestors("rev2b", ["rev4"]), FxHashSet::default() ); } #[test] fn test_unique_ancestors_multiple_revisions() { let g = ancestry_1(); assert_eq!( g.find_unique_ancestors("rev4", ["rev3", "rev2b"]), set(["rev4"]) ); assert_eq!( g.find_unique_ancestors("rev4", ["rev2b"]), set(["rev2a", "rev3", "rev4"]) ); } #[test] fn test_unique_ancestors_complex_shortcut() { let g = complex_shortcut(); assert_eq!(g.find_unique_ancestors("n", ["m"]), set(["h", "n"])); assert_eq!(g.find_unique_ancestors("m", ["n"]), set(["e", "i", "m"])); } #[test] fn test_unique_ancestors_complex_shortcut2() { let g = complex_shortcut2(); assert_eq!(g.find_unique_ancestors("u", ["t"]), set(["j", "u"])); assert_eq!(g.find_unique_ancestors("t", ["u"]), set(["t"])); } #[test] fn test_unique_ancestors_multiple_interesting_unique() { let g = multiple_interesting_unique(); assert_eq!(g.find_unique_ancestors("y", ["z"]), set(["j", "y"])); assert_eq!(g.find_unique_ancestors("z", ["y"]), set(["p", "z"])); } #[test] fn test_is_ancestor_ancestry_1() { let g = ancestry_1(); assert!(g.is_ancestor(NULL, NULL, &NULL)); assert!(g.is_ancestor(NULL, "rev1", &NULL)); assert!(!g.is_ancestor("rev1", NULL, &NULL)); assert!(g.is_ancestor(NULL, "rev4", &NULL)); assert!(!g.is_ancestor("rev4", NULL, &NULL)); assert!(!g.is_ancestor("rev4", "rev2b", &NULL)); assert!(g.is_ancestor("rev2b", "rev4", &NULL)); assert!(!g.is_ancestor("rev2b", "rev3", &NULL)); assert!(!g.is_ancestor("rev3", "rev2b", 
&NULL)); } #[test] fn test_is_ancestor_boundary() { // Python's test_is_ancestor_boundary: verify a is not an ancestor // of c despite both sharing a common ancestor further down. let g = boundary(); assert!(!g.is_ancestor("a", "c", &NULL)); } #[test] fn test_is_between_ancestry_1() { let g = ancestry_1(); assert!(g.is_between(NULL, Some(NULL), Some(NULL), &NULL)); assert!(g.is_between("rev1", Some(NULL), Some("rev1"), &NULL)); assert!(g.is_between("rev1", Some("rev1"), Some("rev4"), &NULL)); assert!(g.is_between("rev4", Some("rev1"), Some("rev4"), &NULL)); assert!(g.is_between("rev3", Some("rev1"), Some("rev4"), &NULL)); assert!(!g.is_between("rev4", Some("rev1"), Some("rev3"), &NULL)); assert!(!g.is_between("rev1", Some("rev2a"), Some("rev4"), &NULL)); assert!(!g.is_between(NULL, Some("rev1"), Some("rev4"), &NULL)); } #[test] fn test_find_merge_order_single_lca() { let g = ancestry_1(); assert_eq!(g.find_merge_order("rev4", ["rev2b"]), vec!["rev2b"]); } fn with_ghost() -> Graph<&'static str, DictParentsProvider<&'static str>> { // NULL_REVISION itself is explicitly included as a root so it // survives as a key in iter_ancestry's output. make(&[ ("a", &["b"]), ("c", &["b", "d"]), ("b", &["e"]), ("d", &["e", "g"]), ("e", &["f"]), ("f", &[NULL]), (NULL, &[]), ]) } fn racing_shortcuts() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[ ("a", &[NULL]), ("b", &["a"]), ("c", &["b"]), ("d", &["c"]), ("e", &["d"]), ("f", &["e"]), ("g", &["f"]), ("h", &["g"]), ("i", &["h", "o"]), ("j", &["i", "y"]), ("k", &["d"]), ("l", &["k"]), ("m", &["l"]), ("n", &["m"]), ("o", &["n", "g"]), ("p", &["f"]), ("q", &["p", "m"]), ("r", &["o"]), ("s", &["r"]), ("t", &["s"]), ("u", &["t"]), ("v", &["u"]), ("w", &["v"]), ("x", &["w"]), ("y", &["x"]), ("z", &["x", "q"]), ]) } /// Python's `alt_merge` fixture. 
/// /// ```text /// a /// |\ /// b | /// | | /// c | /// \| /// d /// ``` fn alt_merge() -> Graph<&'static str, DictParentsProvider<&'static str>> { make(&[("a", &[]), ("b", &["a"]), ("c", &["b"]), ("d", &["a", "c"])]) } #[test] fn test_heads_alt_merge() { let g = alt_merge(); assert_eq!(g.heads_with_null(["a", "c"], &NULL), set(["c"])); } #[test] fn test_heads_with_ghost_fixture() { let g = with_ghost(); assert_eq!(g.heads_with_null(["e", "g"], &NULL), set(["e", "g"])); assert_eq!(g.heads_with_null(["a", "c"], &NULL), set(["a", "c"])); assert_eq!(g.heads_with_null(["a", "g"], &NULL), set(["a", "g"])); assert_eq!(g.heads_with_null(["f", "g"], &NULL), set(["f", "g"])); assert_eq!(g.heads_with_null(["c", "g"], &NULL), set(["c"])); assert_eq!(g.heads_with_null(["c", "b", "d", "g"], &NULL), set(["c"])); assert_eq!( g.heads_with_null(["a", "c", "e", "g"], &NULL), set(["a", "c"]) ); assert_eq!(g.heads_with_null(["a", "c", "f"], &NULL), set(["a", "c"])); } #[test] fn test_filter_candidate_lca() { // Corner case from Python: // NULL // / \ // a e // | | // b d // \ / // c // `a`'s descendant is `c`; `e`'s descendant is also `c`. So // heads([a, c, e]) should be just {c}. let g = make(&[ ("c", &["b", "d"]), ("d", &["e"]), ("b", &["a"]), ("a", &[NULL]), ("e", &[NULL]), ]); assert_eq!(g.heads_with_null(["a", "c", "e"], &NULL), set(["c"])); } #[test] fn test_iter_topo_order_ancestry_1() { let g = ancestry_1(); let order = g.iter_topo_order(["rev2a", "rev3", "rev1"]).unwrap(); let pos = |k: &&str| order.iter().position(|n| n == k).unwrap(); assert_eq!( order.iter().cloned().collect::>(), set(["rev1", "rev2a", "rev3"]) ); assert!(pos(&"rev2a") > pos(&"rev1")); assert!(pos(&"rev2a") < pos(&"rev3")); } #[test] fn test_iter_ancestry_boundary() { let g = with_ghost(); // `a` is not in the ancestry of `c`; everything else is. 
let anc = g.iter_ancestry(["c"]); let keys: FxHashSet<&'static str> = anc.iter().map(|(k, _)| *k).collect(); assert!(!keys.contains(&"a")); assert!(keys.contains(&"c")); assert!(keys.contains(&"b")); assert!(keys.contains(&"d")); assert!(keys.contains(&"e")); assert!(keys.contains(&"f")); } #[test] fn test_iter_ancestry_with_ghost_reports_none() { let g = with_ghost(); // `g` is a ghost (present as parent of `d` but not as key). // iter_ancestry should yield it with Parents::Ghost. let anc = g.iter_ancestry(["a", "c"]); let mut ghost_seen = false; for (k, parents) in &anc { if *k == "g" { ghost_seen = true; assert!(matches!(parents, Parents::Ghost)); } } assert!(ghost_seen, "ghost `g` should appear in iter_ancestry"); } #[test] fn test_find_lefthand_merger_rev2b() { // In ancestry_1, rev4 merged rev2b (rev4 has parents [rev3, rev2b]). // Walking rev4's lefthand ancestry from rev2b: rev4 is the merger. let g = ancestry_1(); assert_eq!(g.find_lefthand_merger("rev2b", "rev4"), Some("rev4")); } #[test] fn test_find_lefthand_merger_rev2a() { // rev2a is itself a lefthand ancestor of rev4 (via rev3), so it's // its own "merger". let g = ancestry_1(); assert_eq!(g.find_lefthand_merger("rev2a", "rev4"), Some("rev2a")); } #[test] fn test_find_lefthand_merger_rev4_not_ancestor() { // rev4 is a descendant of rev2a, not an ancestor. let g = ancestry_1(); assert_eq!(g.find_lefthand_merger("rev4", "rev2a"), None); } #[test] fn test_unique_lca_recursive_ancestry_1() { // In ancestry_1, rev1 is the unique LCA of rev2a and rev2b. let g = ancestry_1(); let (key, steps) = g.find_unique_lca("rev2a", "rev2b", &NULL).unwrap(); assert_eq!(key, "rev1"); assert_eq!(steps, 1); } #[test] fn test_unique_lca_no_common_ancestor() { // Two disjoint ancestries share only NULL_REVISION as a common // ancestor. find_unique_lca returns NULL (never errors). 
let g = ancestry_2(); let (key, _steps) = g.find_unique_lca("rev4a", "rev1b", &NULL).unwrap(); assert_eq!(key, NULL); } #[test] fn test_unique_ancestors_racing_shortcuts() { let g = racing_shortcuts(); assert_eq!(g.find_unique_ancestors("z", ["y"]), set(["p", "q", "z"])); assert_eq!( g.find_unique_ancestors("j", ["z"]), set(["h", "i", "j", "y"]) ); } #[test] fn test_find_distance_to_null_ancestry_1() { let g = ancestry_1(); assert_eq!( g.find_distance_to_null(NULL, std::iter::empty(), NULL) .unwrap(), 0 ); assert_eq!( g.find_distance_to_null("rev1", std::iter::empty(), NULL) .unwrap(), 1 ); assert_eq!( g.find_distance_to_null("rev2a", std::iter::empty(), NULL) .unwrap(), 2 ); assert_eq!( g.find_distance_to_null("rev2b", std::iter::empty(), NULL) .unwrap(), 2 ); assert_eq!( g.find_distance_to_null("rev3", std::iter::empty(), NULL) .unwrap(), 3 ); assert_eq!( g.find_distance_to_null("rev4", std::iter::empty(), NULL) .unwrap(), 4 ); } #[test] fn test_find_lefthand_distances_ghosts() { let g = make(&[("nonghost", &[NULL]), ("toghost", &["ghost"])]); let d = g.find_lefthand_distances(vec!["nonghost", "toghost"], NULL); assert_eq!(d.get(&"nonghost"), Some(&1)); // Ghosts are reported as distance -1. 
assert_eq!(d.get(&"toghost"), Some(&-1)); } #[test] fn test_find_lefthand_distances_smoke() { let g = make(&[ ("rev1", &[NULL]), ("rev2a", &["rev1"]), ("rev2b", &["rev1"]), ("rev2c", &["rev1"]), ("rev3a", &["rev2a", "rev2b"]), ("rev3b", &["rev2b", "rev2c"]), ]); let d = g.find_lefthand_distances(vec!["rev3b", "rev2a"], NULL); assert_eq!(d.get(&"rev2a"), Some(&2)); assert_eq!(d.get(&"rev3b"), Some(&3)); } #[test] fn test_get_child_map_ancestry_1() { let g = ancestry_1(); let cm = g.get_child_map(vec!["rev4", "rev3", "rev2a", "rev2b"]); assert_eq!(cm.get(&"rev1"), Some(&vec!["rev2a", "rev2b"])); assert_eq!(cm.get(&"rev2a"), Some(&vec!["rev3"])); assert_eq!(cm.get(&"rev2b"), Some(&vec!["rev4"])); assert_eq!(cm.get(&"rev3"), Some(&vec!["rev4"])); } } vcs-graph-3.5.0/src/known_graph.rs000064400000000000000000001166351046102023000152110ustar 00000000000000//! KnownGraph: graph algorithms that assume the full ancestry is already loaded. //! //! Ported from `vcsgraph/known_graph.py`. use crate::tsort::MergeSorter; use crate::{Error, RevnoVec}; use rustc_hash::{FxHashMap, FxHashSet}; use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap, VecDeque}; use std::hash::Hash; /// A key that may either be a real node or the synthetic "origin" sentinel /// (equivalent to `NULL_REVISION` in the Python implementation). /// /// Only used by [`KnownGraph::heads`], which has special semantics for the /// origin: it is only considered a head when it is the sole candidate. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Key { Origin, Node(K), } #[derive(Debug, Clone)] struct KnownGraphNode { parent_keys: Option>, child_keys: Vec, gdfo: Option, } /// Produce a Vec of `items` ordered by hash of each element. Used as a stable /// (within one process) cache key for unordered sets when `K: Ord` is not /// required. 
/// Produce a `Vec` of `items` ordered by the hash of each element.
///
/// Used as a stable (within one process) cache key for unordered sets when
/// `K: Ord` is not required. Elements whose hashes collide keep their input
/// order, because the sort is stable.
fn sort_by_hash<K: Hash, I: IntoIterator<Item = K>>(items: I) -> Vec<K> {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let hash_of = |k: &K| {
        let mut h = DefaultHasher::new();
        k.hash(&mut h);
        h.finish()
    };
    let mut v: Vec<K> = items.into_iter().collect();
    // Hash every element exactly once; `sort_by_key` would re-hash both
    // operands on each of the O(n log n) comparisons.
    v.sort_by_cached_key(hash_of);
    v
}
let node = self .nodes .entry(key) .or_insert_with(|| KnownGraphNode::new(None)); node.parent_keys = Some(parent_keys); } } fn find_tails(&self) -> Vec { // A "tail" has no parents — either a real root (Some(empty)) or a // ghost (None). Both kinds are treated as gdfo=1 starting points, // matching the Python `not node.parent_keys` check. self.nodes .iter() .filter_map(|(k, n)| match &n.parent_keys { Some(p) if p.is_empty() => Some(k.clone()), None => Some(k.clone()), _ => None, }) .collect() } fn find_tips(&self) -> Vec { self.nodes .iter() .filter_map(|(k, n)| { if n.child_keys.is_empty() { Some(k.clone()) } else { None } }) .collect() } fn find_gdfo(&mut self) { let mut known_parent_gdfos: FxHashMap = FxHashMap::default(); let mut pending: Vec = Vec::new(); for key in self.find_tails() { self.nodes.get_mut(&key).unwrap().gdfo = Some(1); pending.push(key); } while let Some(node_key) = pending.pop() { let node_gdfo = self.nodes[&node_key].gdfo.unwrap(); let child_keys = self.nodes[&node_key].child_keys.clone(); for child_key in child_keys { let (known_gdfo, present) = match known_parent_gdfos.get(&child_key) { Some(v) => (*v + 1, true), None => (1, false), }; let child = self.nodes.get_mut(&child_key).unwrap(); let new_gdfo = node_gdfo + 1; if child.gdfo.is_none_or(|g| new_gdfo > g) { child.gdfo = Some(new_gdfo); } let parent_len = child.parent_keys.as_ref().map(|p| p.len()).unwrap_or(0); if known_gdfo == parent_len { pending.push(child_key.clone()); if present { known_parent_gdfos.remove(&child_key); } } else { known_parent_gdfos.insert(child_key, known_gdfo); } } } } /// Return the parent keys for `key`. Returns `None` if `key` is a ghost, /// and an error-equivalent `None` lookup via `contains_key` otherwise. /// /// Matches the Python semantics: `None` means ghost, missing key would /// raise `KeyError` in Python — here the caller should check /// [`contains`](Self::contains) if disambiguation is needed. 
pub fn get_parent_keys(&self, key: &K) -> Option<&[K]> { self.nodes.get(key)?.parent_keys.as_deref() } /// Return the child keys for `key`. Returns an empty slice for tips. pub fn get_child_keys(&self, key: &K) -> Option<&[K]> { self.nodes.get(key).map(|n| n.child_keys.as_slice()) } /// Return whether `key` is present in the graph at all (including ghosts). pub fn contains(&self, key: &K) -> bool { self.nodes.contains_key(key) } /// Return the number of nodes in the graph (including ghosts). pub fn len(&self) -> usize { self.nodes.len() } /// Return whether the graph is empty. pub fn is_empty(&self) -> bool { self.nodes.is_empty() } /// Iterate over all node keys in the graph (including ghosts). pub fn keys(&self) -> impl Iterator { self.nodes.keys() } /// Return the gdfo (greatest distance from origin) of `key`, if known. pub fn gdfo(&self, key: &K) -> Option { self.nodes.get(key).and_then(|n| n.gdfo) } /// Add a new node to the graph, possibly filling in a ghost. pub fn add_node(&mut self, key: K, parent_keys: Vec) -> Result<(), Error> { // Validate against existing state, then ensure the node exists with // its parents recorded. We hold off on inserting parents into the // graph until after this match, so the borrow of `existing` ends. match self.nodes.get_mut(&key) { Some(existing) => match &existing.parent_keys { Some(existing_parents) if existing_parents == &parent_keys => return Ok(()), Some(existing_parents) => { return Err(Error::ParentMismatch { expected: existing_parents.clone(), actual: parent_keys, key, }); } None => { // Filling in a ghost: the heads cache is no longer // trustworthy. 
existing.parent_keys = Some(parent_keys.clone()); self.known_heads.clear(); } }, None => { self.nodes .insert(key.clone(), KnownGraphNode::new(Some(parent_keys.clone()))); } } let mut parent_gdfo: u64 = 0; for parent_key in &parent_keys { let parent_node = self.nodes.entry(parent_key.clone()).or_insert_with(|| { let mut n = KnownGraphNode::new(None); // Ghosts and roots have gdfo 1. n.gdfo = Some(1); n }); if let Some(g) = parent_node.gdfo { parent_gdfo = parent_gdfo.max(g); } parent_node.child_keys.push(key.clone()); } self.nodes.get_mut(&key).unwrap().gdfo = Some(parent_gdfo + 1); // Propagate gdfo updates to descendants (BFS). let mut pending: VecDeque = VecDeque::new(); pending.push_back(key); while let Some(node_key) = pending.pop_front() { let next_gdfo = self.nodes[&node_key].gdfo.unwrap() + 1; let child_keys = self.nodes[&node_key].child_keys.clone(); for child_key in child_keys { let child = self.nodes.get_mut(&child_key).unwrap(); if child.gdfo.is_none_or(|g| g < next_gdfo) { child.gdfo = Some(next_gdfo); pending.push_back(child_key); } } } Ok(()) } /// Return the heads from amongst `keys`. /// /// Any key reachable from another key is filtered out. This method is /// sentinel-free on the core; the caller handles origin semantics by /// wrapping `K` in [`Key`] and calling [`heads_with_origin`]. /// /// All keys in `candidates` must be present in the graph (not ghosts). pub fn heads(&mut self, candidates: I) -> FxHashSet where I: IntoIterator, { let candidates: FxHashSet = candidates.into_iter().collect(); if candidates.len() < 2 { return candidates; } // Build a process-stable cache key by sorting candidates by their // hash. Hash collisions in the comparator just produce non-unique // orderings; the resulting Vec still uniquely identifies the input // set within a single process (different sets differ in length or // contents). We can't use BTreeSet here because K is not required // to be Ord. 
let heads_cache_key = sort_by_hash(candidates.iter().cloned()); if let Some(cached) = self.known_heads.get(&heads_cache_key) { return cached.clone(); } let mut seen: FxHashSet = FxHashSet::default(); let mut pending: Vec = Vec::new(); let mut min_gdfo: Option = None; for key in &candidates { let node = &self.nodes[key]; if let Some(parents) = &node.parent_keys { pending.extend(parents.iter().cloned()); } if let Some(g) = node.gdfo { min_gdfo = Some(min_gdfo.map_or(g, |m| m.min(g))); } } let min_gdfo = min_gdfo.unwrap_or(0); while let Some(node_key) = pending.pop() { if !seen.insert(node_key.clone()) { continue; } let node = &self.nodes[&node_key]; if node.gdfo.is_some_and(|g| g <= min_gdfo) { continue; } if let Some(parents) = &node.parent_keys { pending.extend(parents.iter().cloned()); } } let heads: FxHashSet = candidates.difference(&seen).cloned().collect(); if self.do_cache { self.known_heads.insert(heads_cache_key, heads.clone()); } heads } /// Return the nodes of the graph in topological order (parents first). /// /// Errors with [`Error::Cycle`] if the graph is not fully connected via /// gdfo (i.e. contains a cycle). pub fn topo_sort(&self) -> Result, Error> { let unreachable: Vec = self .nodes .iter() .filter(|(_, n)| n.gdfo.is_none()) .map(|(k, _)| k.clone()) .collect(); if !unreachable.is_empty() { return Err(Error::Cycle(unreachable)); } let mut pending = self.find_tails(); let mut num_seen_parents: FxHashMap = self.nodes.keys().map(|k| (k.clone(), 0)).collect(); let mut topo_order: Vec = Vec::with_capacity(self.nodes.len()); while let Some(node_key) = pending.pop() { let node = &self.nodes[&node_key]; // Skip ghosts in the output (matches Python behavior). 
if !node.is_ghost() { topo_order.push(node_key.clone()); } let child_keys = node.child_keys.clone(); for child_key in child_keys { let child = &self.nodes[&child_key]; let seen_parents = num_seen_parents[&child_key] + 1; let parent_len = child.parent_keys.as_ref().map(|p| p.len()).unwrap_or(0); if seen_parents == parent_len { pending.push(child_key.clone()); num_seen_parents.remove(&child_key); } else { num_seen_parents.insert(child_key, seen_parents); } } } Ok(topo_order) } /// Return a reverse topological ordering grouped by prefix. /// /// `prefix_of` maps each key to its prefix bucket. Within each bucket the /// ordering is lexicographic (by `K: Ord`), which mirrors Python's use of /// tuple/bytes ordering there. Ghost nodes are skipped in the output. pub fn gc_sort(&self, mut prefix_of: P) -> Vec where K: Ord, P: FnMut(&K) -> PFX, PFX: Ord + Hash, { let mut prefix_tips: FxHashMap> = FxHashMap::default(); for key in self.find_tips() { prefix_tips.entry(prefix_of(&key)).or_default().push(key); } let mut num_seen_children: FxHashMap = self.nodes.keys().map(|k| (k.clone(), 0)).collect(); let mut prefix_list: Vec<(PFX, Vec)> = prefix_tips.into_iter().collect(); prefix_list.sort_by(|a, b| a.0.cmp(&b.0)); let mut result: Vec = Vec::with_capacity(self.nodes.len()); for (_prefix, tips) in prefix_list { // A min-heap (via Reverse) keeps the next-smallest key at the top // in O(log n), instead of re-sorting the pending vector after // every parent insertion. 
let mut pending: BinaryHeap> = tips.into_iter().map(Reverse).collect(); while let Some(Reverse(node_key)) = pending.pop() { let node = &self.nodes[&node_key]; if node.is_ghost() { continue; } let parent_keys = node.parent_keys.as_deref().unwrap_or(&[]); for parent_key in parent_keys { let parent_node = &self.nodes[parent_key]; let seen_children = num_seen_children[parent_key] + 1; if seen_children == parent_node.child_keys.len() { pending.push(Reverse(parent_key.clone())); num_seen_children.remove(parent_key); } else { num_seen_children.insert(parent_key.clone(), seen_children); } } result.push(node_key); } } result } } impl KnownGraph { /// Merge-sort the graph starting from `tip_key`. /// /// Requires `K: Debug` because the underlying [`MergeSorter`] does. pub fn merge_sort(&self, tip_key: K) -> Result>, Error> { let as_parent_map: HashMap> = self .nodes .iter() .filter_map(|(k, n)| n.parent_keys.as_ref().map(|p| (k.clone(), p.clone()))) .collect(); MergeSorter::new(as_parent_map, Some(tip_key), None, true) .map(|item| { item.map(|(_, key, merge_depth, revno, end_of_merge)| MergeSortNode { key, merge_depth, revno: revno.unwrap_or_default(), end_of_merge, }) }) .collect() } } impl KnownGraph> { /// `heads()` variant that implements the Python `NULL_REVISION` filter: /// [`Key::Origin`] is only a head if it is the sole candidate. 
pub fn heads_with_origin(&mut self, candidates: I) -> FxHashSet> where I: IntoIterator>, { let mut candidates: FxHashSet> = candidates.into_iter().collect(); if candidates.contains(&Key::Origin) { candidates.remove(&Key::Origin); if candidates.is_empty() { let mut r = FxHashSet::default(); r.insert(Key::Origin); return r; } } self.heads(candidates) } } #[cfg(test)] mod tests { use super::*; fn make(edges: &[(&'static str, &[&'static str])]) -> KnownGraph<&'static str> { let pm = edges.iter().map(|(k, ps)| (*k, ps.to_vec())); KnownGraph::new(pm, true) } #[test] fn gdfo_linear() { // a -> b -> c let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["b"])]); assert_eq!(g.gdfo(&"a"), Some(1)); assert_eq!(g.gdfo(&"b"), Some(2)); assert_eq!(g.gdfo(&"c"), Some(3)); } #[test] fn gdfo_diamond() { // a // / \ // b c // \ / // d let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); assert_eq!(g.gdfo(&"a"), Some(1)); assert_eq!(g.gdfo(&"b"), Some(2)); assert_eq!(g.gdfo(&"c"), Some(2)); assert_eq!(g.gdfo(&"d"), Some(3)); } #[test] fn heads_trivial() { let mut g = make(&[("a", &[]), ("b", &["a"])]); let h = g.heads(vec!["a", "b"]); let expected: FxHashSet<_> = ["b"].iter().copied().collect(); assert_eq!(h, expected); } #[test] fn heads_diamond() { let mut g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let h = g.heads(vec!["b", "c"]); let expected: FxHashSet<_> = ["b", "c"].iter().copied().collect(); assert_eq!(h, expected); let h2 = g.heads(vec!["a", "d"]); let expected2: FxHashSet<_> = ["d"].iter().copied().collect(); assert_eq!(h2, expected2); } #[test] fn heads_with_origin_only() { let mut g: KnownGraph> = KnownGraph::new(vec![(Key::Node("a"), vec![Key::Origin])], true); let h = g.heads_with_origin(vec![Key::Origin]); assert_eq!(h.len(), 1); assert!(h.contains(&Key::Origin)); } #[test] fn heads_with_origin_ignored() { let mut g: KnownGraph> = KnownGraph::new(vec![(Key::Node("a"), vec![Key::Origin])], true); let h = 
g.heads_with_origin(vec![Key::Origin, Key::Node("a")]); let expected: FxHashSet<_> = [Key::Node("a")].iter().cloned().collect(); assert_eq!(h, expected); } #[test] fn topo_sort_basic() { let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["a"]), ("d", &["b", "c"])]); let order = g.topo_sort().unwrap(); // a must come before b, c; b, c must come before d. let pos = |x: &&str| order.iter().position(|n| n == x).unwrap(); assert!(pos(&"a") < pos(&"b")); assert!(pos(&"a") < pos(&"c")); assert!(pos(&"b") < pos(&"d")); assert!(pos(&"c") < pos(&"d")); } #[test] fn add_node_fills_ghost() { // Start with b having ghost parent a. let mut g = make(&[("b", &["a"])]); // a is a ghost: present with None parents. assert!(g.get_parent_keys(&"a").is_none()); g.add_node("a", vec![]).unwrap(); assert_eq!(g.get_parent_keys(&"a"), Some(&[][..])); assert_eq!(g.gdfo(&"a"), Some(1)); assert_eq!(g.gdfo(&"b"), Some(2)); } #[test] fn add_node_duplicate_ok() { let mut g = make(&[("a", &[]), ("b", &["a"])]); g.add_node("b", vec!["a"]).unwrap(); } #[test] fn add_node_mismatch_errors() { let mut g = make(&[("a", &[]), ("b", &["a"])]); let r = g.add_node("b", vec![]); assert!(matches!(r, Err(Error::ParentMismatch { .. }))); } #[test] fn merge_sort_simple() { // a -> b -> c, linear let g = make(&[("a", &[]), ("b", &["a"]), ("c", &["b"])]); let ms = g.merge_sort("c").unwrap(); let keys: Vec<_> = ms.iter().map(|n| n.key).collect(); assert_eq!(keys, vec!["c", "b", "a"]); } /// Shared fixtures mirrored from `vcsgraph/tests/test_graph.py`. 
const NULL: &str = "null:";

/// `rev1` sits on `NULL` and forks into `rev2a` and `rev2b`; `rev3` continues
/// `rev2a`, and `rev4` merges `rev3` (left-hand parent) with `rev2b`.
/// `NULL` is only referenced as a parent; it has no entry of its own.
fn ancestry_1() -> KnownGraph<&'static str> {
    make(&[
        ("rev1", &[NULL]),
        ("rev2a", &["rev1"]),
        ("rev2b", &["rev1"]),
        ("rev3", &["rev2a"]),
        ("rev4", &["rev3", "rev2b"]),
    ])
}

/// A single line of history: `NULL` <- `rev1` <- `rev2b` <- `rev3b`.
fn feature_branch() -> KnownGraph<&'static str> {
    make(&[
        ("rev1", &[NULL]),
        ("rev2b", &["rev1"]),
        ("rev3b", &["rev2b"]),
    ])
}

/// A linear chain `a` .. `e`, plus `f`, which merges `a` directly with `d`
/// and so has both a short and a long path back to `a`.
fn extended_history_shortcut() -> KnownGraph<&'static str> {
    make(&[
        ("a", &[NULL]),
        ("b", &["a"]),
        ("c", &["b"]),
        ("d", &["c"]),
        ("e", &["d"]),
        ("f", &["a", "d"]),
    ])
}

/// `g` is named as a parent of `d` but never given parents of its own.
/// Unlike the other fixtures, `NULL` is an explicit root here.
fn with_ghost() -> KnownGraph<&'static str> {
    // A graph with a ghost at `g`.
    make(&[
        ("a", &["b"]),
        ("c", &["b", "d"]),
        ("b", &["e"]),
        ("d", &["e", "g"]),
        ("e", &["f"]),
        ("f", &[NULL]),
        (NULL, &[]),
    ])
}

/// `rev3a` and `rev3b` both merge `rev2a` and `rev2b`, with the parent order
/// swapped between the two.
fn criss_cross() -> KnownGraph<&'static str> {
    make(&[
        ("rev1", &[NULL]),
        ("rev2a", &["rev1"]),
        ("rev2b", &["rev1"]),
        ("rev3a", &["rev2a", "rev2b"]),
        ("rev3b", &["rev2b", "rev2a"]),
    ])
}

/// Three children of `rev1` (`rev2a`, `rev2b`, `rev2c`); `rev3a` merges the
/// first two and `rev3b` merges the last two, so they share only `rev2b`.
fn history_shortcut() -> KnownGraph<&'static str> {
    make(&[
        ("rev1", &[NULL]),
        ("rev2a", &["rev1"]),
        ("rev2b", &["rev1"]),
        ("rev2c", &["rev1"]),
        ("rev3a", &["rev2a", "rev2b"]),
        ("rev3b", &["rev2b", "rev2c"]),
    ])
}

/// Equivalent of Python's `alt_merge` fixture.
fn alt_merge() -> KnownGraph<&'static str> { make(&[("a", &[]), ("b", &["a"]), ("c", &["b"]), ("d", &["a", "c"])]) } fn set(xs: [&'static str; N]) -> FxHashSet<&'static str> { xs.into_iter().collect() } #[test] fn test_children_ancestry1() { let g = ancestry_1(); assert_eq!(g.get_child_keys(&NULL), Some(&["rev1"][..])); let mut rev1_children: Vec<_> = g.get_child_keys(&"rev1").unwrap().to_vec(); rev1_children.sort(); assert_eq!(rev1_children, vec!["rev2a", "rev2b"]); assert_eq!(g.get_child_keys(&"rev2a"), Some(&["rev3"][..])); assert_eq!(g.get_child_keys(&"rev3"), Some(&["rev4"][..])); assert_eq!(g.get_child_keys(&"rev2b"), Some(&["rev4"][..])); assert_eq!(g.get_child_keys(&"not_in_graph"), None); } #[test] fn test_parent_ancestry1() { let g = ancestry_1(); assert_eq!(g.get_parent_keys(&"rev1"), Some(&[NULL][..])); assert_eq!(g.get_parent_keys(&"rev2a"), Some(&["rev1"][..])); assert_eq!(g.get_parent_keys(&"rev2b"), Some(&["rev1"][..])); assert_eq!(g.get_parent_keys(&"rev3"), Some(&["rev2a"][..])); let mut rev4_parents: Vec<_> = g.get_parent_keys(&"rev4").unwrap().to_vec(); rev4_parents.sort(); assert_eq!(rev4_parents, vec!["rev2b", "rev3"]); } #[test] fn test_parent_with_ghost() { // In `with_ghost`, "g" is a ghost: present as a parent of `d` but // has no parent_keys of its own. 
let g = with_ghost(); assert_eq!(g.get_parent_keys(&"g"), None); } #[test] fn test_gdfo_ancestry_1() { let g = ancestry_1(); assert_eq!(g.gdfo(&"rev1"), Some(2)); assert_eq!(g.gdfo(&"rev2a"), Some(3)); assert_eq!(g.gdfo(&"rev2b"), Some(3)); assert_eq!(g.gdfo(&"rev3"), Some(4)); assert_eq!(g.gdfo(&"rev4"), Some(5)); } #[test] fn test_gdfo_feature_branch() { let g = feature_branch(); assert_eq!(g.gdfo(&"rev1"), Some(2)); assert_eq!(g.gdfo(&"rev2b"), Some(3)); assert_eq!(g.gdfo(&"rev3b"), Some(4)); } #[test] fn test_gdfo_extended_history_shortcut() { let g = extended_history_shortcut(); assert_eq!(g.gdfo(&"a"), Some(2)); assert_eq!(g.gdfo(&"b"), Some(3)); assert_eq!(g.gdfo(&"c"), Some(4)); assert_eq!(g.gdfo(&"d"), Some(5)); assert_eq!(g.gdfo(&"e"), Some(6)); assert_eq!(g.gdfo(&"f"), Some(6)); } #[test] fn test_gdfo_with_ghost() { let g = with_ghost(); assert_eq!(g.gdfo(&"f"), Some(2)); assert_eq!(g.gdfo(&"e"), Some(3)); assert_eq!(g.gdfo(&"g"), Some(1)); assert_eq!(g.gdfo(&"b"), Some(4)); assert_eq!(g.gdfo(&"d"), Some(4)); assert_eq!(g.gdfo(&"a"), Some(5)); assert_eq!(g.gdfo(&"c"), Some(5)); } #[test] fn test_add_existing_node_noop() { let mut g = ancestry_1(); assert_eq!(g.gdfo(&"rev4"), Some(5)); g.add_node("rev4", vec!["rev3", "rev2b"]).unwrap(); assert_eq!(g.gdfo(&"rev4"), Some(5)); } #[test] fn test_add_existing_node_mismatched_parents() { let mut g = ancestry_1(); let r = g.add_node("rev4", vec!["rev2b", "rev3"]); assert!(matches!(r, Err(Error::ParentMismatch { .. 
}))); } #[test] fn test_add_node_with_ghost_parent() { let mut g = ancestry_1(); g.add_node("rev5", vec!["rev2b", "revGhost"]).unwrap(); assert_eq!(g.gdfo(&"rev5"), Some(4)); assert_eq!(g.gdfo(&"revGhost"), Some(1)); } #[test] fn test_add_new_root() { let mut g = ancestry_1(); g.add_node("rev5", vec![]).unwrap(); assert_eq!(g.gdfo(&"rev5"), Some(1)); } #[test] fn test_add_with_all_ghost_parents() { let mut g = ancestry_1(); g.add_node("rev5", vec!["ghost"]).unwrap(); assert_eq!(g.gdfo(&"rev5"), Some(2)); assert_eq!(g.gdfo(&"ghost"), Some(1)); } #[test] fn test_gdfo_after_add_node() { let mut g = ancestry_1(); assert_eq!(g.get_child_keys(&"rev4"), Some(&[][..])); g.add_node("rev5", vec!["rev4"]).unwrap(); assert_eq!(g.get_parent_keys(&"rev5"), Some(&["rev4"][..])); assert_eq!(g.get_child_keys(&"rev4"), Some(&["rev5"][..])); assert_eq!(g.get_child_keys(&"rev5"), Some(&[][..])); assert_eq!(g.gdfo(&"rev5"), Some(6)); g.add_node("rev6", vec!["rev2b"]).unwrap(); g.add_node("rev7", vec!["rev6"]).unwrap(); g.add_node("rev8", vec!["rev7", "rev5"]).unwrap(); assert_eq!(g.gdfo(&"rev5"), Some(6)); assert_eq!(g.gdfo(&"rev6"), Some(4)); assert_eq!(g.gdfo(&"rev7"), Some(5)); assert_eq!(g.gdfo(&"rev8"), Some(7)); } #[test] fn test_fill_in_ghost() { // Add a few new roots, then fill in the ghost `g` so the // children's gdfos get renumbered. let mut g = with_ghost(); g.add_node("x", vec![]).unwrap(); g.add_node("y", vec!["x"]).unwrap(); g.add_node("z", vec!["y"]).unwrap(); g.add_node("g", vec!["z"]).unwrap(); assert_eq!(g.gdfo(&"f"), Some(2)); assert_eq!(g.gdfo(&"e"), Some(3)); assert_eq!(g.gdfo(&"x"), Some(1)); assert_eq!(g.gdfo(&"y"), Some(2)); assert_eq!(g.gdfo(&"z"), Some(3)); assert_eq!(g.gdfo(&"g"), Some(4)); assert_eq!(g.gdfo(&"b"), Some(4)); assert_eq!(g.gdfo(&"d"), Some(5)); assert_eq!(g.gdfo(&"a"), Some(5)); assert_eq!(g.gdfo(&"c"), Some(6)); } /// Rust-side `heads()` is sentinel-free; callers are expected to filter /// NULL themselves. 
These tests use the core method directly (not /// `heads_with_origin`) and only test non-null cases — NULL-filtering /// semantics are covered by existing `heads_with_origin_*` tests. #[test] fn test_heads_one_non_null() { let mut g = ancestry_1(); for key in ["rev1", "rev2a", "rev2b", "rev3", "rev4"] { assert_eq!(g.heads(vec![key]), set([key])); } } #[test] fn test_heads_single_from_ancestry_1() { let mut g = ancestry_1(); assert_eq!(g.heads(vec!["rev1", "rev2a"]), set(["rev2a"])); assert_eq!(g.heads(vec!["rev1", "rev2b"]), set(["rev2b"])); assert_eq!(g.heads(vec!["rev1", "rev3"]), set(["rev3"])); assert_eq!(g.heads(vec!["rev3", "rev2a"]), set(["rev3"])); assert_eq!(g.heads(vec!["rev1", "rev4"]), set(["rev4"])); assert_eq!(g.heads(vec!["rev2a", "rev4"]), set(["rev4"])); assert_eq!(g.heads(vec!["rev2b", "rev4"]), set(["rev4"])); assert_eq!(g.heads(vec!["rev3", "rev4"]), set(["rev4"])); } #[test] fn test_heads_two_heads_from_ancestry_1() { let mut g = ancestry_1(); assert_eq!(g.heads(vec!["rev2a", "rev2b"]), set(["rev2a", "rev2b"])); assert_eq!(g.heads(vec!["rev3", "rev2b"]), set(["rev3", "rev2b"])); } #[test] fn test_heads_criss_cross_fixture() { let mut g = criss_cross(); assert_eq!(g.heads(vec!["rev2a", "rev1"]), set(["rev2a"])); assert_eq!(g.heads(vec!["rev2b", "rev1"]), set(["rev2b"])); assert_eq!(g.heads(vec!["rev3a", "rev1"]), set(["rev3a"])); assert_eq!(g.heads(vec!["rev3b", "rev1"]), set(["rev3b"])); assert_eq!(g.heads(vec!["rev2a", "rev2b"]), set(["rev2a", "rev2b"])); assert_eq!(g.heads(vec!["rev3a", "rev2a"]), set(["rev3a"])); assert_eq!(g.heads(vec!["rev3a", "rev2b"]), set(["rev3a"])); assert_eq!(g.heads(vec!["rev3a", "rev2a", "rev2b"]), set(["rev3a"])); assert_eq!(g.heads(vec!["rev3b", "rev2a"]), set(["rev3b"])); assert_eq!(g.heads(vec!["rev3b", "rev2b"]), set(["rev3b"])); assert_eq!(g.heads(vec!["rev3b", "rev2a", "rev2b"]), set(["rev3b"])); assert_eq!(g.heads(vec!["rev3a", "rev3b"]), set(["rev3a", "rev3b"])); assert_eq!( g.heads(vec!["rev3a", "rev3b", 
"rev2a", "rev2b"]), set(["rev3a", "rev3b"]) ); } #[test] fn test_heads_history_shortcut_fixture() { let mut g = history_shortcut(); assert_eq!( g.heads(vec!["rev2a", "rev2b", "rev2c"]), set(["rev2a", "rev2b", "rev2c"]) ); assert_eq!(g.heads(vec!["rev3a", "rev3b"]), set(["rev3a", "rev3b"])); assert_eq!( g.heads(vec!["rev2a", "rev3a", "rev3b"]), set(["rev3a", "rev3b"]) ); assert_eq!(g.heads(vec!["rev2a", "rev3b"]), set(["rev2a", "rev3b"])); assert_eq!(g.heads(vec!["rev2c", "rev3a"]), set(["rev2c", "rev3a"])); } #[test] fn test_heads_alt_merge() { let mut g = alt_merge(); assert_eq!(g.heads(vec!["a", "c"]), set(["c"])); } #[test] fn test_heads_with_ghost_fixture() { let mut g = with_ghost(); assert_eq!(g.heads(vec!["e", "g"]), set(["e", "g"])); assert_eq!(g.heads(vec!["a", "c"]), set(["a", "c"])); assert_eq!(g.heads(vec!["a", "g"]), set(["a", "g"])); assert_eq!(g.heads(vec!["f", "g"]), set(["f", "g"])); assert_eq!(g.heads(vec!["c", "g"]), set(["c"])); assert_eq!(g.heads(vec!["c", "b", "d", "g"]), set(["c"])); assert_eq!(g.heads(vec!["a", "c", "e", "g"]), set(["a", "c"])); assert_eq!(g.heads(vec!["a", "c", "f"]), set(["a", "c"])); } #[test] fn test_filling_in_ghosts_resets_head_cache() { let mut g = with_ghost(); assert_eq!(g.heads(vec!["e", "g"]), set(["e", "g"])); // Fill in the ghost so that `g` descends from `e`; the heads // cache must be invalidated, otherwise the second query would // return the stale result. g.add_node("g", vec!["e"]).unwrap(); assert_eq!(g.heads(vec!["e", "g"]), set(["g"])); } /// Helper: assert that `topo_sort` yields a valid topological order /// for the given parent map. 
#[test]
fn test_topo_sort_ghost_parent() {
    // `c` is a ghost: it is named as the parent of `b` but has no entry of
    // its own in the map. The output must place `a` after `b`; the ghost
    // `c` is not part of the output at all, which is why the helper
    // expects exactly as many results as there are map entries.
    assert_topo_sort_order(&[("a", &["b"]), ("b", &["c"])]);
}
&[ ("A", 0, &[3], false), ("B", 1, &[1, 3, 2], false), ("C", 1, &[1, 3, 1], true), ("D", 0, &[2], false), ("E", 1, &[1, 1, 2], false), ("F", 2, &[1, 2, 1], true), ("G", 1, &[1, 1, 1], true), ("H", 0, &[1], true), ], ); } #[test] fn test_merge_sort_end_of_merge_not_last() { assert_merge_sort( &[("A", &["B"]), ("B", &[])], "A", &[("A", 0, &[2], false), ("B", 0, &[1], true)], ); } #[test] fn test_merge_sort_parallel_roots() { assert_merge_sort( &[("A", &[]), ("B", &[]), ("C", &["A", "B"])], "C", &[ ("C", 0, &[2], false), ("B", 1, &[0, 1, 1], true), ("A", 0, &[1], true), ], ); } #[test] fn test_merge_sort_cycle_errors() { // E <- D <- C <- B, B <- D creates a cycle B-C-D-B let pm = [ ("A", vec![] as Vec<&'static str>), ("B", vec!["D"]), ("C", vec!["B"]), ("D", vec!["C"]), ("E", vec!["D"]), ]; let g = KnownGraph::new(pm, true); let r = g.merge_sort("E"); assert!(matches!(r, Err(Error::Cycle(_)))); } } vcs-graph-3.5.0/src/lib.rs000064400000000000000000000421501046102023000134300ustar 00000000000000#![allow(clippy::if_same_then_else)] /// DIAGRAM of terminology /// A /// /\ /// B C /// | |\ /// D E F /// |\/| | /// |/\|/ /// G H /// /// In this diagram, relative to G and H: /// A, B, C, D, E are common ancestors. /// C, D and E are border ancestors, because each has a non-common descendant. /// D and E are least common ancestors because none of their descendants are /// common ancestors. /// C is not a least common ancestor because its descendant, E, is a common /// ancestor. /// /// The find_unique_lca algorithm will pick A in two steps: /// 1. find_lca('G', 'H') => ['D', 'E'] /// 2. 
/// The parents recorded for a node: either a known list or a ghost, i.e. a
/// node for which no parent list is available.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Parents<K> {
    /// No parent list is known for the node.
    Ghost,
    /// The node's parent keys, in order.
    Known(Vec<K>),
}

impl<K: Clone> Parents<K> {
    /// Return `true` if this is [`Parents::Ghost`].
    pub fn is_ghost(&self) -> bool {
        matches!(self, Parents::Ghost)
    }

    /// Return `true` if this is [`Parents::Known`].
    pub fn is_known(&self) -> bool {
        matches!(self, Parents::Known(_))
    }

    /// Return a clone of the known parents.
    ///
    /// Prefer [`as_slice`](Self::as_slice) when a borrow is enough.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Ghost`.
    pub fn unwrap(&self) -> Vec<K> {
        match self {
            Parents::Ghost => panic!("unwrap called on Ghost"),
            Parents::Known(v) => v.clone(),
        }
    }

    /// Borrow the known parents as a slice without cloning.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Ghost`.
    pub fn as_slice(&self) -> &[K] {
        match self {
            Parents::Ghost => panic!("as_slice called on Ghost"),
            Parents::Known(v) => v.as_slice(),
        }
    }

    /// Convert `&Parents<K>` into `Parents<&K>`, borrowing every parent key.
    pub fn as_ref(&self) -> Parents<&K> {
        match self {
            Parents::Ghost => Parents::Ghost,
            Parents::Known(v) => Parents::Known(v.iter().collect()),
        }
    }
}
Ok(Parents::Known(v)) } } } #[derive(Clone, Debug, PartialEq, Eq)] pub struct ParentMap(HashMap>); impl ParentMap { pub fn new() -> Self { ParentMap(HashMap::new()) } #[inline] pub fn insert(&mut self, k: K, v: Parents) { self.0.insert(k, v); } #[inline] pub fn get(&self, k: &K) -> Option<&Parents> { self.0.get(k) } #[inline] pub fn get_key_value(&self, k: &K) -> Option<(&K, &Parents)> { self.0.get_key_value(k) } #[inline] pub fn iter(&self) -> impl Iterator)> { self.0.iter() } #[inline] pub fn contains_key(&self, k: &K) -> bool { self.0.contains_key(k) } #[inline] pub fn keys(&self) -> impl Iterator { self.0.keys() } #[inline] pub fn values(&self) -> impl Iterator> { self.0.values() } #[inline] pub fn len(&self) -> usize { self.0.len() } #[inline] pub fn remove(&mut self, k: &K) -> Option> { self.0.remove(k) } #[inline] pub fn is_empty(&self) -> bool { self.0.is_empty() } #[inline] pub fn extend(&mut self, other: ParentMap) { self.0.extend(other.0); } } impl Default for ParentMap { fn default() -> Self { Self::new() } } impl From> for HashMap> { fn from(map: ParentMap) -> Self { map.0 .into_iter() .map(|(k, v)| (k, v.unwrap())) .collect::>>() } } impl From>> for ParentMap { fn from(map: HashMap>) -> Self { ParentMap( map.into_iter() .map(|(k, v)| (k, Parents::Known(v))) .collect::>>(), ) } } impl IntoIterator for ParentMap { type Item = (K, Parents); type IntoIter = std::collections::hash_map::IntoIter>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } #[cfg(feature = "pyo3")] impl<'py, K: pyo3::IntoPyObject<'py, Error = pyo3::PyErr> + Hash + Clone + PartialEq + Eq> pyo3::IntoPyObject<'py> for ParentMap { type Target = pyo3::types::PyDict; type Output = pyo3::Bound<'py, Self::Target>; type Error = pyo3::PyErr; fn into_pyobject(self, py: pyo3::Python<'py>) -> Result { use pyo3::prelude::*; let dict = pyo3::types::PyDict::new(py); for (k, v) in self.into_iter() { dict.set_item(k, v)?; } Ok(dict) } } #[cfg(feature = "pyo3")] impl<'py, K> 
pyo3::FromPyObject<'_, 'py> for ParentMap where K: for<'a> pyo3::FromPyObject<'a, 'py, Error = pyo3::PyErr> + Hash + Clone + PartialEq + Eq, { type Error = pyo3::PyErr; fn extract(obj: pyo3::Borrowed<'_, 'py, pyo3::PyAny>) -> Result { use pyo3::prelude::*; let dict = obj.cast::()?; let mut result = ParentMap::new(); for (k, v) in dict.iter() { result.insert(k.extract()?, v.extract()?); } Ok(result) } } #[derive(Clone, Debug, PartialEq, Eq)] pub struct ChildMap(HashMap>); impl Default for ChildMap { fn default() -> Self { Self::new() } } impl ChildMap { pub fn new() -> Self { ChildMap(HashMap::new()) } #[inline] pub fn insert(&mut self, k: K) { self.0.entry(k).or_default(); } #[inline] pub fn drain(&mut self) -> impl Iterator)> + '_ { self.0.drain() } #[inline] pub fn add(&mut self, k: K, v: K) { self.0.entry(k).or_default().push(v); } #[inline] pub fn iter(&self) -> impl Iterator)> { self.0.iter() } #[inline] pub fn get(&self, k: &K) -> Option<&Vec> { self.0.get(k) } #[inline] pub fn remove(&mut self, k: &K) -> Option> { self.0.remove(k) } #[inline] pub fn is_empty(&self) -> bool { self.0.is_empty() } #[inline] pub fn contains_key(&self, k: &K) -> bool { self.0.contains_key(k) } } impl std::ops::Index<&K> for ChildMap { type Output = Vec; fn index(&self, index: &K) -> &Self::Output { &self.0[index] } } impl IntoIterator for ChildMap { type Item = (K, Vec); type IntoIter = std::collections::hash_map::IntoIter>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } #[cfg(feature = "pyo3")] impl<'py, K: pyo3::IntoPyObject<'py> + Hash + Clone + PartialEq + Eq> pyo3::IntoPyObject<'py> for ChildMap { type Target = pyo3::types::PyDict; type Output = pyo3::Bound<'py, Self::Target>; type Error = pyo3::PyErr; fn into_pyobject(self, py: pyo3::Python<'py>) -> Result { use pyo3::prelude::*; let dict = pyo3::types::PyDict::new(py); for (k, v) in self.into_iter() { dict.set_item(k, v)?; } Ok(dict) } } impl From>> for ChildMap { fn from(map: HashMap>) -> Self { 
ChildMap(map)
    }
}

/// Create a child map from a parent map.
///
/// Every `(child, parents)` entry with known parents contributes one
/// `parent -> child` edge per listed parent. Ghost entries are skipped, so
/// they add no edges. A node only becomes a key of the result when it is
/// listed as a parent of some child.
// NOTE(review): generic parameter lists in this part of the file were lost in
// extraction; the `K: Hash + Eq + Clone` bound mirrors the `where` clause on
// the `From<ParentMap<K>>` impl below — confirm against the upstream crate.
pub fn invert_parent_map<K: Hash + Eq + Clone>(parent_map: &ParentMap<K>) -> ChildMap<K> {
    let mut child_map = ChildMap::new();
    for (child, parents) in parent_map.iter() {
        // A ghost has no parent list to walk (`as_slice` would panic on it).
        let Parents::Known(parents) = parents else {
            continue;
        };
        for parent in parents {
            child_map.add(parent.clone(), child.clone());
        }
    }
    child_map
}

impl<K> From<ParentMap<K>> for ChildMap<K>
where
    K: Hash + Eq + Clone,
{
    /// Invert `parent_map`; see [`invert_parent_map`].
    fn from(parent_map: ParentMap<K>) -> Self {
        invert_parent_map(&parent_map)
    }
}

#[cfg(test)]
mod invert_parent_map_tests {
    use super::*;
    use maplit::hashmap;

    #[test]
    fn test_invert() {
        let result = super::invert_parent_map(&ParentMap::from(hashmap! {
            2 => vec![1],
            3 => vec![1, 2],
        }));

        // Check node 1's children (order doesn't matter)
        let mut node1_children = result.get(&1).unwrap().clone();
        node1_children.sort();
        assert_eq!(vec![2, 3], node1_children);

        // Check node 2's children
        assert_eq!(vec![3], *result.get(&2).unwrap());

        // Node 3 should have no children (may not be in the map)
        assert!(result.get(&3).map_or(true, |children| children.is_empty()));
    }

    #[test]
    fn test_ghost() {
        // Node 1 is recorded as a ghost but is still referenced as a parent.
        let mut parent_map = ParentMap::from(hashmap! {
            2 => vec![1],
            3 => vec![1, 2],
        });
        parent_map.insert(1, Parents::Ghost);

        // Must not panic on the ghost entry.
        let result = super::invert_parent_map(&parent_map);

        // Check node 1's children (order doesn't matter)
        let mut node1_children = result.get(&1).unwrap().clone();
        node1_children.sort();
        assert_eq!(vec![2, 3], node1_children);

        // Check node 2's children
        assert_eq!(vec![3], *result.get(&2).unwrap());

        // The ghost entry adds no edges of its own: only 1 and 2 have children.
        assert_eq!(2, result.iter().count());
    }
}

/// Collapse regions of the graph that are 'linear'.
///
/// For example:
///
/// ```text
/// A:[B], B:[C]
/// ```
///
/// can be collapsed by removing B and getting:
///
/// ```text
/// A:[C]
/// ```
///
/// # Arguments
///
/// * `parent_map` - A map from each child to its parents.
///
/// # Returns
///
/// Another parent map with 'linear' chains collapsed.
pub fn collapse_linear_regions<K: Hash + Eq + Clone>(parent_map: &ParentMap<K>) -> ParentMap<K> {
    // Note: this isn't a strictly minimal collapse. For example:
    //   A
    //  / \
    // B   C
    //  \ /
    //   D
    //   |
    //   E
    // Will not have 'D' removed, even though 'E' could fit.
// Also:
    //   A
    //   |    A
    //   B => |
    //   |    C
    //   C
    // A and C are both kept because they are edges of the graph. We *could* get
    // rid of A if we wanted.
    //   A
    //  / \
    // B   C
    // |   |
    // D   E
    //  \ /
    //   F
    // Will not have any nodes removed, even though you do have an
    // 'uninteresting' linear D->B and E->C

    // Map every node (keys and parents alike) to the children that list it.
    // A ghost entry has no parent list, so it contributes a key but no edges.
    let mut children: HashMap<K, Vec<K>> = HashMap::new();
    for (child, parents) in parent_map.iter() {
        children.entry(child.clone()).or_default();
        if let Parents::Known(parents) = parents {
            for parent in parents {
                children.entry(parent.clone()).or_default().push(child.clone());
            }
        }
    }

    // NOTE(review): `removed` is written below but never read in this function.
    let mut removed = HashSet::new();
    let mut result: ParentMap<K> = parent_map.clone();
    for node in parent_map.keys() {
        // Only a node with exactly one known parent can be spliced out; ghost
        // entries are left untouched instead of panicking in `as_slice`.
        let Some(Parents::Known(parents)) = result.get(node) else {
            continue;
        };
        let [parent] = parents.as_slice() else {
            continue;
        };
        if children.get(parent).map_or(0, Vec::len) != 1 {
            // This is not the only child
            continue;
        }
        let Some([child]) = children.get(node).map(Vec::as_slice) else {
            continue;
        };
        let Some(Parents::Known(child_parents)) = result.get(child) else {
            continue;
        };
        if child_parents.len() != 1 {
            // This is not its only parent
            continue;
        }
        // The child of this node only points at it, and the parent only has
        // this as a child. Remove this node and splice around it.
        // Take owned copies first so the borrows of `result` and `children`
        // end before both maps are mutated.
        let (parent, child) = (parent.clone(), child.clone());
        result.remove(node);
        result.insert(child.clone(), Parents::Known(vec![parent.clone()]));
        children.insert(parent, vec![child]);
        children.remove(node);
        removed.insert(node);
    }
    result
}

pub mod tsort;

#[cfg(test)]
mod test;

/// A revision number stored as a sequence of `usize` components.
///
/// `Display` renders the components joined with `.`.
#[derive(Clone, PartialEq, Eq)]
pub struct RevnoVec(Vec<usize>);

impl RevnoVec {
    /// Create an empty revision number.
    pub fn new() -> Self {
        RevnoVec(vec![])
    }

    /// Return a copy of `self` with its last component incremented by one.
    ///
    /// # Panics
    ///
    /// Panics if `self` is empty.
    pub fn bump_last(&self) -> Self {
        let mut ret = self.clone();
        *ret.0.last_mut().expect("bump_last on empty RevnoVec") += 1;
        ret
    }

    /// Return the three-component revno `[self[0], branch_count, 1]`.
    ///
    /// # Panics
    ///
    /// Panics if `self` is empty, because the first component is indexed.
    pub fn new_branch(&self, branch_count: usize) -> Self {
        RevnoVec::from(vec![self[0], branch_count, 1])
    }
}

impl Default for RevnoVec {
    fn default() -> Self {
        Self::new()
    }
}

impl IntoIterator for RevnoVec {
    type Item = usize;
    type IntoIter = std::vec::IntoIter<usize>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}

impl std::ops::Index<usize> for RevnoVec {
    type Output = usize;

    fn index(&self, index: usize) -> &Self::Output {
        &self.0[index]
    }
}

impl std::ops::IndexMut<usize> for RevnoVec {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        &mut self.0[index]
    }
}

impl std::fmt::Debug for RevnoVec {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "RevnoVec({:?})", self.0)
    }
}

impl std::fmt::Display for RevnoVec {
    /// Write the components separated by `.`; an empty revno writes nothing.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for (i, r) in self.0.iter().enumerate() {
            if i > 0 {
                write!(f, ".")?;
            }
            write!(f, "{}", r)?;
        }
        Ok(())
    }
}

impl From<Vec<usize>> for RevnoVec {
    fn from(v: Vec<usize>) -> Self {
        RevnoVec(v)
    }
}

impl From<usize> for RevnoVec {
    fn from(v: usize) -> Self {
        RevnoVec(vec![v])
    }
}

#[cfg(feature = "pyo3")]
impl<'py> pyo3::IntoPyObject<'py> for RevnoVec {
    type Target = pyo3::types::PyTuple;
    type Output = pyo3::Bound<'py, Self::Target>;
    type Error = pyo3::PyErr;

    /// Convert into a Python tuple of the components.
    fn into_pyobject(self, py: pyo3::Python<'py>) -> Result<Self::Output, Self::Error> {
        pyo3::types::PyTuple::new(py, self.0.iter())
    }
}

#[cfg(feature =
"pyo3")] impl<'py> pyo3::FromPyObject<'_, 'py> for RevnoVec { type Error = pyo3::PyErr; fn extract(obj: pyo3::Borrowed<'_, 'py, pyo3::PyAny>) -> Result { use pyo3::prelude::*; let tuple = obj.cast::()?; let mut ret = RevnoVec::new(); for r in tuple.iter() { ret.0.push(r.extract()?); } Ok(ret) } } #[derive(std::fmt::Debug)] pub enum Error { Cycle(Vec), ParentMismatch { key: K, expected: Vec, actual: Vec, }, } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Error::Cycle(cycle) => { write!(f, "Cycle: ")?; let mut first = true; for c in cycle.iter() { if first { first = false; } else { write!(f, " -> ")?; } write!(f, "{}", c)?; } Ok(()) } Error::ParentMismatch { key, expected, actual, } => { write!(f, "Parent mismatch for {}: ", key)?; let mut first = true; for e in expected.iter() { if first { first = false; } else { write!(f, ", ")?; } write!(f, "{}", e)?; } write!(f, " != ")?; let mut first = true; for a in actual.iter() { if first { first = false; } else { write!(f, ", ")?; } write!(f, "{}", a)?; } Ok(()) } } } } impl std::error::Error for Error {} vcs-graph-3.5.0/src/parents_provider.rs000064400000000000000000000303001046102023000162420ustar 00000000000000use crate::{ParentMap, Parents}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::{HashMap, HashSet}; use std::hash::Hash; use std::sync::Mutex; pub trait ParentsProvider { fn get_parent_map(&self, keys: &HashSet) -> ParentMap; } pub struct StackedParentsProvider { parent_providers: Vec>>, } impl StackedParentsProvider { pub fn new(parent_providers: Vec>>) -> Self { StackedParentsProvider { parent_providers } } } impl ParentsProvider for StackedParentsProvider { fn get_parent_map(&self, keys: &HashSet) -> ParentMap { let mut found = ParentMap::new(); let mut remaining = keys.clone(); for parent_provider in self.parent_providers.iter() { if remaining.is_empty() { break; } let new_found = parent_provider.get_parent_map(&remaining); for k 
in new_found.keys() { remaining.remove(k); } found.extend(new_found); } found } } pub struct DictParentsProvider(ParentMap); impl From> for DictParentsProvider { fn from(parent_map: ParentMap) -> Self { DictParentsProvider(parent_map) } } impl From>> for DictParentsProvider { fn from(parent_map: HashMap>) -> Self { DictParentsProvider::new(ParentMap( parent_map .into_iter() .map(|(k, v)| (k, Parents::Known(v))) .collect(), )) } } impl DictParentsProvider { pub fn new(parent_map: ParentMap) -> Self { DictParentsProvider(parent_map) } } impl ParentsProvider for DictParentsProvider { fn get_parent_map(&self, keys: &HashSet) -> ParentMap { ParentMap( keys.iter() .filter_map(|k| self.0.get_key_value(k)) .map(|(k, v)| (k.clone(), v.clone())) .collect(), ) } } /// A parents provider which caches its lookups. /// /// Wraps an inner `ParentsProvider` and memoizes every `(key, parents)` /// pair it returns. When cache-misses-tracking is enabled, keys that were /// requested but not present in the inner provider are also remembered so /// we don't re-request them. /// /// The cache can be disabled and re-enabled at runtime; disabling clears /// the cache entirely. pub struct CachingParentsProvider> { inner: P, // Interior mutability so `get_parent_map(&self, ...)` can populate the // cache. The whole provider is still Sync (via Mutex) which matches the // base trait's `&self` contract. state: Mutex>, } struct CacheState { /// None when the cache is disabled, Some when enabled. cache: Option>>, /// Keys known to be missing from the inner provider. Only populated /// when `cache_misses` is true. missing_keys: FxHashSet, /// Whether to remember keys that aren't in the inner provider. cache_misses: bool, } impl> CachingParentsProvider { /// Create a caching wrapper around `inner`. The cache is enabled by /// default with cache-misses tracking on. 
pub fn new(inner: P) -> Self {
        CachingParentsProvider {
            inner,
            state: Mutex::new(CacheState {
                cache: Some(FxHashMap::default()),
                missing_keys: FxHashSet::default(),
                cache_misses: true,
            }),
        }
    }

    // NOTE(review): the generic argument lists of the signatures below were
    // lost in extraction and have been reconstructed from how `CacheState`'s
    // fields are used — confirm against the upstream crate.

    /// Enable the cache. Matches Python's semantics: calling this when the
    /// cache is already enabled is an error. `cache_misses` controls
    /// whether missing keys are remembered between calls.
    ///
    /// # Errors
    ///
    /// Returns `Err` with a static message if the cache is already enabled.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn enable_cache(&self, cache_misses: bool) -> Result<(), &'static str> {
        let mut state = self.state.lock().unwrap();
        if state.cache.is_some() {
            return Err("Cache enabled when already enabled.");
        }
        state.cache = Some(FxHashMap::default());
        state.cache_misses = cache_misses;
        state.missing_keys = FxHashSet::default();
        Ok(())
    }

    /// Disable and clear the cache.
    ///
    /// Also forgets all recorded missing keys and turns miss tracking off.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn disable_cache(&self) {
        let mut state = self.state.lock().unwrap();
        state.cache = None;
        state.cache_misses = false;
        state.missing_keys = FxHashSet::default();
    }

    /// Return a snapshot of the current cache, or `None` if disabled.
    ///
    /// The snapshot is a clone; later cache updates are not reflected in it.
    pub fn get_cached_map(&self) -> Option<FxHashMap<K, Parents<K>>> {
        self.state.lock().unwrap().cache.clone()
    }

    /// Return entries from the cache without consulting the inner provider.
    ///
    /// Returns an empty map when the cache is disabled. Unlike
    /// `get_parent_map`, no filtering by `Parents::Known` is applied here.
    pub fn get_cached_parent_map(&self, keys: &HashSet<K>) -> ParentMap<K> {
        let state = self.state.lock().unwrap();
        match state.cache.as_ref() {
            None => ParentMap::new(),
            Some(cache) => ParentMap(
                keys.iter()
                    .filter_map(|k| cache.get_key_value(k))
                    .map(|(k, v)| (k.clone(), v.clone()))
                    .collect(),
            ),
        }
    }

    /// Note that `key` was missing from the inner provider.
    ///
    /// Does nothing unless cache-misses tracking is currently on.
    pub fn note_missing_key(&self, key: K) {
        let mut state = self.state.lock().unwrap();
        if state.cache_misses {
            state.missing_keys.insert(key);
        }
    }

    /// Snapshot of the missing-keys set.
    pub fn missing_keys(&self) -> FxHashSet<K> {
        self.state.lock().unwrap().missing_keys.clone()
    }

    /// Borrow the inner provider.
pub fn inner(&self) -> &P {
        &self.inner
    }
}

// NOTE(review): the generic parameter list of this impl was lost in
// extraction; the `K: Hash + Eq + Clone` bound is reconstructed from how keys
// are hashed and cloned below — confirm against the upstream crate.
impl<K: Hash + Eq + Clone, P: ParentsProvider<K>> ParentsProvider<K>
    for CachingParentsProvider<K, P>
{
    /// Look up the parents of `keys`, consulting the cache first.
    ///
    /// The result contains only requested keys whose parents are
    /// `Parents::Known`; ghosts and keys the inner provider does not know are
    /// omitted. Keys that are neither cached nor recorded as missing are
    /// fetched from the inner provider in a single call, and everything it
    /// returns is stored in the cache. When miss tracking is on, requested
    /// keys it did not return are remembered as missing.
    ///
    /// The inner provider is never called while the internal lock is held.
    /// If the cache is disabled (including by another thread while this call
    /// is in flight) the answer comes straight from the inner provider.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    fn get_parent_map(&self, keys: &HashSet<K>) -> ParentMap<K> {
        // Uncached path. Note: Python filters the response to only the
        // requested keys with non-None values; we do the same by keeping
        // only known parents.
        let fetch_uncached = || {
            let fetched = self.inner.get_parent_map(keys);
            let mut result = ParentMap::new();
            for k in keys {
                if let Some(v) = fetched.get(k).filter(|v| v.is_known()) {
                    result.insert(k.clone(), v.clone());
                }
            }
            result
        };

        // Determine which keys we still need to fetch from the inner
        // provider (not in cache and not known-missing). `None` means the
        // cache is disabled.
        let needed: Option<HashSet<K>> = {
            let state = self.state.lock().unwrap();
            state.cache.as_ref().map(|cache| {
                keys.iter()
                    .filter(|k| !cache.contains_key(*k) && !state.missing_keys.contains(*k))
                    .cloned()
                    .collect()
            })
        };
        let Some(needed) = needed else {
            return fetch_uncached();
        };

        // Query the inner provider with the lock released.
        let fetched = (!needed.is_empty()).then(|| self.inner.get_parent_map(&needed));

        // Record the fetch and build the response under a single lock so the
        // cache cannot change between the two steps.
        let mut guard = self.state.lock().unwrap();
        let state = &mut *guard;
        let Some(cache) = state.cache.as_mut() else {
            // The cache was disabled while the lock was released; the old
            // code would panic on `unwrap()` here. Answer uncached instead.
            drop(guard);
            return fetch_uncached();
        };
        if let Some(fetched) = fetched {
            if state.cache_misses {
                state
                    .missing_keys
                    .extend(needed.into_iter().filter(|k| !fetched.contains_key(k)));
            }
            cache.extend(fetched);
        }

        // Build the response from the cache, filtering out ghosts/None the
        // same way Python does.
        let mut result = ParentMap::new();
        for k in keys {
            if let Some(v) = cache.get(k).filter(|v| v.is_known()) {
                result.insert(k.clone(), v.clone());
            }
        }
        result
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::cell::RefCell;

    /// A ParentsProvider wrapper that records every key requested across all
    /// calls to `get_parent_map`, repeats included. Used to verify the
    /// caching wrapper avoids redundant lookups.
struct CountingProvider> { inner: P, requested: RefCell>, } impl> CountingProvider { fn new(inner: P) -> Self { CountingProvider { inner, requested: RefCell::new(Vec::new()), } } } impl> ParentsProvider for CountingProvider { fn get_parent_map(&self, keys: &HashSet) -> ParentMap { self.requested.borrow_mut().extend(keys.iter().cloned()); self.inner.get_parent_map(keys) } } fn dict(edges: &[(&'static str, &[&'static str])]) -> DictParentsProvider<&'static str> { let map: HashMap<&'static str, Vec<&'static str>> = edges.iter().map(|(k, ps)| (*k, ps.to_vec())).collect(); DictParentsProvider::from(map) } fn query( cp: &CachingParentsProvider< &'static str, CountingProvider<&'static str, DictParentsProvider<&'static str>>, >, keys: &[&'static str], ) -> ParentMap<&'static str> { let hs: HashSet<&'static str> = keys.iter().copied().collect(); cp.get_parent_map(&hs) } #[test] fn caching_returns_known_parents() { let inner = CountingProvider::new(dict(&[("a", &[]), ("b", &["a"])])); let cp = CachingParentsProvider::new(inner); let pm = query(&cp, &["a", "b"]); assert_eq!(pm.get(&"a"), Some(&Parents::Known(vec![]))); assert_eq!(pm.get(&"b"), Some(&Parents::Known(vec!["a"]))); } #[test] fn caching_avoids_refetching_known_keys() { let inner = CountingProvider::new(dict(&[("a", &[]), ("b", &["a"])])); let cp = CachingParentsProvider::new(inner); query(&cp, &["a", "b"]); query(&cp, &["a", "b"]); // Only one round trip for each key. let requested = cp.inner().requested.borrow(); let mut seen = FxHashSet::default(); for k in requested.iter() { seen.insert(*k); } assert_eq!(seen, ["a", "b"].into_iter().collect::>()); assert_eq!(requested.len(), 2); } #[test] fn caching_remembers_missing_keys() { let inner = CountingProvider::new(dict(&[("a", &[])])); let cp = CachingParentsProvider::new(inner); query(&cp, &["a", "missing"]); query(&cp, &["missing"]); // "missing" should have been requested exactly once. 
let requested = cp.inner().requested.borrow(); let count = requested.iter().filter(|k| **k == "missing").count(); assert_eq!(count, 1); } #[test] fn disable_cache_clears_state() { let inner = CountingProvider::new(dict(&[("a", &[])])); let cp = CachingParentsProvider::new(inner); query(&cp, &["a"]); cp.disable_cache(); query(&cp, &["a"]); // With the cache disabled, every call hits the inner provider. let requested = cp.inner().requested.borrow(); let count = requested.iter().filter(|k| **k == "a").count(); assert_eq!(count, 2); } #[test] fn enable_while_enabled_errors() { let cp = CachingParentsProvider::new(dict(&[("a", &[])])); assert!(cp.enable_cache(true).is_err()); } #[test] fn reenabling_after_disable_works() { let cp = CachingParentsProvider::new(dict(&[("a", &[])])); cp.disable_cache(); cp.enable_cache(true).unwrap(); let hs: HashSet<&'static str> = ["a"].into_iter().collect(); let pm = cp.get_parent_map(&hs); assert_eq!(pm.get(&"a"), Some(&Parents::Known(vec![]))); } } vcs-graph-3.5.0/src/test.rs000064400000000000000000000052511046102023000136420ustar 00000000000000use crate::tsort::TopoSorter; use crate::Error; use std::collections::HashMap; #[test] fn test_tsort_empty() { let graph = HashMap::new(); assert_sort_and_iterate(&graph, &[]); } #[test] fn test_tsort_easy() { let graph = [(0, vec![])].iter().cloned().collect(); assert_sort_and_iterate(&graph, &[0]); } #[test] fn test_tsort_cycle() { let graph = [(0, vec![1]), (1, vec![0])].iter().cloned().collect(); assert_sort_and_iterate_cycle(&graph); } #[test] fn test_tsort_cycle_2() { let graph = [(0, vec![1]), (1, vec![2]), (2, vec![0])] .iter() .cloned() .collect(); assert_sort_and_iterate_cycle(&graph); } #[test] fn test_topo_sort_cycle_with_tail() { let graph = [ (0, vec![1]), (1, vec![2]), (2, vec![3, 4]), (3, vec![0]), (4, vec![]), ] .iter() .cloned() .collect(); assert_sort_and_iterate_cycle(&graph); } #[test] fn test_tsort_1() { let graph = [ (0, vec![3]), (1, vec![4]), (2, vec![1, 4]), (3, 
vec![]), (4, vec![0, 3]), ] .iter() .cloned() .collect(); assert_sort_and_iterate_order(&graph); } #[test] fn test_tsort_partial() { let graph = [ (0, vec![]), (1, vec![0]), (2, vec![0]), (3, vec![0]), (4, vec![1, 2, 3]), (5, vec![1, 2]), (6, vec![1, 2]), (7, vec![2, 3]), (8, vec![0, 1, 4, 5, 6]), ] .iter() .cloned() .collect(); assert_sort_and_iterate_order(&graph); } #[test] fn test_tsort_unincluded_parent() { let graph = [(0, vec![1]), (1, vec![2])].iter().cloned().collect(); assert_sort_and_iterate(&graph, &[1, 0]); } fn topo_sort(graph: &HashMap>) -> Result, Error> { TopoSorter::new(graph.clone().into_iter()).sorted() } fn assert_sort_and_iterate_order(graph: &HashMap>) { let sort_result = topo_sort(graph).unwrap(); for (node, parents) in graph { for parent in parents { if sort_result.iter().position(|&n| n == *node).unwrap() < sort_result.iter().position(|&n| n == *parent).unwrap() { panic!( "parent {} must come before child {}:\n{:?}", parent, node, sort_result ); } } } } fn assert_sort_and_iterate_cycle(graph: &HashMap>) { let sort_result = topo_sort(graph); assert!(sort_result.is_err()); } fn assert_sort_and_iterate(graph: &HashMap>, expected: &[usize]) { let sort_result = topo_sort(graph).unwrap(); assert_eq!(sort_result, expected); } vcs-graph-3.5.0/src/tsort.rs000064400000000000000000000621361046102023000140430ustar 00000000000000#![allow(clippy::if_same_then_else)] use crate::{Error, RevnoVec}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::HashMap; use std::hash::Hash; #[derive(Debug)] pub struct TopoSorter { graph: FxHashMap>, visitable: FxHashSet, // this is a stack storing the depth first search into the graph. pending_node_stack: Vec, // at each level of 'recursion' we have to check each parent. 
This // stack stores the parents we have not yet checked for the node at the // matching depth in pending_node_stack pending_parents_stack: Vec>, // this is a set of the completed nodes for fast checking whether a // parent in a node we are processing on the stack has already been // emitted and thus can be skipped. completed_node_names: FxHashSet, } impl TopoSorter { /// Create a new `TopoSorter` from a graph represented as a sequence of pairs /// of node_name->parent_names_list. pub fn new(graph: impl Iterator)>) -> TopoSorter { let mut g = FxHashMap::default(); for (node, parents) in graph { g.insert(node, parents); } let visitable = g.keys().cloned().collect(); TopoSorter { graph: g, visitable, pending_node_stack: vec![], pending_parents_stack: vec![], completed_node_names: FxHashSet::default(), } } /// Sort the graph and return the nodes as a vector. /// /// After calling this the sorter is empty and you must create a new one. pub fn sorted(&mut self) -> std::result::Result, Error> { self.iter_topo_order() .collect::, Error>>() } /// Yield the nodes of the graph in a topological order. /// /// After finishing iteration the sorter is empty and you cannot continue /// iteration. pub fn iter_topo_order( &mut self, ) -> impl Iterator>> + '_ { self } } impl Iterator for TopoSorter { type Item = std::result::Result>; fn next(&mut self) -> Option>> { loop { // loop until pending_node_stack is empty while !self.pending_node_stack.is_empty() { let parents_to_visit = self.pending_parents_stack.last_mut().unwrap(); // if there are no parents left, the revision is done if parents_to_visit.is_empty() { // append the revision to the topo sorted list // all the nodes parents have been added to the output, // now we can add it to the output. 
let popped_node = self.pending_node_stack.pop().unwrap(); self.pending_parents_stack.pop(); self.completed_node_names.insert(popped_node.clone()); return Some(Ok(popped_node)); } else { // recurse depth first into a single parent let next_node_name = parents_to_visit.pop().unwrap(); if self.completed_node_names.contains(&next_node_name) { // parent was already completed by a child, skip it. continue; } if !self.visitable.contains(&next_node_name) { // parent is not a node in the original graph, skip it. continue; } // transfer it along with its parents from the source graph // into the top of the current depth first search stack. if let Some(parents) = self.graph.remove(&next_node_name) { self.pending_node_stack.push(next_node_name); self.pending_parents_stack.push(parents); } else { // if the next node is not in the source graph it has // already been popped from it and placed into the // current search stack (but not completed or we would // have hit the continue 6 lines up). this indicates a // cycle. return Some(Err(Error::Cycle(self.pending_node_stack.to_vec()))); } } } if let Some(node_name) = self.graph.keys().next() { let node_name = node_name.clone(); let parents = self.graph.remove(&node_name).unwrap(); // now pick a random node in the source graph, and transfer it to the // top of the depth first search stack of pending nodes. self.pending_node_stack.push(node_name); self.pending_parents_stack.push(parents); } else { // if the source graph is empty, we are done. return None; } } } } /// Merge-aware topological sorting of a graph. /// /// :param graph: sequence of pairs of node_name->parent_names_list. /// i.e. [('C', ['B']), ('B', ['A']), ('A', [])] /// For this input the output from the sort or /// iter_topo_order routines will be: /// 'A', 'B', 'C' /// :param branch_tip: the tip of the branch to graph. Revisions not /// reachable from branch_tip are not included in the /// output. 
/// :param mainline_revisions: If not `None` this forces a mainline to be
///   used rather than synthesised from the graph.
///   This must be a valid path through some part
///   of the graph. If the mainline does not cover all
///   the revisions, output stops at the start of the
///   old revision listed in the mainline revisions
///   list.
///   The order for this parameter is oldest-first.
/// :param generate_revno: Flag controlling the generation of
///   revision number sequences in the output. See the output description
///   for more details.
///
/// The result is a list sorted so that all parents come before
/// their children. Each element of the list is a tuple containing:
/// `(sequence_number, node_name, merge_depth, revno_sequence, end_of_merge)`.
///
/// - `sequence_number`: the sequence of this row in the output. Useful for
///   GUIs.
/// - `node_name`: the node name; opaque text to the merge routine.
/// - `merge_depth`: how many levels of merging deep this node has been found.
/// - `revno_sequence`: when requested this field provides a sequence of
///   revision numbers for all revisions. The format is
///   `(REVNO, BRANCHNUM, BRANCHREVNO)`. `BRANCHNUM` is the number of the
///   branch that the revno is on. From left to right the `REVNO` numbers
///   are the sequence numbers within that branch of the revision.
///   For instance, the graph `{A:[], B:['A'], C:['A', 'B']}` will get
///   the following revno sequences assigned: `A:(1,), B:(1,1,1), C:(2,)`.
///   This should be read as 'A is the first commit in the trunk',
///   'B is the first commit on the first branch made from A', 'C is the
///   second commit in the trunk'.
/// - `end_of_merge`: when true the next node is part of a different merge.
///
/// Node identifiers can be any hashable object, and are typically strings.
///
/// If you have a graph like `[('a', ['b']), ('a', ['c'])]` this will only
/// use one of the two values for 'a'.
/// /// The graph is sorted lazily: until you iterate or sort the input is not /// processed other than to create an internal representation. /// /// Iteration or sorting may raise a cycle error if a cycle is present in /// the graph. /// /// # Background on the design /// /// The end of any cluster or 'merge' occurs when: /// /// 1. the next revision has a lower merge depth than we do: /// /// ```text /// A 0 /// B 1 /// C 2 /// D 1 /// E 0 /// ``` /// /// C and D are the ends of clusters; E might be but we need more data. /// /// 2. or the next revision at our merge depth is not our left most ancestor. /// This is required to handle multiple-merges in one commit: /// /// ```text /// A 0 [F, B, E] /// B 1 [D, C] /// C 2 [D] /// D 1 [F] /// E 1 [F] /// F 0 /// ``` /// /// C is the end of a cluster due to rule 1. D is not the end of a /// cluster from rule 1, but is from rule 2: E is not its left most /// ancestor. E is the end of a cluster due to rule 1. F might be but we /// need more data. /// /// We show connecting lines to a parent when: /// /// - The parent is the start of a merge within this cluster. That is, the /// merge was not done to the mainline before this cluster was merged to /// the mainline. This can be detected thus: the parent has a higher /// merge depth and is the next revision in the list. The next-revision /// constraint is needed for this case: /// /// ```text /// A 0 [D, B] /// B 1 [C, F] # we do not want to show a line to F which is depth 2 /// # but not a merge /// C 1 [H] # note that this is a long line to show back to the /// # ancestor - see the end of merge rules. 
/// D 0 [G, E] /// E 1 [G, F] /// F 2 [G] /// G 1 [H] /// H 0 /// ``` /// /// - Part of this merge's branch: the parent has the same merge depth and /// is our left most parent and we are not the end of the cluster: /// /// ```text /// A 0 [C, B] lines: [B, C] /// B 1 [E, C] lines: [C] /// C 0 [D] lines: [D] /// D 0 [F, E] lines: [E, F] /// E 1 [F] lines: [F] /// F 0 /// ``` /// /// - The end of this merge/cluster: we can only have multiple parents at /// the end of a cluster if this branch was previously merged into the /// 'mainline'. /// /// - If we have one and only one parent, show it. Note that this may be /// to a greater merge depth — for instance if this branch continued /// from a deeply nested branch to add something to it. /// - If we have more than one parent, show the second oldest (older == /// further down the list) parent with an equal or lower merge depth. pub struct MergeSorter { // this is a stack storing the depth first search into the graph. node_name_stack: Vec, // at each level of recursion we need the merge depth this node is at: node_merge_depth_stack: Vec, // at each level of 'recursion' we have to check each parent. This // stack stores the parents we have not yet checked for the node at the // matching depth in _node_name_stack pending_parents_stack: Vec>, // When we first look at a node we assign it a seqence number from its // leftmost parent. first_child_stack: Vec>, // This records for each node when we have processed its left most // unmerged subtree. After this subtree is scheduled, all other subtrees // have their merge depth increased by one from this nodes merge depth. // it contains tuples - name, merge_depth left_subtree_pushed_stack: Vec, generate_revno: bool, // The full parent map. Read-only after construction. 
This used to be // stored twice (once mutable for destructive iteration via `remove`, once // immutable for end-of-merge lookups); it is now stored once and the // "still pending" bookkeeping lives in `not_yet_scheduled`. graph: HashMap>, // Nodes in `graph` that have not yet been pushed onto the pending stack. // Plays the role of the old mutable `graph`'s key set. not_yet_scheduled: FxHashSet, stop_revision: Option, revnos: FxHashMap, bool)>, // Each mainline revision counts how many child branches have spawned from it. revno_to_branch_count: FxHashMap, // this is a set of the nodes who have been completely analysed for fast // membership checking completed_node_names: FxHashSet, // this is the scheduling of nodes list. // Nodes are scheduled // from the bottom left of the tree: in the tree // A 0 [D, B] // B 1 [C] // C 1 [D] // D 0 [F, E] // E 1 [F] // F 0 // the scheduling order is: F, E, D, C, B, A // that is - 'left subtree, right subtree, node' // which would mean that when we schedule A we can emit the entire tree. scheduled_nodes: Vec<(K, usize, RevnoVec)>, sequence_number: usize, } /// A single row emitted by [`MergeSorter`]. /// /// Fields in order: sequence number, node name, merge depth, optional revno /// sequence, and an end-of-merge flag. See the [`MergeSorter`] docs for the /// meaning of each field. pub type MergeSortRow = (usize, K, usize, Option, bool); impl MergeSorter { pub fn new( mut graph: HashMap>, branch_tip: Option, mainline_revisions: Option>, generate_revno: bool, ) -> Self { let stop_revision; // if there is an explicit mainline, alter the graph to match. This is // easier than checking at every merge whether we are on the mainline and // if so which path to take. 
if let Some(mainline_revisions) = mainline_revisions.as_ref() { stop_revision = Some(mainline_revisions[0].clone()); // skip the first revision, its what we reach and its parents are // therefore irrelevant for (index, revision) in mainline_revisions[1..].iter().enumerate() { // NB: index 0 means self._mainline_revisions[1] // if the mainline matches the graph, nothing to do. let parent = &mainline_revisions[index]; let graph_parent_ids = graph.get_mut(revision).unwrap(); if !graph_parent_ids.is_empty() { if graph_parent_ids[0] == *parent { continue; } let current_position = graph_parent_ids.iter().position(|x| x == parent).unwrap(); graph_parent_ids.swap(0, current_position); } else { // We ran into a ghost, skip over it, this is a workaround for // bug #243536, the _graph has had ghosts stripped, but the // mainline_revisions have not continue; } } } else { stop_revision = None; } // we need to know the revision numbers of revisions to determine // the revision numbers of their descendants // this is a graph from node to [revno_tuple, first_child] // where first_child is True if no other children have seen this node // and revno_tuple is the tuple that was assigned to the node. 
// we dont know revnos to start with, so we start it seeded with // [None, True] let revnos = graph .keys() .map(|revision| (revision.clone(), (None, true))) .collect::, bool)>>(); let not_yet_scheduled: FxHashSet = graph.keys().cloned().collect(); let mut sorter = MergeSorter { generate_revno, graph, not_yet_scheduled, stop_revision, revnos, revno_to_branch_count: FxHashMap::default(), node_name_stack: Vec::new(), node_merge_depth_stack: Vec::new(), pending_parents_stack: Vec::new(), first_child_stack: Vec::new(), completed_node_names: FxHashSet::default(), scheduled_nodes: Vec::new(), left_subtree_pushed_stack: Vec::new(), sequence_number: 0, }; if let Some(branch_tip) = branch_tip { let parents = sorter.take_parents(&branch_tip).unwrap(); sorter.push_node(branch_tip, 0, parents); } sorter } /// Mark `key` as scheduled and return a clone of its parent list, or /// `None` if it was already scheduled or not in the graph. fn take_parents(&mut self, key: &K) -> Option> { if self.not_yet_scheduled.remove(key) { Some(self.graph[key].clone()) } else { None } } /// Sort the graph and return as a list. /// /// After calling this the sorter is empty and you must create a new one. pub fn sorted(&mut self) -> std::result::Result>, Error> { self.iter_topo_order().collect() } /// /// After finishing iteration the sorter is empty and you cannot continue /// iteration. pub fn iter_topo_order( &mut self, ) -> impl Iterator, Error>> + '_ { self } /// Add node_name to the pending node stack. /// /// Names in this stack will get emitted into the output as they are popped /// off the stack. pub fn push_node(&mut self, node_name: K, merge_depth: usize, parents: Vec) { self.node_name_stack.push(node_name); self.node_merge_depth_stack.push(merge_depth); self.left_subtree_pushed_stack.push(false); // As we push it, figure out if this is the first child let first_child: Option; if !parents.is_empty() { // Node has parents, assign from the left most parent. 
if let Some(entry) = self.revnos.get_mut(&parents[0]) { first_child = Some(entry.1); entry.1 = false; } else { // Left-hand parent is a ghost, consider it not to exist first_child = None; } } else { first_child = None; } self.pending_parents_stack.push(parents); self.first_child_stack.push(first_child); } pub fn pop_node(&mut self) -> K { // Pop the top node off the stack // // The node is appended to the sorted output. let node_name = self.node_name_stack.pop().unwrap(); let merge_depth = self.node_merge_depth_stack.pop().unwrap(); let first_child = self.first_child_stack.pop().unwrap(); // remove this node from the pending lists: self.left_subtree_pushed_stack.pop().unwrap(); self.pending_parents_stack.pop().unwrap(); let parents = self.graph.get(&node_name).unwrap(); // Left-hand parent's revno, if it exists and isn't a ghost. let parent_revno = parents .first() .and_then(|p| self.revnos.get(p)) .and_then(|entry| entry.0.clone()); let revno: RevnoVec = if let Some(parent_revno) = parent_revno { if first_child == Some(true) { // as the first child, we just increase the final revision number parent_revno.bump_last() } else { // not the first child, make a new branch let base_revno = parent_revno[0]; let branch_count = self .revno_to_branch_count .get(&base_revno) .copied() .unwrap_or(0) + 1; self.revno_to_branch_count.insert(base_revno, branch_count); parent_revno.new_branch(branch_count) } } else { // no parents, use the root sequence let root_count = if let Some(root_count) = self.revno_to_branch_count.get(&0) { root_count + 1 } else { 0 }; self.revno_to_branch_count.insert(0, root_count); if root_count > 0 { RevnoVec::from(vec![0, root_count, 1]) } else { RevnoVec::from(1) } }; // store the revno for this node for future reference self.revnos .entry(node_name.clone()) .and_modify(|e| e.0 = Some(revno.clone())); self.completed_node_names.insert(node_name.clone()); self.scheduled_nodes .push((node_name.clone(), merge_depth, revno)); node_name } fn build(&mut 
self) -> std::result::Result<(), Error> { while !self.node_name_stack.is_empty() { let parents_to_visit = self.pending_parents_stack.last().unwrap(); if parents_to_visit.is_empty() { self.pop_node(); } else { while !self.pending_parents_stack.last().unwrap().is_empty() { let is_left_subtree; let next_node_name; if !self.left_subtree_pushed_stack.last().unwrap() { next_node_name = self.pending_parents_stack.last_mut().unwrap().remove(0); is_left_subtree = true; *self.left_subtree_pushed_stack.last_mut().unwrap() = true; // recurse depth first into the primary parent } else { next_node_name = self .pending_parents_stack .last_mut() .unwrap() .pop() .unwrap(); is_left_subtree = false; // place any merges in right-to-left order for scheduling // which gives us left-to-right order after we reverse // the scheduled queue. XXX: This has the effect of // allocating common-new revisions to the right-most // subtree rather than the left most, which will // display nicely (you get smaller trees at the top // of the combined merge). } if self.completed_node_names.contains(&next_node_name) { // this parent was completed by a child on the // call stack. skip it. continue; } // otherwise transfer it from the source graph into the // top of the current depth first search stack. let parents = match self.take_parents(&next_node_name) { Some(parents) => parents, None => { // if the next node is not marked as pending it has // already been popped from the source graph and // placed into the current search stack (but not // completed or we would have hit the continue 4 // lines up). this indicates a cycle. 
if self.graph.contains_key(&next_node_name) { return Err(Error::Cycle(self.node_name_stack.clone())); } else { // This is just a ghost parent, ignore it continue; } } }; let next_merge_depth = usize::from(!is_left_subtree) + self.node_merge_depth_stack.last().unwrap(); self.push_node(next_node_name, next_merge_depth, parents); // and do not continue processing parents until this 'call' // has recursed. break; } } } Ok(()) } } impl Iterator for MergeSorter { type Item = std::result::Result, Error>; fn next(&mut self) -> Option { if let Err(err) = self.build() { return Some(Err(err)); } let (node_name, merge_depth, revno) = self.scheduled_nodes.pop()?; if let Some(stop) = self.stop_revision.as_ref() { if &node_name == stop { return None; } } let end_of_merge = match self.scheduled_nodes.last() { // last revision is the end of a merge None => true, // the next node is to our left Some((_, next_depth, _)) if *next_depth < merge_depth => true, // the next node was part of a multiple-merge Some((next_name, next_depth, _)) if *next_depth == merge_depth => { !self.graph.get(&node_name).unwrap().contains(next_name) } _ => false, }; let revno_out = self.generate_revno.then_some(revno); let result = ( self.sequence_number, node_name, merge_depth, revno_out, end_of_merge, ); self.sequence_number += 1; Some(Ok(result)) } } pub fn merge_sort( graph: HashMap>, branch_tip: Option, mainline_revisions: Option>, generate_revno: bool, ) -> std::result::Result>, Error> { MergeSorter::new(graph, branch_tip, mainline_revisions, generate_revno).sorted() }