[Commented On] D9422: copies: iterate over children directly (instead of parents)
baymax (Baymax, Your Personal Patch-care Companion)
phabricator at mercurial-scm.org
Thu Dec 17 20:27:23 UTC 2020
baymax added a comment.
baymax updated this revision to Diff 24351.
✅ refresh by Heptapod after a successful CI run (🐙 💚)
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D9422?vs=24233&id=24351
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D9422/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D9422
AFFECTED FILES
mercurial/copies.py
rust/hg-core/src/copy_tracing.rs
rust/hg-cpython/src/copy_tracing.rs
CHANGE DETAILS
diff --git a/rust/hg-cpython/src/copy_tracing.rs b/rust/hg-cpython/src/copy_tracing.rs
--- a/rust/hg-cpython/src/copy_tracing.rs
+++ b/rust/hg-cpython/src/copy_tracing.rs
@@ -23,7 +23,7 @@
pub fn combine_changeset_copies_wrapper(
py: Python,
revs: PyList,
- children: PyDict,
+ children_count: PyDict,
target_rev: Revision,
rev_info: PyObject,
is_ancestor: PyObject,
@@ -93,20 +93,15 @@
(p1, p2, files)
});
- let children: PyResult<_> = children
+ let children_count: PyResult<_> = children_count
.items(py)
.iter()
- .map(|(k, v)| {
- let v: &PyList = v.cast_as(py)?;
- let v: PyResult<_> =
- v.iter(py).map(|child| Ok(child.extract(py)?)).collect();
- Ok((k.extract(py)?, v?))
- })
+ .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
.collect();
let res = combine_changeset_copies(
revs?,
- children?,
+ children_count?,
target_rev,
rev_info_maker,
&is_ancestor_wrap,
diff --git a/rust/hg-core/src/copy_tracing.rs b/rust/hg-core/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs
+++ b/rust/hg-core/src/copy_tracing.rs
@@ -1,6 +1,7 @@
use crate::utils::hg_path::HgPath;
use crate::utils::hg_path::HgPathBuf;
use crate::Revision;
+use crate::NULL_REVISION;
use im_rc::ordmap::DiffItem;
use im_rc::ordmap::OrdMap;
@@ -328,7 +329,7 @@
/// ancestor of another
pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
revs: Vec<Revision>,
- children: HashMap<Revision, Vec<Revision>>,
+ mut children_count: HashMap<Revision, usize>,
target_rev: Revision,
rev_info: RevInfoMaker<D>,
is_ancestor: &A,
@@ -337,57 +338,69 @@
let mut oracle = AncestorOracle::new(is_ancestor);
for rev in revs {
- // Retrieve data computed in a previous iteration
- let copies = all_copies.remove(&rev);
- let copies = match copies {
- Some(c) => c,
- None => TimeStampedPathCopies::default(), // root of the walked set
- };
-
- let current_children = match children.get(&rev) {
- Some(c) => c,
- None => panic!("inconsistent `revs` and `children`"),
- };
-
- for child in current_children {
- // We will chain the copies information accumulated for `rev` with
- // the individual copies information for each of its children.
- // Creating a new PathCopies for each `rev` → `children` vertex.
- let mut d: DataHolder<D> = DataHolder { data: None };
- let (p1, p2, changes) = rev_info(*child, &mut d);
+ let mut d: DataHolder<D> = DataHolder { data: None };
+ let (p1, p2, changes) = rev_info(rev, &mut d);
- let parent = if rev == p1 {
- Parent::FirstParent
- } else {
- assert_eq!(rev, p2);
- Parent::SecondParent
- };
- let new_copies =
- add_from_changes(&copies, &changes, parent, *child);
+ // We will chain the copies information accumulated for the parent with
+ // the individual copies information of the current revision. Creating a
+ // new TimeStampedPath for each `rev` → `children` vertex.
+ let mut copies: Option<TimeStampedPathCopies> = None;
+ if p1 != NULL_REVISION {
+ // Retrieve data computed in a previous iteration
+ let parent_copies = get_and_clean_parent_copies(
+ &mut all_copies,
+ &mut children_count,
+ p1,
+ );
+ if let Some(parent_copies) = parent_copies {
+ // combine it with data for that revision
+ let vertex_copies = add_from_changes(
+ &parent_copies,
+ &changes,
+ Parent::FirstParent,
+ rev,
+ );
+ // keep that data around for potential later combination
+ copies = Some(vertex_copies);
+ }
+ }
+ if p2 != NULL_REVISION {
+ // Retrieve data computed in a previous iteration
+ let parent_copies = get_and_clean_parent_copies(
+ &mut all_copies,
+ &mut children_count,
+ p2,
+ );
+ if let Some(parent_copies) = parent_copies {
+ // combine it with data for that revision
+ let vertex_copies = add_from_changes(
+ &parent_copies,
+ &changes,
+ Parent::SecondParent,
+ rev,
+ );
- // Merge has two parents needs to combines their copy information.
- //
- // If the vertex from the other parent was already processed, we
- // will have a value for the child ready to be used. We need to
- // grab it and combine it with the one we already
- // computed. If not we can simply store the newly
- // computed data. The processing happening at
- // the time of the second parent will take care of combining the
- // two TimeStampedPathCopies instance.
- match all_copies.remove(child) {
- None => {
- all_copies.insert(child, new_copies);
- }
- Some(other_copies) => {
- let (minor, major) = match parent {
- Parent::FirstParent => (other_copies, new_copies),
- Parent::SecondParent => (new_copies, other_copies),
- };
- let merged_copies =
- merge_copies_dict(minor, major, &changes, &mut oracle);
- all_copies.insert(child, merged_copies);
- }
- };
+ copies = match copies {
+ None => Some(vertex_copies),
+ // A merge has two parents and needs to combine their copy
+ // information.
+ //
+ // If we got data from both parents, We need to combine
+ // them.
+ Some(copies) => Some(merge_copies_dict(
+ vertex_copies,
+ copies,
+ &changes,
+ &mut oracle,
+ )),
+ };
+ }
+ }
+ match copies {
+ Some(copies) => {
+ all_copies.insert(rev, copies);
+ }
+ _ => {}
}
}
@@ -405,6 +418,32 @@
result
}
+/// fetch previous computed information
+///
+/// If no other children are expected to need this information, we drop it from
+/// the cache.
+///
+/// If parent is not part of the set we are expected to walk, return None.
+fn get_and_clean_parent_copies(
+ all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
+ children_count: &mut HashMap<Revision, usize>,
+ parent_rev: Revision,
+) -> Option<TimeStampedPathCopies> {
+ let count = children_count.get_mut(&parent_rev)?;
+ *count -= 1;
+ if *count == 0 {
+ match all_copies.remove(&parent_rev) {
+ Some(c) => Some(c),
+ None => Some(TimeStampedPathCopies::default()),
+ }
+ } else {
+ match all_copies.get(&parent_rev) {
+ Some(c) => Some(c.clone()),
+ None => Some(TimeStampedPathCopies::default()),
+ }
+ }
+}
+
/// Combine ChangedFiles with some existing PathCopies information and return
/// the result
fn add_from_changes(
diff --git a/mercurial/copies.py b/mercurial/copies.py
--- a/mercurial/copies.py
+++ b/mercurial/copies.py
@@ -1,3 +1,4 @@
+# coding: utf8
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm at selenic.com>
@@ -287,40 +288,92 @@
cl = repo.changelog
isancestor = cl.isancestorrev
- missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
- mrset = set(missingrevs)
+
+ # To track rename from "A" to B, we need to gather all parent → children
+ # edges that are contained in `::B` but not in `::A`.
+ #
+ #
+ # To do so, we need to gather all revisions exclusive¹ to "B" (ie: `::b -
+ # ::a`) and also all the "root points", ie the parents of the exclusive set
+ # that belong to ::a. These are exactly all the revisions needed to express
+ # the parent → children we need to combine.
+ #
+ # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
+ # excluding paths that lead to roots that are not ancestors of `a`. We
+ # keep this out of the explanation because it is hard enough without this special case.
+
+ parents = cl._uncheckedparentrevs
+ graph_roots = (nullrev, nullrev)
+
+ ancestors = cl.ancestors([a.rev()], inclusive=True)
+ revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
roots = set()
- for r in missingrevs:
- for p in cl.parentrevs(r):
- if p == nullrev:
- continue
- if p not in children:
- children[p] = [r]
- else:
- children[p].append(r)
- if p not in mrset:
- roots.add(p)
+ has_graph_roots = False
+
+ # iterate over `only(B, A)`
+ for r in revs:
+ ps = parents(r)
+ if ps == graph_roots:
+ has_graph_roots = True
+ else:
+ p1, p2 = ps
+
+ # find all the "root points" (see larger comment above)
+ if p1 != nullrev and p1 in ancestors:
+ roots.add(p1)
+ if p2 != nullrev and p2 in ancestors:
+ roots.add(p2)
if not roots:
# no common revision to track copies from
return {}
- min_root = min(roots)
-
- from_head = set(
- cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
- )
-
- iterrevs = set(from_head)
- iterrevs &= mrset
- iterrevs.update(roots)
- iterrevs.remove(b.rev())
- revs = sorted(iterrevs)
+ if has_graph_roots:
+ # this deals with the special case mentioned in the [1] footnote. We
+ # must filter out revisions that lead to non-common graphroots.
+ roots = list(roots)
+ m = min(roots)
+ h = [b.rev()]
+ roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
+ roots_to_head = set(roots_to_head)
+ revs = [r for r in revs if r in roots_to_head]
if repo.filecopiesmode == b'changeset-sidedata':
+ # When using side-data, we will process the edges "from" the children.
+ # We iterate over the children, gathering previously collected data for
+ # the parents. To know when the parents' data is no longer necessary, we
+ # keep a counter of how many children each revision has.
+ #
+ # An interesting property of `children_count` is that it only contains
+ # revisions that will be relevant for an edge of the graph. So if a
+ # child has a parent not in `children_count`, that edge should not be
+ # processed.
+ children_count = dict((r, 0) for r in roots)
+ for r in revs:
+ for p in cl.parentrevs(r):
+ if p == nullrev:
+ continue
+ children_count[r] = 0
+ if p in children_count:
+ children_count[p] += 1
revinfo = _revinfo_getter(repo, match)
return _combine_changeset_copies(
- revs, children, b.rev(), revinfo, match, isancestor
+ revs, children_count, b.rev(), revinfo, match, isancestor
)
else:
+ # When not using side-data, we will process the edges "from" the parent.
+ # so we need a full mapping of the parent -> children relation.
+ children = dict((r, []) for r in roots)
+ for r in revs:
+ for p in cl.parentrevs(r):
+ if p == nullrev:
+ continue
+ children[r] = []
+ if p in children:
+ children[p].append(r)
+ x = revs.pop()
+ assert x == b.rev()
+ revs.extend(roots)
+ revs.sort()
+
revinfo = _revinfo_getter_extra(repo)
return _combine_changeset_copies_extra(
revs, children, b.rev(), revinfo, match, isancestor
@@ -328,12 +381,12 @@
def _combine_changeset_copies(
- revs, children, targetrev, revinfo, match, isancestor
+ revs, children_count, targetrev, revinfo, match, isancestor
):
"""combine the copies information for each item of iterrevs
revs: sorted iterable of revision to visit
- children: a {parent: [children]} mapping.
+ children_count: a {parent: <number-of-relevant-children>} mapping.
targetrev: the final copies destination revision (not in iterrevs)
revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
match: a matcher
@@ -345,69 +398,76 @@
if rustmod is not None and alwaysmatch:
return rustmod.combine_changeset_copies(
- list(revs), children, targetrev, revinfo, isancestor
+ list(revs), children_count, targetrev, revinfo, isancestor
)
isancestor = cached_is_ancestor(isancestor)
all_copies = {}
- # iterate over all the "parent" side of copy tracing "edge"
- for r in revs:
- # fetch potential previously computed data for that parent
- copies = all_copies.pop(r, None)
- if copies is None:
- # this is a root
- copies = {}
+ # iterate over all the "children" side of copy tracing "edge"
+ for current_rev in revs:
+ p1, p2, changes = revinfo(current_rev)
+ current_copies = None
- # iterate over all known children to chain the existing data with the
+ # iterate over all parents to chain the existing data with the
# data from the parent → child edge.
- for i, c in enumerate(children[r]):
- p1, p2, changes = revinfo(c)
- childcopies = {}
+ for parent, parent_rev in ((1, p1), (2, p2)):
+ if parent_rev == nullrev:
+ continue
+ remaining_children = children_count.get(parent_rev)
+ if remaining_children is None:
+ continue
+ remaining_children -= 1
+ children_count[parent_rev] = remaining_children
+ if remaining_children:
+ copies = all_copies.get(parent_rev, None)
+ else:
+ copies = all_copies.pop(parent_rev, None)
- # select the right parent → child edge
- if r == p1:
- parent = 1
- if changes is not None:
+ if copies is None:
+ # this is a root
+ copies = {}
+
+ newcopies = copies
+ # chain the data in the edge with the existing data
+ if changes is not None:
+ childcopies = {}
+ if parent == 1:
childcopies = changes.copied_from_p1
- else:
- assert r == p2
- parent = 2
- if changes is not None:
+ elif parent == 2:
childcopies = changes.copied_from_p2
- if not alwaysmatch:
- childcopies = {
- dst: src for dst, src in childcopies.items() if match(dst)
- }
- # chain the data in the edge with the existing data
- newcopies = copies
- if childcopies:
- newcopies = copies.copy()
- for dest, source in pycompat.iteritems(childcopies):
- prev = copies.get(source)
- if prev is not None and prev[1] is not None:
- source = prev[1]
- newcopies[dest] = (c, source)
- assert newcopies is not copies
- if changes is not None and changes.removed:
- if newcopies is copies:
+ if not alwaysmatch:
+ childcopies = {
+ dst: src
+ for dst, src in childcopies.items()
+ if match(dst)
+ }
+ if childcopies:
newcopies = copies.copy()
- for f in changes.removed:
- if f in newcopies:
- if newcopies is copies:
- # copy on write to avoid affecting potential other
- # branches. when there are no other branches, this
- # could be avoided.
- newcopies = copies.copy()
- newcopies[f] = (c, None)
+ for dest, source in pycompat.iteritems(childcopies):
+ prev = copies.get(source)
+ if prev is not None and prev[1] is not None:
+ source = prev[1]
+ newcopies[dest] = (current_rev, source)
+ assert newcopies is not copies
+ if changes.removed:
+ if newcopies is copies:
+ newcopies = copies.copy()
+ for f in changes.removed:
+ if f in newcopies:
+ if newcopies is copies:
+ # copy on write to avoid affecting potential other
+ # branches. when there are no other branches, this
+ # could be avoided.
+ newcopies = copies.copy()
+ newcopies[f] = (current_rev, None)
# check potential need to combine the data from another parent (for
# that child). See comment below for details.
- othercopies = all_copies.get(c)
- if othercopies is None:
- all_copies[c] = newcopies
- elif newcopies is othercopies:
+ if current_copies is None:
+ current_copies = newcopies
+ elif current_copies is newcopies:
# nothing to merge:
pass
else:
@@ -418,17 +478,11 @@
# This is an arbitrary choice made anew when implementing
# changeset based copies. It was made without regards with
# potential filelog related behavior.
- if parent == 1:
- if newcopies is copies:
- newcopies = copies.copy()
- minor, major = othercopies, newcopies
- else:
- # we do not know if the other dict is a copy or not, so we
- # need to blindly copy it. Future change should make this
- # unnecessary.
- minor, major = newcopies, othercopies.copy()
- copies = _merge_copies_dict(minor, major, isancestor, changes)
- all_copies[c] = copies
+ assert parent == 2
+ current_copies = _merge_copies_dict(
+ newcopies, current_copies, isancestor, changes
+ )
+ all_copies[current_rev] = current_copies
# filter out internal details and return a {dest: source mapping}
final_copies = {}
To: marmoute, #hg-reviewers, Alphare
Cc: pulkit, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20201217/c005d9ac/attachment-0002.html>
More information about the Mercurial-patches
mailing list