D7834: nodemap: have some python code writing a nodemap in persistent binary form
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Fri Feb 7 22:29:31 UTC 2020
marmoute updated this revision to Diff 20015.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D7834?vs=19882&id=20015
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D7834/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D7834
AFFECTED FILES
mercurial/debugcommands.py
mercurial/revlogutils/nodemap.py
tests/test-completion.t
tests/test-help.t
tests/test-persistent-nodemap.t
CHANGE DETAILS
diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
new file mode 100644
--- /dev/null
+++ b/tests/test-persistent-nodemap.t
@@ -0,0 +1,26 @@
+===================================
+Test the persistent on-disk nodemap
+===================================
+
+
+ $ hg init test-repo
+ $ cd test-repo
+ $ hg debugbuilddag .+5000
+ $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size
+ size=122880, sha256=b961925120e1c9bc345c199b2cc442abc477029fdece37ef9d99cbe59c0558b7
+ 0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0010: ff ff ff ff ff ff ff ff ff ff fa c2 ff ff ff ff |................|
+ 0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0030: ff ff ff ff ff ff ed b3 ff ff ff ff ff ff ff ff |................|
+ 0040: ff ff ff ff ff ff ee 34 00 00 00 00 ff ff ff ff |.......4........|
+ 0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 0080: ff ff ff ff ff ff f8 50 ff ff ff ff ff ff ff ff |.......P........|
+ 0090: ff ff ff ff ff ff ff ff ff ff ec c7 ff ff ff ff |................|
+ 00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00b0: ff ff ff ff ff ff fa be ff ff f2 fc ff ff ff ff |................|
+ 00c0: ff ff ff ff ff ff ef ea ff ff ff ff ff ff f9 17 |................|
+ 00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+ 00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
diff --git a/tests/test-help.t b/tests/test-help.t
--- a/tests/test-help.t
+++ b/tests/test-help.t
@@ -1018,6 +1018,7 @@
print merge state
debugnamecomplete
complete "names" - tags, open branch names, bookmark names
+ debugnodemap write and inspect on disk nodemap
debugobsolete
create arbitrary obsolete marker
debugoptADV (no help text available)
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -107,6 +107,7 @@
debugmanifestfulltextcache
debugmergestate
debugnamecomplete
+ debugnodemap
debugobsolete
debugp1copies
debugp2copies
@@ -289,6 +290,7 @@
debugmanifestfulltextcache: clear, add
debugmergestate:
debugnamecomplete:
+ debugnodemap: dump
debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template
debugp1copies: rev
debugp2copies: rev
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -7,9 +7,156 @@
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
-from .. import error
+
+import struct
+
+from .. import (
+ error,
+ node as nodemod,
+ pycompat,
+)
class NodeMap(dict):
def __missing__(self, x):
raise error.RevlogError(b'unknown node: %s' % x)
+
+
+### Nodemap Trie
+#
+# This is a simple reference implementation to compute and persist a nodemap
+# trie. This reference implementation is write only. The python version of this
+# is not expected to be actually used, since it wont provide performance
+# improvement over existing non-persistent C implementation.
+#
+# The nodemap is persisted as Trie using 4bits-address/16-entries block. each
+# revision can be adressed using its node shortest prefix.
+#
+# The trie is stored as a sequence of block. Each block contains 16 entries
+# (signed 64bit integer, big endian). Each entry can be one of the following:
+#
+# * value >= 0 -> index of sub-block
+# * value == -1 -> no value
+# * value < -1 -> a revision value: rev = -(value+10)
+#
+# The implementation focus on simplicity, not on performance. A Rust
+# implementation should provide a efficient version of the same binary
+# persistence. This reference python implementation is never meant to be
+# extensively use in production.
+
+
+def persistent_data(index):
+ """return the persistent binary form for a nodemap for a given index
+ """
+ trie = _build_trie(index)
+ return _persist_trie(trie)
+
+
+S_BLOCK = struct.Struct(">" + ("l" * 16))
+
+NO_ENTRY = -1
+# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
+REV_OFFSET = 2
+
+
+def _transform_rev(rev):
+ """Return the number used to represent the rev in the tree.
+
+ (or retrieve a rev number from such representation)
+
+ Note that this is an involution, a function equal to its inverse (i.e.
+ which gives the identity when applied to itself).
+ """
+ return -(rev + REV_OFFSET)
+
+
+def _to_int(hex_digit):
+ """turn an hexadecimal digit into a proper integer"""
+ return int(hex_digit, 16)
+
+
+def _build_trie(index):
+ """build a nodemap trie
+
+ The nodemap stores revision number for each unique prefix.
+
+ Each block is a dictionary with keys in `[0, 15]`. Values are either
+ another block or a revision number.
+ """
+ root = {}
+ for rev in range(len(index)):
+ hex = nodemod.hex(index[rev][7])
+ _insert_into_block(index, 0, root, rev, hex)
+ return root
+
+
+def _insert_into_block(index, level, block, current_rev, current_hex):
+ """insert a new revision in a block
+
+ index: the index we are adding revision for
+ level: the depth of the current block in the trie
+ block: the block currently being considered
+ current_rev: the revision number we are adding
+ current_hex: the hexadecimal representation of the of that revision
+ """
+ hex_digit = _to_int(current_hex[level : level + 1])
+ entry = block.get(hex_digit)
+ if entry is None:
+ # no entry, simply store the revision number
+ block[hex_digit] = current_rev
+ elif isinstance(entry, dict):
+ # need to recurse to an underlying block
+ _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+ else:
+ # collision with a previously unique prefix, inserting new
+ # vertices to fit both entry.
+ other_hex = nodemod.hex(index[entry][7])
+ other_rev = entry
+ new = {}
+ block[hex_digit] = new
+ _insert_into_block(index, level + 1, new, other_rev, other_hex)
+ _insert_into_block(index, level + 1, new, current_rev, current_hex)
+
+
+def _persist_trie(root):
+ """turn a nodemap trie into persistent binary data
+
+ See `_build_trie` for nodemap trie structure"""
+ block_map = {}
+ chunks = []
+ for tn in _walk_trie(root):
+ block_map[id(tn)] = len(chunks)
+ chunks.append(_persist_block(tn, block_map))
+ return b''.join(chunks)
+
+
+def _walk_trie(block):
+ """yield all the block in a trie
+
+ Children blocks are always yield before their parent block.
+ """
+ for (_, item) in sorted(block.items()):
+ if isinstance(item, dict):
+ for sub_block in _walk_trie(item):
+ yield sub_block
+ yield block
+
+
+def _persist_block(block_node, block_map):
+ """produce persistent binary data for a single block
+
+ Children block are assumed to be already persisted and present in
+ block_map.
+ """
+ data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
+ return S_BLOCK.pack(*data)
+
+
+def _to_value(item, block_map):
+ """persist any value as an integer"""
+ if item is None:
+ return NO_ENTRY
+ elif isinstance(item, dict):
+ return block_map[id(item)]
+ else:
+ return _transform_rev(item)
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -94,7 +94,10 @@
stringutil,
)
-from .revlogutils import deltas as deltautil
+from .revlogutils import (
+ deltas as deltautil,
+ nodemap,
+)
release = lockmod.release
@@ -2081,6 +2084,20 @@
@command(
+ b'debugnodemap',
+ [(b'', b'dump', False, _(b'write persistent binary nodemap on stdin'))],
+)
+def debugnodemap(ui, repo, **opts):
+ """write and inspect on disk nodemap
+ """
+ if opts['dump']:
+ unfi = repo.unfiltered()
+ cl = unfi.changelog
+ data = nodemap.persistent_data(cl.index)
+ ui.write(data)
+
+
+ at command(
b'debugobsolete',
[
(b'', b'flags', 0, _(b'markers flag')),
To: marmoute, #hg-reviewers, martinvonz
Cc: martinvonz, mjpieters, mercurial-devel
More information about the Mercurial-devel
mailing list