[PATCH 10 of 14] git: handle octopus merges

Josef 'Jeff' Sipek jeffpc at josefsipek.net
Thu Jan 2 18:58:32 UTC 2025


 hgext/git/gitlog.py |  31 ++++++++++++++++-------------
 hgext/git/index.py  |  54 +++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 53 insertions(+), 32 deletions(-)


# HG changeset patch
# User Josef 'Jeff' Sipek <jeffpc at josefsipek.net>
# Date 1710095432 14400
#      Sun Mar 10 14:30:32 2024 -0400
# Node ID 4bee81ed4c627e8757ef37ffcb5daaa14c2ef762
# Parent  2c1e51b58cf315a36b227e26f5eee1d7934fc78d
git: handle octopus merges

Octopus merges in git are merge commits with more than 2 parents.  To make
them fit into mercurial core's assumption about commits having 0-2 parents,
the git indexing code creates "synthetic" commits to represent the octopus
commit as a sequence of regular 2-parent commits.

The synthetic commit hashes are just an incrementing commit number (which is
the same as the generated rev number).  The last commit in the sequence of
commits uses the actual git commit hash.  As a result, `hg checkout -r
<commit>` produces the same working directory as `git checkout <commit>` for
all git commit hashes.

The synthetic commits are stored in the changelog table like any other
commit - with two parents - but their rows also record the commit hash of
the octopus merge commit (in the new `synthetic` column).

For example, given the git DAG (manually pruned `git log --graph`):

*-.   commit 23480d86e2689703b33f693907c40fbe6e1620e4 Merge branches...
|\ \
| | |
| | * commit 2eda9984b06c75448598ec6c0a9028e49dacf616 C
| | |
| * | commit 5e634a12f12fedaf7b8ef0f0fcdbb07222871953 B
| |/
| |
* | commit 8883a1296c5ae323a1b18d1f6410398ce43ebd3a D
|/
|
* commit 95f241588fded9554cae91be0fefd576f61ebfc6 A

Where 23480d86e268 is the octopus merge commit with 3 parents, the
corresponding mercurial DAG is:

$ hg log -G -T '{node} {desc}'
@    23480d86e2689703b33f693907c40fbe6e1620e4 Merge branches 'abc' and 'def'
|\
| o    0000000000000000000000000000000000000004 Merge branches 'abc' and 'def'
| |\
| | o  8883a1296c5ae323a1b18d1f6410398ce43ebd3a D
| | |
o---+  2eda9984b06c75448598ec6c0a9028e49dacf616 C
 / /
o /  5e634a12f12fedaf7b8ef0f0fcdbb07222871953 B
|/
o  95f241588fded9554cae91be0fefd576f61ebfc6 A

diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
--- a/hgext/git/gitlog.py
+++ b/hgext/git/gitlog.py
@@ -153,6 +153,14 @@ class baselog:  # revlog.revlog):
             raise error.LookupError(r, b'00changelog.i', _(b'no node'))
         return bin(t[0])
 
+    def synthetic(self, n):
+        t = self._db.execute(
+            'SELECT synthetic FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
+        ).fetchone()
+        if t is None or t[0] is None:
+            return n
+        return bin(t[0])
+
     def hasnode(self, n):
         t = self._db.execute(
             'SELECT node FROM changelog WHERE node = ?',
@@ -320,6 +328,7 @@ class changelog(baselog):
             return hgchangelog._changelogrevision(
                 extra=extra, manifest=sha1nodeconstants.nullid
             )
+        n = self.synthetic(n)
         hn = gitutil.togitnode(n)
         # We've got a real commit!
         files = [
@@ -465,20 +474,13 @@ class changelog(baselog):
         return bool(self.reachableroots(a, [b], [a], includepath=False))
 
     def parentrevs(self, rev):
-        n = self.node(rev)
-        hn = gitutil.togitnode(n)
-        if hn != gitutil.nullgit:
-            c = self.gitrepo[hn]
-        else:
-            return nullrev, nullrev
-        p1 = p2 = nullrev
-        if c.parents:
-            p1 = self.rev(c.parents[0].id.raw)
-            if len(c.parents) > 2:
-                raise error.Abort(b'TODO octopus merge handling')
-            if len(c.parents) == 2:
-                p2 = self.rev(c.parents[1].id.raw)
-        return p1, p2
+        assert rev >= 0, rev
+        t = self._db.execute(
+            'SELECT p1, p2 FROM changelog WHERE rev = ?', (rev,)
+        ).fetchone()
+        if t is None:
+            raise error.LookupError(rev, b'00changelog.i', _(b'no rev %d'))
+        return self.rev(bin(t[0])), self.rev(bin(t[1]))
 
     # Private method is used at least by the tags code.
     _uncheckedparentrevs = parentrevs
@@ -557,6 +559,7 @@ class manifestlog(baselog):
         if node == sha1nodeconstants.nullid:
             # TODO: this should almost certainly be a memgittreemanifestctx
             return manifest.memtreemanifestctx(self, relpath)
+        node = self.synthetic(node)
         commit = self.gitrepo[gitutil.togitnode(node)]
         t = commit.tree
         if relpath:
diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -18,7 +18,7 @@ from . import gitutil
 
 pygit2 = gitutil.get_pygit2()
 
-_CURRENT_SCHEMA_VERSION = 2
+_CURRENT_SCHEMA_VERSION = 3
 _SCHEMA = (
     """
 CREATE TABLE refs (
@@ -47,7 +47,8 @@ CREATE TABLE changelog (
   rev INTEGER NOT NULL PRIMARY KEY,
   node TEXT NOT NULL,
   p1 TEXT,
-  p2 TEXT
+  p2 TEXT,
+  synthetic TEXT
 );
 
 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
@@ -310,26 +311,43 @@ def _index_repo(
     prog = progress_factory(b'commits')
     # This walker is sure to visit all the revisions in history, but
     # only once.
-    for pos, commit in enumerate(walker):
+    pos = -1
+    for commit in walker:
         if prog is not None:
             prog.update(pos)
         p1 = p2 = gitutil.nullgit
-        if len(commit.parents) > 2:
-            raise error.ProgrammingError(
-                (
-                    b"git support can't handle octopus merges, "
-                    b"found a commit with %d parents :("
-                )
-                % len(commit.parents)
+        if len(commit.parents) <= 2:
+            if commit.parents:
+                p1 = commit.parents[0].id.hex
+            if len(commit.parents) == 2:
+                p2 = commit.parents[1].id.hex
+            pos += 1
+            db.execute(
+                'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)',
+                (pos, commit.id.hex, p1, p2),
             )
-        if commit.parents:
-            p1 = commit.parents[0].id.hex
-        if len(commit.parents) == 2:
-            p2 = commit.parents[1].id.hex
-        db.execute(
-            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
-            (pos, commit.id.hex, p1, p2),
-        )
+        else:
+            parents = list(commit.parents)
+
+            p1 = parents.pop(0).id.hex
+            while parents:
+                pos += 1
+
+                if len(parents) == 1:
+                    this = commit.id.hex
+                    synth = None
+                else:
+                    this = "%040x" % pos
+                    synth = commit.id.hex
+
+                p2 = parents.pop(0).id.hex
+
+                db.execute(
+                    'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)',
+                    (pos, this, p1, p2, synth),
+                )
+
+                p1 = this
 
         num_changedfiles = db.execute(
             "SELECT COUNT(*) from changedfiles WHERE node = ?",



More information about the Mercurial-devel mailing list