[PATCH 13 of 14] git: index changed files on-demand

Josef 'Jeff' Sipek jeffpc at josefsipek.net
Thu Jan 2 18:58:35 UTC 2025


 hgext/git/gitlog.py |   1 +
 hgext/git/index.py  |  34 ++++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 10 deletions(-)


# HG changeset patch
# User Josef 'Jeff' Sipek <jeffpc at josefsipek.net>
# Date 1728053504 14400
#      Fri Oct 04 10:51:44 2024 -0400
# Node ID 4caa2e795ca6b8bdb2e3d39eb150519c73240980
# Parent  ea7c0938bdfb01e91f006aa906fd145da69983d4
git: index changed files on-demand

Instead of indexing the changed files for every commit immediately, we can
index...

1. heads' changed files immediately
2. other commits' changed files on-demand

This helps a lot on repositories with large histories, since the initial
Mercurial invocation no longer has to wait for the changed files of every
commit in the history to be indexed.

diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
--- a/hgext/git/gitlog.py
+++ b/hgext/git/gitlog.py
@@ -331,6 +331,7 @@ class changelog(baselog):
         n = self.synthetic(n)
         hn = gitutil.togitnode(n)
         # We've got a real commit!
+        index._index_repo_commit(self.gitrepo, self._db, hn, commit=True)
         files = [
             r[0]
             for r in self._db.execute(
diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -226,7 +226,7 @@ def _index_repo_commit(
     commit=False
 ):
     already_done = db.execute("SELECT changedfiles FROM changelog WHERE node=?",
-        (node.id.hex,)
+        (node,)
     ).fetchone()[0]
     if already_done:
         return # This commit has already been indexed
@@ -338,7 +338,7 @@ def _index_repo(
                 p2 = commit.parents[1].id.hex
             pos += 1
             db.execute(
-                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
+                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, FALSE)',
                 (pos, commit.id.hex, p1, p2),
             )
         else:
@@ -358,18 +358,12 @@ def _index_repo(
                 p2 = parents.pop(0).id.hex
 
                 db.execute(
-                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
+                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, FALSE)',
                     (pos, this, p1, p2, synth),
                 )
 
                 p1 = this
-
-        num_changedfiles = db.execute(
-            "SELECT COUNT(*) from changedfiles WHERE node = ?",
-            (commit.id.hex,),
-        ).fetchone()[0]
-        if not num_changedfiles:
-            _index_repo_commit(gitrepo, db, commit)
+    # Determine heads from the list of possible heads.
     db.execute('DELETE FROM heads')
     db.execute('DELETE FROM possible_heads')
     db.executemany('INSERT INTO possible_heads (node) VALUES(?)',
@@ -384,6 +378,26 @@ def _index_repo(
                     changelog.p2 = possible_heads.node
             )
     ''')
+    # Mark all commits with already-loaded changedfiles info
+    db.execute('''
+    UPDATE changelog SET changedfiles=TRUE WHERE node IN (
+        SELECT DISTINCT node FROM changedfiles
+    )
+    ''')
+
+    if prog is not None:
+        prog.complete()
+
+    # Index the changed files for head commits
+    prog = progress_factory(b'head files')
+    heads = [
+        row[0].decode('ascii')
+        for row in db.execute("SELECT * FROM heads")
+    ]
+    for pos, h in enumerate(heads):
+        if prog is not None:
+            prog.update(pos)
+        _index_repo_commit(gitrepo, db, h)
 
     db.commit()
     if prog is not None:



More information about the Mercurial-devel mailing list