[PATCH 12 of 14] git: track which commit's file changes have been indexed

Josef 'Jeff' Sipek jeffpc at josefsipek.net
Thu Jan 2 18:58:34 UTC 2025


 hgext/git/index.py |  25 ++++++++++++++++++++-----
 1 files changed, 20 insertions(+), 5 deletions(-)


# HG changeset patch
# User Josef 'Jeff' Sipek <jeffpc at josefsipek.net>
# Date 1728053486 14400
#      Fri Oct 04 10:51:26 2024 -0400
# Node ID ea7c0938bdfb01e91f006aa906fd145da69983d4
# Parent  f3dc4894194fdfdafcb28da5b8306cbd6c276f17
git: track which commit's file changes have been indexed

Since Git and Mercurial commit hashes are a function of their contents, the
set of files changed by a commit can never change once it has been indexed,
so we can skip re-indexing any commit whose files we have already indexed.

To accomplish this, add a boolean `changedfiles` column to the changelog
table that tracks whether the files of each commit have been indexed.

diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -18,7 +18,7 @@ from . import gitutil
 
 pygit2 = gitutil.get_pygit2()
 
-_CURRENT_SCHEMA_VERSION = 3
+_CURRENT_SCHEMA_VERSION = 4
 _SCHEMA = (
     """
 CREATE TABLE refs (
@@ -48,7 +48,8 @@ CREATE TABLE changelog (
   node TEXT NOT NULL,
   p1 TEXT,
   p2 TEXT,
-  synthetic TEXT
+  synthetic TEXT,
+  changedfiles BOOLEAN
 );
 
 CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
@@ -221,8 +222,16 @@ def fill_in_filelog(gitrepo, db, startco
 def _index_repo_commit(
     gitrepo,
     db,
-    commit
+    node,
+    commit=False
 ):
+    already_done = db.execute("SELECT changedfiles FROM changelog WHERE node=?",
+        (node.id.hex,)
+    ).fetchone()[0]
+    if already_done:
+        return # This commit has already been indexed
+
+    commit = gitrepo[node]
     files = {}
     # I *think* we only need to check p1 for changed files
     # (and therefore linkrevs), because any node that would
@@ -251,6 +260,12 @@ def _index_repo_commit(
             'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
             (commit.id.hex, p, n, None, None, None, None),
         )
+    # Mark the commit as loaded
+    db.execute("UPDATE changelog SET changedfiles=TRUE WHERE node=?",
+        (commit.id.hex,)
+    )
+    if commit:
+        db.commit()
 
 def _index_repo(
     gitrepo,
@@ -323,7 +338,7 @@ def _index_repo(
                 p2 = commit.parents[1].id.hex
             pos += 1
             db.execute(
-                'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)',
+                'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
                 (pos, commit.id.hex, p1, p2),
             )
         else:
@@ -343,7 +358,7 @@ def _index_repo(
                 p2 = parents.pop(0).id.hex
 
                 db.execute(
-                    'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)',
+                    'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
                     (pos, this, p1, p2, synth),
                 )
 



More information about the Mercurial-devel mailing list