[PATCH 12 of 14] git: track which commit's file changes have been indexed
Josef 'Jeff' Sipek
jeffpc at josefsipek.net
Thu Jan 2 18:58:34 UTC 2025
hgext/git/index.py | 25 ++++++++++++++++++++-----
1 files changed, 20 insertions(+), 5 deletions(-)
# HG changeset patch
# User Josef 'Jeff' Sipek <jeffpc at josefsipek.net>
# Date 1728053486 14400
# Fri Oct 04 10:51:26 2024 -0400
# Node ID ea7c0938bdfb01e91f006aa906fd145da69983d4
# Parent f3dc4894194fdfdafcb28da5b8306cbd6c276f17
git: track which commit's file changes have been indexed
Since git and mercurial commit hashes are a function of the commit's
contents, the set of files changed by a given commit can never change.
We can therefore skip indexing the changed files of any commit whose
changed files have already been indexed.
To accomplish this, add a boolean `changedfiles` column to the changelog
table that tracks whether the changed files of each commit have been indexed.
diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -18,7 +18,7 @@ from . import gitutil
pygit2 = gitutil.get_pygit2()
-_CURRENT_SCHEMA_VERSION = 3
+_CURRENT_SCHEMA_VERSION = 4
_SCHEMA = (
"""
CREATE TABLE refs (
@@ -48,7 +48,8 @@ CREATE TABLE changelog (
node TEXT NOT NULL,
p1 TEXT,
p2 TEXT,
- synthetic TEXT
+ synthetic TEXT,
+ changedfiles BOOLEAN
);
CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
@@ -221,8 +222,16 @@ def fill_in_filelog(gitrepo, db, startco
def _index_repo_commit(
gitrepo,
db,
- commit
+ node,
+ commit=False
):
+ already_done = db.execute("SELECT changedfiles FROM changelog WHERE node=?",
+ (node.id.hex,)
+ ).fetchone()[0]
+ if already_done:
+ return # This commit has already been indexed
+
+ commit = gitrepo[node]
files = {}
# I *think* we only need to check p1 for changed files
# (and therefore linkrevs), because any node that would
@@ -251,6 +260,12 @@ def _index_repo_commit(
'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
(commit.id.hex, p, n, None, None, None, None),
)
+ # Mark the commit as loaded
+ db.execute("UPDATE changelog SET changedfiles=TRUE WHERE node=?",
+ (commit.id.hex,)
+ )
+ if commit:
+ db.commit()
def _index_repo(
gitrepo,
@@ -323,7 +338,7 @@ def _index_repo(
p2 = commit.parents[1].id.hex
pos += 1
db.execute(
- 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, NULL)',
+ 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
(pos, commit.id.hex, p1, p2),
)
else:
@@ -343,7 +358,7 @@ def _index_repo(
p2 = parents.pop(0).id.hex
db.execute(
- 'INSERT INTO changelog (rev, node, p1, p2, synthetic) VALUES(?, ?, ?, ?, ?)',
+ 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
(pos, this, p1, p2, synth),
)
More information about the Mercurial-devel
mailing list