[PATCH 13 of 14] git: index changed files on-demand
Josef 'Jeff' Sipek
jeffpc at josefsipek.net
Thu Jan 2 18:58:35 UTC 2025
hgext/git/gitlog.py | 1 +
hgext/git/index.py | 34 ++++++++++++++++++++++++----------
2 files changed, 25 insertions(+), 10 deletions(-)
# HG changeset patch
# User Josef 'Jeff' Sipek <jeffpc at josefsipek.net>
# Date 1728053504 14400
# Fri Oct 04 10:51:44 2024 -0400
# Node ID 4caa2e795ca6b8bdb2e3d39eb150519c73240980
# Parent ea7c0938bdfb01e91f006aa906fd145da69983d4
git: index changed files on-demand
Instead of indexing the changed files for every commit immediately, we can
index:
1. the heads' changed files immediately
2. other commits' changed files on demand
This helps a lot on repositories with large histories since the initial
Mercurial invocation doesn't have to wait for the complete repo history to
be indexed.
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
--- a/hgext/git/gitlog.py
+++ b/hgext/git/gitlog.py
@@ -331,6 +331,7 @@ class changelog(baselog):
n = self.synthetic(n)
hn = gitutil.togitnode(n)
# We've got a real commit!
+ index._index_repo_commit(self.gitrepo, self._db, hn, commit=True)
files = [
r[0]
for r in self._db.execute(
diff --git a/hgext/git/index.py b/hgext/git/index.py
--- a/hgext/git/index.py
+++ b/hgext/git/index.py
@@ -226,7 +226,7 @@ def _index_repo_commit(
commit=False
):
already_done = db.execute("SELECT changedfiles FROM changelog WHERE node=?",
- (node.id.hex,)
+ (node,)
).fetchone()[0]
if already_done:
return # This commit has already been indexed
@@ -338,7 +338,7 @@ def _index_repo(
p2 = commit.parents[1].id.hex
pos += 1
db.execute(
- 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, TRUE)',
+ 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, NULL, FALSE)',
(pos, commit.id.hex, p1, p2),
)
else:
@@ -358,18 +358,12 @@ def _index_repo(
p2 = parents.pop(0).id.hex
db.execute(
- 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, TRUE)',
+ 'INSERT INTO changelog (rev, node, p1, p2, synthetic, changedfiles) VALUES(?, ?, ?, ?, ?, FALSE)',
(pos, this, p1, p2, synth),
)
p1 = this
-
- num_changedfiles = db.execute(
- "SELECT COUNT(*) from changedfiles WHERE node = ?",
- (commit.id.hex,),
- ).fetchone()[0]
- if not num_changedfiles:
- _index_repo_commit(gitrepo, db, commit)
+ # Determine heads from the list of possible heads.
db.execute('DELETE FROM heads')
db.execute('DELETE FROM possible_heads')
db.executemany('INSERT INTO possible_heads (node) VALUES(?)',
@@ -384,6 +378,26 @@ def _index_repo(
changelog.p2 = possible_heads.node
)
''')
+ # Mark all commits with already-loaded changedfiles info
+ db.execute('''
+ UPDATE changelog SET changedfiles=TRUE WHERE node IN (
+ SELECT DISTINCT node FROM changedfiles
+ )
+ ''')
+
+ if prog is not None:
+ prog.complete()
+
+ # Index the changed files for head commits
+ prog = progress_factory(b'head files')
+ heads = [
+ row[0].decode('ascii')
+ for row in db.execute("SELECT * FROM heads")
+ ]
+ for pos, h in enumerate(heads):
+ if prog is not None:
+ prog.update(pos)
+ _index_repo_commit(gitrepo, db, h)
db.commit()
if prog is not None:
More information about the Mercurial-devel
mailing list