D6734: git: RFC of a new extension to _directly_ operate on git repositories
durin42 (Augie Fackler)
phabricator at mercurial-scm.org
Sat Oct 5 20:29:10 UTC 2019
durin42 updated this revision to Diff 16858.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D6734?vs=16714&id=16858
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D6734/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D6734
AFFECTED FILES
hgext/git/__init__.py
hgext/git/dirstate.py
hgext/git/gitlog.py
hgext/git/index.py
mercurial/transaction.py
setup.py
tests/test-check-interfaces.py
tests/test-git-interop.t
CHANGE DETAILS
diff --git a/tests/test-git-interop.t b/tests/test-git-interop.t
new file mode 100644
--- /dev/null
+++ b/tests/test-git-interop.t
@@ -0,0 +1,190 @@
+This test requires pygit2:
+ > python -c 'import pygit2' || exit 80
+
+Setup:
+ > GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
+ > GIT_AUTHOR_EMAIL='test@example.org'; export GIT_AUTHOR_EMAIL
+ > GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE
+ > GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME
+ > GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL
+ > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE
+
+ > count=10
+ > gitcommit() {
+ > GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000";
+ > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
+ > git commit "$@" >/dev/null 2>/dev/null || echo "git commit error"
+ > count=`expr $count + 1`
+ > }
+
+ > echo "[extensions]" >> $HGRCPATH
+ > echo "git=" >> $HGRCPATH
+
+Make a new repo with git:
+ $ mkdir foo
+ $ cd foo
+ $ git init
+ Initialized empty Git repository in $TESTTMP/foo/.git/
+Ignore the .hg directory within git:
+ $ echo .hg >> .git/info/exclude
+ $ echo alpha > alpha
+ $ git add alpha
+ $ gitcommit -am 'Add alpha'
+ $ echo beta > beta
+ $ git add beta
+ $ gitcommit -am 'Add beta'
+ $ echo gamma > gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+Without creating the .hg, hg status fails:
+ $ hg status
+ abort: no repository found in '$TESTTMP/foo' (.hg not found)!
+ [255]
+But if you run hg init --git, it works:
+ $ hg init --git
+ $ hg id
+ 3d9be8deba43 master
+ $ hg status
+ ? gamma
+Log works too:
+ $ hg log
+ changeset: 1:3d9be8deba43
+ bookmark: master
+ user: test <test@example.org>
+ date: Mon Jan 01 00:00:11 2007 +0000
+ summary: Add beta
+
+ changeset: 0:c5864c9d16fb
+ user: test <test@example.org>
+ date: Mon Jan 01 00:00:10 2007 +0000
+ summary: Add alpha
+
+
+
+and bookmarks:
+ $ hg bookmarks
+ * master 1:3d9be8deba43
+
+diff even works transparently in both systems:
+ $ echo blah >> alpha
+ $ git diff
+ diff --git a/alpha b/alpha
+ index 4a58007..faed1b7 100644
+ --- a/alpha
+ +++ b/alpha
+ @@ -1 +1,2 @@
+ alpha
+ +blah
+ $ hg diff --git
+ diff --git a/alpha b/alpha
+ --- a/alpha
+ +++ b/alpha
+ @@ -1,1 +1,2 @@
+ alpha
+ +blah
+
+Remove a file, it shows as such:
+ $ rm alpha
+ $ hg status
+ ! alpha
+ ? gamma
+
+Revert works:
+ $ hg revert alpha --traceback
+ $ hg status
+ ? gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+Add shows sanely in both:
+ $ hg add gamma
+ $ hg status
+ A gamma
+ $ hg files
+ alpha
+ beta
+ gamma
+ $ git ls-files
+ alpha
+ beta
+ gamma
+ $ git status
+ On branch master
+ Changes to be committed:
+ (use "git reset HEAD <file>..." to unstage)
+
+ new file: gamma
+
+
+forget does what it should as well:
+ $ hg forget gamma
+ $ hg status
+ ? gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+hg log FILE
+
+ $ echo a >> alpha
+ $ hg ci -m 'more alpha'
+ $ echo b >> beta
+ $ hg ci -m 'more beta'
+ $ echo a >> alpha
+ $ hg ci -m 'even more alpha'
+ $ hg log -G alpha
+ @ changeset: 4:bd975ddde71c
+ : bookmark: master
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: even more alpha
+ :
+ o changeset: 2:77f597222800
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: more alpha
+ :
+ o changeset: 0:c5864c9d16fb
+ user: test <test@example.org>
+ date: Mon Jan 01 00:00:10 2007 +0000
+ summary: Add alpha
+
+ $ hg log -G beta
+ o changeset: 3:b40d4fed5e27
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: more beta
+ :
+ o changeset: 1:3d9be8deba43
+ | user: test <test@example.org>
+ ~ date: Mon Jan 01 00:00:11 2007 +0000
+ summary: Add beta
+
+
+hg annotate
+
+ $ hg annotate alpha
+ 0: alpha
+ 2: a
+ 4: a
+ $ hg annotate beta
+ 1: beta
+ 3: b
diff --git a/tests/test-check-interfaces.py b/tests/test-check-interfaces.py
--- a/tests/test-check-interfaces.py
+++ b/tests/test-check-interfaces.py
@@ -44,6 +44,11 @@
wireprotov2server,
)
+from hgext.git import (
+ dirstate as gitdirstate,
+ gitlog,
+)
+
testdir = os.path.dirname(__file__)
rootdir = pycompat.fsencode(os.path.normpath(os.path.join(testdir, '..')))
@@ -193,6 +198,10 @@
ziverify.verifyClass(intdirstate.idirstate, dirstate.dirstate)
+ # git interop implementations
+ ziverify.verifyClass(intdirstate.idirstate, gitdirstate.gitdirstate)
+ ziverify.verifyClass(repository.ifilestorage, gitlog.filelog)
+
vfs = vfsmod.vfs(b'.')
fl = filelog.filelog(vfs, b'dummy.i')
checkzobject(fl, allowextra=True)
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1087,6 +1087,7 @@
'hgext', 'hgext.convert', 'hgext.fsmonitor',
'hgext.fastannotate',
'hgext.fsmonitor.pywatchman',
+ 'hgext.git',
'hgext.highlight',
'hgext.infinitepush',
'hgext.largefiles', 'hgext.lfs', 'hgext.narrow',
diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -473,8 +473,11 @@
self._generatefiles(group=gengroupprefinalize)
categories = sorted(self._finalizecallback)
for cat in categories:
- self._finalizecallback[cat](self)
- # Prevent double usage and help clear cycles.
+ try:
+ self._finalizecallback[cat](self)
+ except TypeError as e:
+ raise TypeError('%r: %r (%r)' % (cat, self._finalizecallback[cat], e))
+ # Prevent double usage and help clear cycles.
self._finalizecallback = None
self._generatefiles(group=gengrouppostfinalize)
diff --git a/hgext/git/index.py b/hgext/git/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/index.py
@@ -0,0 +1,231 @@
+from __future__ import absolute_import
+
+import os
+import sqlite3
+
+from mercurial.i18n import _
+
+from mercurial import (
+ encoding,
+ error,
+ node as nodemod,
+)
+
+import pygit2
+
+_CURRENT_SCHEMA_VERSION = 1
+_SCHEMA = """
+CREATE TABLE refs (
+ -- node and name are unique together. There may be more than one name for
+ -- a given node, and there may be no name at all for a given node (in the
+ -- case of an anonymous hg head).
+ node TEXT NOT NULL,
+ name TEXT
+);
+
+-- The topological heads of the changelog, which hg depends on.
+CREATE TABLE heads (
+ node TEXT NOT NULL
+);
+
+-- A total ordering of the changelog
+CREATE TABLE changelog (
+ rev INTEGER NOT NULL PRIMARY KEY,
+ node TEXT NOT NULL,
+ p1 TEXT,
+ p2 TEXT
+);
+
+CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
+CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
+
+-- Changed files for each commit, which lets us dynamically build
+-- filelogs.
+CREATE TABLE changedfiles (
+ node TEXT NOT NULL,
+ filename TEXT NOT NULL,
+ -- 40 zeroes for deletions
+ filenode TEXT NOT NULL,
+-- to handle filelog parentage:
+ p1node TEXT,
+ p1filenode TEXT,
+ p2node TEXT,
+ p2filenode TEXT
+);
+
+CREATE INDEX changedfiles_nodes_idx
+ ON changedfiles(node);
+
+PRAGMA user_version=%d
+""" % _CURRENT_SCHEMA_VERSION
+
+def _createdb(path):
+    """Open the sqlite index at ``path``, creating the schema if needed.
+
+    A ``user_version`` of 0 is treated as a new database and gets
+    ``_SCHEMA`` applied; ``_CURRENT_SCHEMA_VERSION`` is accepted as-is; any
+    other value aborts.
+
+    Returns the open connection, with ``text_factory`` set to ``bytes`` and
+    the journal mode switched to WAL.
+    """
+    db = sqlite3.connect(encoding.strfromlocal(path))
+    db.text_factory = bytes
+
+    version = db.execute(r'PRAGMA user_version').fetchone()[0]
+
+    if version == 0:
+        # New database: apply the schema one statement at a time.
+        for statement in _SCHEMA.split(';'):
+            db.execute(statement.strip())
+
+        db.commit()
+
+    elif version != _CURRENT_SCHEMA_VERSION:
+        # Don't leak the connection when we refuse to use the database.
+        db.close()
+        raise error.Abort(_('sqlite database has unrecognized version'))
+
+    db.execute(r'PRAGMA journal_mode=WAL')
+
+    return db
+
+_OUR_ORDER = (pygit2.GIT_SORT_TOPOLOGICAL |
+ pygit2.GIT_SORT_TIME |
+ pygit2.GIT_SORT_REVERSE)
+
+_DIFF_FLAGS = 1 << 21 # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
+
+def _find_nearest_ancestor_introducing_node(
+        db, gitrepo, file_path, walk_start, filenode):
+    """Find the nearest ancestor that introduces a file node.
+
+    Args:
+      db: a handle to our sqlite database.
+      gitrepo: A pygit2.Repository instance.
+      file_path: the path of a file in the repo
+      walk_start: a pygit2.Oid that is a commit where we should start walking
+        for our nearest ancestor.
+      filenode: the hex id of the file contents we are looking for.
+
+    Returns:
+      The hex commit ID of the first commit visited from walk_start (in
+      _OUR_ORDER) that changedfiles records as setting file_path to
+      filenode, or None if the walk visits no such commit.
+    """
+    parent_options = {row[0] for row in db.execute(
+        'SELECT node FROM changedfiles '
+        'WHERE filename = ? AND filenode = ?',
+        (file_path, filenode))}
+    # NOTE(review): _createdb sets text_factory to bytes, so these rows are
+    # bytes; confirm the membership test against w.id.hex holds on py3.
+    for w in gitrepo.walk(walk_start, _OUR_ORDER):
+        if w.id.hex in parent_options:
+            return w.id.hex
+    return None
+
+def _index_repo(gitrepo, db, progress_cb):
+    """Rebuild the changelog and heads tables and fill in changedfiles.
+
+    The changelog table is emptied and rewritten from a walk over every
+    commit reachable from a branch, tag, remote branch or refs/hg/ ref.
+    changedfiles rows are only computed for commits that have none yet.
+
+    Args:
+      gitrepo: A pygit2.Repository instance.
+      db: a handle to our sqlite database.
+      progress_cb: called as progress_cb(topic, pos) while working, and
+        with pos=None when a topic ('refs', 'commits') is finished.
+
+    Raises:
+      error.ProgrammingError: if a commit has more than two parents.
+    """
+    # Identify all references so we can tell the walker to visit all of them.
+    all_refs = gitrepo.listall_references()
+    walker = None
+    possible_heads = set()
+    for pos, ref in enumerate(all_refs):
+        progress_cb('refs', pos)
+        if not (
+            ref.startswith('refs/heads/')  # local branch
+            or ref.startswith('refs/tags/')  # tag
+            or ref.startswith('refs/remotes/')  # remote branch
+            or ref.startswith('refs/hg/')  # from this extension
+        ):
+            continue
+        try:
+            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
+        except ValueError:
+            # No commit to be found, so we don't care for hg's purposes.
+            continue
+        possible_heads.add(start.id.hex)
+        if walker is None:
+            walker = gitrepo.walk(start.id, _OUR_ORDER)
+        else:
+            walker.push(start.id)
+    if walker is None:
+        # No ref peeled to a commit, so there is nothing to walk. Fall
+        # through with an empty walk so the tables still get reset.
+        walker = ()
+    # Empty out the existing changelog. Even for large-ish histories
+    # we can do the top-level "walk all the commits" dance very
+    # quickly as long as we don't need to figure out the changed files
+    # list.
+    db.execute('DELETE FROM changelog')
+    progress_cb('refs', None)
+    # This walker is sure to visit all the revisions in history, but
+    # only once.
+    for pos, commit in enumerate(walker):
+        progress_cb('commits', pos)
+        if len(commit.parents) > 2:
+            raise error.ProgrammingError(
+                ("git support can't handle octopus merges, "
+                 "found a commit with %d parents :(") % len(commit.parents))
+        p1 = p2 = nodemod.nullhex
+        if commit.parents:
+            p1 = commit.parents[0].id.hex
+        if len(commit.parents) == 2:
+            p2 = commit.parents[1].id.hex
+        db.execute(
+            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
+            (pos, commit.id.hex, p1, p2))
+
+        num_changedfiles = db.execute(
+            "SELECT COUNT(*) from changedfiles WHERE node = ?",
+            (commit.id.hex,)).fetchone()[0]
+        if num_changedfiles:
+            # Already indexed on an earlier run.
+            continue
+        # I *think* we only need to check p1 for changed files
+        # (and therefore linkrevs), because any node that would
+        # actually have this commit as a linkrev would be
+        # completely new in this rev.
+        if commit.parents:
+            patchgen = gitrepo.diff(
+                commit.parents[0].id.hex, commit.id.hex, flags=_DIFF_FLAGS)
+        else:
+            patchgen = commit.tree.diff_to_tree(
+                swap=True, flags=_DIFF_FLAGS)
+        new_files = (patch.delta.new_file for patch in patchgen)
+        files = {nf.path: nf.id.hex for nf in new_files
+                 if nf.id.raw != nodemod.nullid}
+        for path, filenode in files.items():
+            # List of previous node, commit whose ancestry we
+            # should search.
+            parents = []
+            for parent in commit.parents:
+                t = parent.tree
+                for comp in path.split('/'):
+                    try:
+                        t = gitrepo[t[comp].id]
+                    except KeyError:
+                        # The path doesn't exist in this parent.
+                        break
+                else:
+                    introducer = _find_nearest_ancestor_introducing_node(
+                        db, gitrepo, path, parent.id, t.id.hex)
+                    parents.append((introducer, t.id.hex))
+            p1node = p1fnode = p2node = p2fnode = None
+            if parents:
+                p1node, p1fnode = parents[0]
+            if len(parents) == 2:
+                p2node, p2fnode = parents[1]
+            if len(parents) > 2:
+                raise error.ProgrammingError(
+                    "git support can't handle octopus merges")
+            db.execute(
+                'INSERT INTO changedfiles ('
+                'node, filename, filenode, p1node, p1filenode, p2node, '
+                'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
+                (commit.id.hex, path, filenode,
+                 p1node, p1fnode, p2node, p2fnode))
+    db.execute('DELETE FROM heads')
+    for h in possible_heads:
+        # A ref target is only a head if no indexed commit lists it as a
+        # parent.
+        haschild = db.execute(
+            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?',
+            (h, h)).fetchone()[0]
+        if not haschild:
+            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))
+
+    db.commit()
+    progress_cb('commits', None)
+
+def get_index(gitrepo):
+    """Return the sqlite index for ``gitrepo``, freshly reindexed.
+
+    The database lives at .hg/cache/git-commits.sqlite next to the git
+    directory; the cache directory is created when missing.
+    """
+    cachedir = os.path.join(gitrepo.path, '..', '.hg', 'cache')
+    if not os.path.exists(cachedir):
+        os.makedirs(cachedir)
+    db = _createdb(os.path.join(cachedir, 'git-commits.sqlite'))
+    # TODO check against gitrepo heads before doing a full index
+    # TODO thread a ui.progress call into this layer
+    _index_repo(gitrepo, db, lambda x, y: None)
+    return db
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitlog.py
@@ -0,0 +1,449 @@
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+
+from mercurial import (
+ ancestor,
+ changelog as hgchangelog,
+ dagop,
+ error,
+ manifest,
+ match as matchmod,
+ node as nodemod,
+ pycompat,
+ revlog,
+)
+from mercurial.interfaces import (
+ repository,
+ util as interfaceutil,
+)
+from mercurial.utils import (
+ stringutil,
+)
+from . import (
+ index,
+)
+
+import pygit2
+
+class baselog(object):
+    """Common implementations between changelog and manifestlog.
+
+    Lookups go through the ``changelog`` table of the sqlite index, which
+    stores nodes as hex; methods here take and return binary nodes.
+    """
+
+    def __init__(self, gr, db):
+        self.gitrepo = gr
+        self._db = db
+
+    def __len__(self):
+        """Return the number of rows in the changelog table."""
+        return int(self._db.execute(
+            'SELECT COUNT(*) FROM changelog').fetchone()[0])
+
+    def rev(self, n):
+        """Return the revision number of binary node ``n`` (-1 for nullid).
+
+        Raises error.LookupError if the node is not in the index.
+        """
+        if n == nodemod.nullid:
+            return -1
+        t = self._db.execute(
+            'SELECT rev FROM changelog WHERE node = ?',
+            (nodemod.hex(n),)).fetchone()
+        if t is None:
+            raise error.LookupError(n, '00changelog.i', _('no node'))
+        return t[0]
+
+    def node(self, r):
+        """Return the binary node of revision ``r`` (nullid for nullrev).
+
+        Raises error.LookupError if the revision is not in the index.
+        """
+        if r == nodemod.nullrev:
+            return nodemod.nullid
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE rev = ?',
+            (r,)).fetchone()
+        if t is None:
+            raise error.LookupError(r, '00changelog.i', _('no node'))
+        return nodemod.bin(t[0])
+
+    def hasnode(self, n):
+        """Return True if binary node ``n`` is in the index."""
+        # The table is keyed by hex node, so hexlify like rev() does.
+        t = self._db.execute(
+            'SELECT node FROM changelog WHERE node = ?',
+            (nodemod.hex(n),)).fetchone()
+        return t is not None
+
+# TODO: an interface for the changelog type?
+class changelog(baselog):
+    """Changelog view of a git repository, backed by the sqlite index."""
+
+    def __contains__(self, rev):
+        try:
+            self.node(rev)
+            return True
+        except error.LookupError:
+            return False
+
+    @property
+    def filteredrevs(self):
+        # TODO: we should probably add a refs/hg/ namespace for hidden
+        # heads etc, but that's an idea for later.
+        return set()
+
+    @property
+    def nodemap(self):
+        """Build a {binary node: rev} dict of the whole changelog."""
+        r = {
+            nodemod.bin(v[0]): v[1] for v in
+            self._db.execute('SELECT node, rev FROM changelog')}
+        r[nodemod.nullid] = nodemod.nullrev
+        return r
+
+    def tip(self):
+        """Return the binary node of the highest revision, or nullid."""
+        t = self._db.execute(
+            'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1').fetchone()
+        if t:
+            # The index stores hex; convert so the result is binary like
+            # node() and the nullid fallback below.
+            return nodemod.bin(t[0])
+        return nodemod.nullid
+
+    def revs(self, start=0, stop=None):
+        """Yield revision numbers from ``start`` to ``stop``, inclusive.
+
+        ``stop`` defaults to the highest revision.
+        """
+        if stop is None:
+            # Revisions are numbered 0..len-1 by the indexer, so the last
+            # one is len-1. (tip() is a node, not a revision number.)
+            stop = len(self) - 1
+        t = self._db.execute(
+            'SELECT rev FROM changelog '
+            'WHERE rev >= ? AND rev <= ? '
+            'ORDER BY REV ASC',
+            (start, stop))
+        return (int(r[0]) for r in t)
+
+    def _partialmatch(self, id):
+        """Resolve hex prefix ``id`` to a binary node, or None.
+
+        Raises error.WdirUnsupported for a prefix of the working directory
+        id and error.AmbiguousPrefixLookupError if several nodes match.
+        """
+        if nodemod.wdirhex.startswith(id):
+            raise error.WdirUnsupported
+        candidates = [nodemod.bin(x[0]) for x in self._db.execute(
+            'SELECT node FROM changelog WHERE node LIKE ?', (id + '%', ))]
+        if nodemod.nullhex.startswith(id):
+            candidates.append(nodemod.nullid)
+        if len(candidates) > 1:
+            raise error.AmbiguousPrefixLookupError(
+                id, '00changelog.i', _('ambiguous identifier'))
+        if candidates:
+            return candidates[0]
+        return None
+
+    def flags(self, rev):
+        return 0
+
+    def shortest(self, node, minlength=1):
+        """Return the shortest hex prefix of ``node`` matching one commit."""
+        nodehex = nodemod.hex(node)
+        for attempt in pycompat.xrange(minlength, len(nodehex)+1):
+            candidate = nodehex[:attempt]
+            # Count commits sharing this prefix; querying with the full
+            # hash would make every prefix look unique.
+            matches = int(self._db.execute(
+                'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
+                (candidate + '%',)).fetchone()[0])
+            if matches == 1:
+                return candidate
+        return nodehex
+
+    def headrevs(self, revs=None):
+        """Return the sorted head revisions, limited to ``revs`` if given."""
+        realheads = [int(x[0]) for x in
+                     self._db.execute(
+                         'SELECT rev FROM changelog '
+                         'INNER JOIN heads ON changelog.node = heads.node')]
+        if revs:
+            return sorted([r for r in revs if r in realheads])
+        return sorted(realheads)
+
+    def changelogrevision(self, nodeorrev):
+        """Return a _changelogrevision for a binary node or revision number."""
+        # Ensure we have a node id
+        if isinstance(nodeorrev, int):
+            n = self.node(nodeorrev)
+        else:
+            n = nodeorrev
+        # handle looking up nullid
+        if n == nodemod.nullid:
+            return hgchangelog._changelogrevision(extra={})
+        hn = nodemod.hex(n)
+        # We've got a real commit!
+        files = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode != ?',
+            (hn, nodemod.nullhex))]
+        filesremoved = [r[0] for r in self._db.execute(
+            'SELECT filename FROM changedfiles '
+            'WHERE node = ? and filenode = ?',
+            (hn, nodemod.nullhex))]
+        c = self.gitrepo[hn]
+        return hgchangelog._changelogrevision(
+            manifest=n,  # pretend manifest the same as the commit node
+            user='%s <%s>' % (c.author.name.encode('utf8'),
+                              c.author.email.encode('utf8')),
+            # TODO: a fuzzy memory from hg-git hacking says this should be
+            # -offset
+            date=(c.author.time, c.author.offset),
+            files=files,
+            # TODO filesadded in the index
+            filesremoved=filesremoved,
+            description=c.message.encode('utf8'),
+            # TODO do we want to handle extra? how?
+            extra={b'branch': b'default'},
+        )
+
+    def ancestors(self, revs, stoprev=0, inclusive=False):
+        """Return a lazy ancestor set for ``revs``.
+
+        Raises IndexError if any revision is past the end of the changelog.
+        """
+        revs = list(revs)
+        # Compare against the highest revision number, not the tip node.
+        tiprev = len(self) - 1
+        for r in revs:
+            if r > tiprev:
+                raise IndexError('Invalid rev %r' % r)
+        return ancestor.lazyancestors(
+            self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def descendants(self, revs):
+        return dagop.descendantrevs(revs, self.revs, self.parentrevs)
+
+    def reachableroots(self, minroot, heads, roots, includepath=False):
+        return dagop._reachablerootspure(self.parentrevs,
+                                         minroot, roots, heads, includepath)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestor(self, a, b):
+        a, b = self.rev(a), self.rev(b)
+        return self.isancestorrev(a, b)
+
+    # Cleanup opportunity: this is *identical* to the revlog.py version
+    def isancestorrev(self, a, b):
+        if a == nodemod.nullrev:
+            return True
+        elif a == b:
+            return True
+        elif a > b:
+            return False
+        return bool(self.reachableroots(a, [b], [a], includepath=False))
+
+    def parentrevs(self, rev):
+        """Return the (p1, p2) revision numbers of ``rev``.
+
+        Missing parents are nullrev. Aborts on commits with more than two
+        parents.
+        """
+        n = self.node(rev)
+        hn = nodemod.hex(n)
+        c = self.gitrepo[hn]
+        if len(c.parents) > 2:
+            raise error.Abort('TODO octopus merge handling')
+        p1 = p2 = nodemod.nullrev
+        if c.parents:
+            p1 = self.rev(c.parents[0].id.raw)
+        if len(c.parents) == 2:
+            # The second parent, not the first one again.
+            p2 = self.rev(c.parents[1].id.raw)
+        return p1, p2
+
+    # Private method is used at least by the tags code.
+    _uncheckedparentrevs = parentrevs
+
+    def commonancestorsheads(self, a, b):
+        # TODO the revlog verson of this has a C path, so we probably
+        # need to optimize this...
+        a, b = self.rev(a), self.rev(b)
+        return [self.node(n) for n in
+                ancestor.commonancestorsheads(self.parentrevs, a, b)]
+
+    def branchinfo(self, rev):
+        """Git doesn't do named branches, so just put everything on default."""
+        return b'default', False
+
+    def delayupdate(self, tr):
+        # TODO: I think we can elide this because we're just dropping
+        # an object in the git repo?
+        pass
+
+    def add(self, manifest, files, desc, transaction, p1, p2,
+            user, date=None, extra=None, p1copies=None, p2copies=None,
+            filesadded=None, filesremoved=None):
+        """Create a git commit and reindex; return its binary node.
+
+        ``manifest`` is the binary id of the tree to commit, ``p1``/``p2``
+        are binary parent nodes, and ``date`` must be a (timestamp, tz)
+        pair. Only manifest, desc, p1, p2, user and date are used.
+        """
+        parents = []
+        if p1 != nodemod.nullid:
+            parents.append(nodemod.hex(p1))
+        # Only hexlify p2 once we know it is set: the check below allows a
+        # falsy p2, which nodemod.hex() would choke on.
+        if p2 and p2 != nodemod.nullid:
+            parents.append(nodemod.hex(p2))
+        assert date is not None
+        timestamp, tz = date
+        sig = pygit2.Signature(stringutil.person(user), stringutil.email(user),
+                               timestamp, tz)
+        oid = self.gitrepo.create_commit(
+            None, sig, sig, desc,
+            nodemod.hex(manifest), parents)
+        # Set up an internal reference to force the commit into the
+        # changelog. Hypothetically, we could even use this refs/hg/
+        # namespace to allow for anonymous heads on git repos, which
+        # would be neat.
+        self.gitrepo.references.create(
+            'refs/hg/internal/latest-commit', oid, force=True)
+        # Reindex now to pick up changes
+        index._index_repo(self.gitrepo, self._db, lambda x, y: None)
+        return oid.raw
+
+# TODO: Make a split between mutable and immutable manifest types here.
+class gittreemanifest(object):
+    """Manifest-like wrapper around a git tree.
+
+    Reads come from the tree until the first write; from then on they come
+    from a tree builder obtained by calling ``builderfn``.
+    """
+
+    def __init__(self, gt, builderfn):
+        self._builderfn = builderfn
+        self._tree = gt
+        self._builder = None
+
+    def _ensurebuilder(self):
+        """Return the tree builder, creating it on first use."""
+        if self._builder is None:
+            self._builder = self._builderfn()
+        return self._builder
+
+    def _notfound(self, k):
+        """Build the LookupError for a path missing from this tree."""
+        # error.LookupError is called with (name, index, message) elsewhere
+        # in this file; follow that form.
+        return error.LookupError(
+            k, self._tree.id.hex, _('file not found in tree'))
+
+    def __contains__(self, k):
+        if self._builder:
+            return self._builder.get(k) is not None
+        return k in self._tree
+
+    def __getitem__(self, k):
+        if self._builder:
+            match = self._builder.get(k)
+            if match is None:
+                raise self._notfound(k)
+            return match
+        try:
+            return self._tree[k].id.raw
+        except ValueError:
+            raise self._notfound(k)
+
+    def __setitem__(self, k, v):
+        self._ensurebuilder().insert(
+            k, nodemod.hex(v), pygit2.GIT_FILEMODE_BLOB)
+
+    def setflag(self, p, flag):
+        """Set the mode of path ``p``: '' (plain), 'x' (exec) or 'l' (link).
+
+        Raises ValueError for any other flag.
+        """
+        # Make sure there is a builder even if nothing was written yet.
+        builder = self._ensurebuilder()
+        oid = builder.get(p).id
+        if not flag:
+            builder.insert(p, oid, pygit2.GIT_FILEMODE_BLOB)
+        elif flag == 'x':
+            builder.insert(p, oid, pygit2.GIT_FILEMODE_BLOB_EXECUTABLE)
+        elif flag == 'l':
+            builder.insert(p, oid, pygit2.GIT_FILEMODE_LINK)
+        else:
+            raise ValueError(
+                'Illegal flag value %r on path %r' % (flag, p))
+
+    def flags(self, k):
+        # TODO flags handling
+        return ''
+
+    def _walkonetree(self, tree, match, subdir):
+        for te in tree:
+            # TODO: can we prune dir walks with the matcher?
+            realname = subdir + te.name
+            if te.type == r'tree':
+                # NOTE(review): self.gitrepo is never assigned in
+                # __init__, so recursing into a subtree raises
+                # AttributeError until a repo handle is threaded in.
+                for inner in self._walkonetree(
+                        self.gitrepo[te.id], match, realname + '/'):
+                    yield inner
+            if not match(realname):
+                continue
+            yield realname
+
+    def walk(self, match):
+        return self._walkonetree(self._tree, match, '')
+
+    def get(self, fname, default=None):
+        if fname in self:
+            return self[fname]
+        return default
+
+@interfaceutil.implementer(repository.imanifestrevisionstored)
+class gittreemanifestctx(object):
+    """Manifest context for one git tree.
+
+    ``repo`` is the object the tree builder is requested from and
+    ``gittree`` is the tree this context wraps.
+    """
+
+    def __init__(self, repo, gittree):
+        self._repo = repo
+        self._tree = gittree
+        self._builder = None
+
+    def _getbuilder(self):
+        """Return the tree builder for this tree, creating it on first use."""
+        if self._builder is None:
+            self._builder = self._repo.TreeBuilder(self._tree)
+        return self._builder
+
+    def read(self):
+        return gittreemanifest(self._tree, self._getbuilder)
+
+    def find(self, path):
+        """Return the manifest entry for ``path``."""
+        # Hand the lookup result back instead of discarding it.
+        return self.read()[path]
+
+    def copy(self):
+        return gittreemanifestctx(self._repo, self._tree)
+
+    def write(self, transaction, link, p1, p2, added, removed, match=None):
+        """Write the built tree and return its binary id."""
+        # We're not (for now, anyway) going to audit filenames, so we
+        # can ignore added and removed.
+
+        # TODO what does this match argument get used for? hopefully
+        # just narrow?
+        assert not match or isinstance(match, matchmod.alwaysmatcher)
+        return self._getbuilder().write().raw
+
+class manifestlog(baselog):
+    """Hands out manifest contexts for commits in the git repository."""
+
+    def __getitem__(self, node):
+        return self.get('', node)
+
+    def get(self, relpath, node):
+        """Return the manifest context for ``relpath`` in commit ``node``.
+
+        nullid gets an in-memory tree manifest context; anything else gets
+        a gittreemanifestctx for the tree found by following ``relpath``
+        from the commit's root tree.
+        """
+        if node == nodemod.nullid:
+            return manifest.memtreemanifestctx(self, relpath)
+        tree = self.gitrepo[nodemod.hex(node)].tree
+        if relpath:
+            # Descend one path component at a time.
+            for part in relpath.split('/'):
+                entry = tree[part]
+                tree = self.gitrepo[entry.id]
+        return gittreemanifestctx(self.gitrepo, tree)
+
+@interfaceutil.implementer(repository.ifilestorage)
+class filelog(baselog):
+    """File storage for one path, answered from git blobs and the index."""
+
+    def __init__(self, gr, db, path):
+        super(filelog, self).__init__(gr, db)
+        self.path = path
+
+    def read(self, node):
+        return self.gitrepo[nodemod.hex(node)].data
+
+    def lookup(self, node):
+        """Return the binary form of ``node`` if git has that object.
+
+        ``node`` may be a 20-byte binary or 40-character hex id. Raises
+        error.LookupError when the object is not in the repository.
+        """
+        if len(node) not in (20, 40):
+            node = int(node)
+        if isinstance(node, int):
+            assert False, 'todo revnums for nodes'
+        if len(node) == 40:
+            hnode = node
+            node = nodemod.bin(node)
+        else:
+            hnode = nodemod.hex(node)
+        if hnode in self.gitrepo:
+            return node
+        # (name, index, message), the same order baselog uses.
+        raise error.LookupError(node, self.path, _('no match found'))
+
+    def cmp(self, node, text):
+        """Returns True if text is different than content at `node`."""
+        return self.read(node) != text
+
+    def add(self, text, meta, transaction, link, p1=None, p2=None):
+        assert not meta  # Should we even try to handle this?
+        return self.gitrepo.create_blob(text).raw
+
+    def __iter__(self):
+        for clrev in self._db.execute('''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
+        ''', (self.path, nodemod.nullhex)):
+            yield clrev[0]
+
+    def linkrev(self, fr):
+        return fr
+
+    def rev(self, node):
+        """Return the changelog rev recorded for file node ``node``.
+
+        Raises error.LookupError if the index has no such row.
+        """
+        row = self._db.execute('''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''', (
+            self.path, nodemod.hex(node))).fetchone()
+        if row is None:
+            raise error.LookupError(node, self.path, _('no node'))
+        return int(row[0])
+
+    def node(self, rev):
+        """Return the binary file node recorded at changelog rev ``rev``.
+
+        Raises error.LookupError if the index has no such row.
+        """
+        row = self._db.execute(
+'''SELECT filenode FROM changedfiles
+INNER JOIN changelog ON changelog.node = changedfiles.node
+WHERE changelog.rev = ? AND filename = ?
+''', (rev, self.path)).fetchone()
+        if row is None:
+            raise error.LookupError(rev, self.path, _('no node'))
+        return nodemod.bin(row[0])
+
+    def parents(self, node):
+        """Return [p1, p2] binary file nodes, nullid where unset.
+
+        Raises error.LookupError if the index has no row for ``node``.
+        """
+        row = self._db.execute(
+'''SELECT p1filenode, p2filenode FROM changedfiles
+WHERE filenode = ? AND filename = ?
+''', (nodemod.hex(node), self.path)).fetchone()
+        if row is None:
+            raise error.LookupError(node, self.path, _('no node'))
+        ps = []
+        for p in row:
+            if p is not None:
+                ps.append(nodemod.bin(p))
+            else:
+                ps.append(nodemod.nullid)
+        return ps
+
+    def renamed(self, node):
+        # TODO: renames/copies
+        return False
diff --git a/hgext/git/dirstate.py b/hgext/git/dirstate.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/dirstate.py
@@ -0,0 +1,254 @@
+from __future__ import absolute_import
+
+import contextlib
+import errno
+import os
+import stat
+
+from mercurial import (
+ dirstate,
+ error,
+ extensions,
+ match as matchmod,
+ node as nodemod,
+ scmutil,
+ util,
+)
+from mercurial.interfaces import (
+ dirstate as intdirstate,
+ util as interfaceutil,
+)
+from mercurial.i18n import _
+
+import pygit2
+
+
+def readpatternfile(orig, filepath, warn, sourceinfo=False):
+    """Wrapper for matchmod.readpatternfile that reads git ignore files.
+
+    Files named ``.gitignore`` or containing ``info/exclude`` in their path
+    are translated here; anything else goes to ``orig`` unchanged.
+
+    Returns ``(patterns, warnings)`` for git ignore files: ``/foo`` becomes
+    ``glob:foo``, other patterns become ``relglob:``, and ``!`` negations
+    are skipped with a warning.
+    """
+    if not ('info/exclude' in filepath or filepath.endswith('.gitignore')):
+        # Pass the caller's sourceinfo through instead of forcing False.
+        return orig(filepath, warn, sourceinfo=sourceinfo)
+    # NOTE(review): sourceinfo is ignored for git ignore files; the result
+    # is always plain pattern strings.
+    result = []
+    warnings = []
+    with open(filepath, 'rb') as fp:
+        for l in fp:
+            l = l.strip()
+            if not l or l.startswith('#'):
+                continue
+            if l.startswith('!'):
+                warnings.append('unsupported ignore pattern %s' % l)
+                continue
+            if l.startswith('/'):
+                # on reflection, I think /foo is just glob:
+                result.append('glob:' + l[1:])
+            else:
+                result.append('relglob:' + l)
+    return result, warnings
+extensions.wrapfunction(matchmod, 'readpatternfile', readpatternfile)
+
+
+_STATUS_MAP = {
+ pygit2.GIT_STATUS_CONFLICTED: 'm',
+ pygit2.GIT_STATUS_CURRENT: 'n',
+ pygit2.GIT_STATUS_IGNORED: '?',
+ pygit2.GIT_STATUS_INDEX_DELETED: 'r',
+ pygit2.GIT_STATUS_INDEX_MODIFIED: 'n',
+ pygit2.GIT_STATUS_INDEX_NEW: 'a',
+ pygit2.GIT_STATUS_INDEX_RENAMED: 'a',
+ pygit2.GIT_STATUS_INDEX_TYPECHANGE: 'n',
+ pygit2.GIT_STATUS_WT_DELETED: 'r',
+ pygit2.GIT_STATUS_WT_MODIFIED: 'n',
+ pygit2.GIT_STATUS_WT_NEW: '?',
+ pygit2.GIT_STATUS_WT_RENAMED: 'a',
+ pygit2.GIT_STATUS_WT_TYPECHANGE: 'n',
+ pygit2.GIT_STATUS_WT_UNREADABLE: '?',
+}
+
+
+@interfaceutil.implementer(intdirstate.idirstate)
+class gitdirstate(object):
+    """dirstate implementation that reads and writes a git index."""
+
+    def __init__(self, ui, root, gitrepo):
+        self._ui = ui
+        self._root = os.path.dirname(root)
+        self.git = gitrepo
+
+    def p1(self):
+        return self.git.head.peel().id.raw
+
+    def p2(self):
+        # TODO: MERGE_HEAD? something like that, right?
+        return nodemod.nullid
+
+    def setparents(self, p1, p2=nodemod.nullid):
+        assert p2 == nodemod.nullid, 'TODO merging support'
+        self.git.head.set_target(nodemod.hex(p1))
+
+    @util.propertycache
+    def identity(self):
+        """File stat of .git/index under the repository root."""
+        # Return the value so propertycache caches it, and read the
+        # attribute __init__ actually sets (_root).
+        return util.filestat.frompath(
+            os.path.join(self._root, '.git', 'index'))
+
+    def branch(self):
+        return b'default'
+
+    def parents(self):
+        # TODO how on earth do we find p2 if a merge is in flight?
+        return self.p1(), nodemod.nullid
+
+    def __iter__(self):
+        # TODO is this going to give us unicodes on py3?
+        return (f.path for f in self.git.index)
+
+    def items(self):
+        for ie in self.git.index:
+            yield ie.path, None  # value should be a dirstatetuple
+
+    # py2,3 compat forward
+    iteritems = items
+
+    def __getitem__(self, filename):
+        """Return the one-letter state for ``filename`` ('?' if unknown)."""
+        try:
+            gs = self.git.status_file(filename)
+        except KeyError:
+            return '?'
+        return _STATUS_MAP[gs]
+
+    def __contains__(self, filename):
+        try:
+            gs = self.git.status_file(filename)
+            return _STATUS_MAP[gs] != '?'
+        except KeyError:
+            return False
+
+    def status(self, match, subrepos, ignored, clean, unknown):
+        """Return (False, scmutil.status) built from git's status.
+
+        The match, subrepos, ignored, clean and unknown arguments are not
+        consulted. Aborts on a git status value this method does not
+        handle.
+        """
+        # TODO handling of clean files - can we get that from git.status()?
+        # Separate local names so the arguments above are not rebound.
+        modified, added, removed, deleted = [], [], [], []
+        unknownfiles, ignoredfiles, cleanfiles = [], [], []
+        gstatus = self.git.status()
+        for path, status in gstatus.items():
+            if status == pygit2.GIT_STATUS_IGNORED:
+                if path.endswith('/'):
+                    continue
+                ignoredfiles.append(path)
+            elif status in (
+                    pygit2.GIT_STATUS_WT_MODIFIED,
+                    pygit2.GIT_STATUS_INDEX_MODIFIED,
+                    (pygit2.GIT_STATUS_WT_MODIFIED
+                     | pygit2.GIT_STATUS_INDEX_MODIFIED)):
+                modified.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_NEW:
+                added.append(path)
+            elif status == pygit2.GIT_STATUS_WT_NEW:
+                unknownfiles.append(path)
+            elif status == pygit2.GIT_STATUS_WT_DELETED:
+                deleted.append(path)
+            elif status == pygit2.GIT_STATUS_INDEX_DELETED:
+                removed.append(path)
+            else:
+                raise error.Abort('unhandled case: status for %r is %r' % (
+                    path, status))
+
+        # TODO are we really always sure of status here?
+        return False, scmutil.status(
+            modified, added, removed, deleted,
+            unknownfiles, ignoredfiles, cleanfiles)
+
+    def flagfunc(self, buildfallback):
+        # TODO we can do better
+        return buildfallback()
+
+    def getcwd(self):
+        # TODO is this a good way to do this?
+        return os.path.dirname(os.path.dirname(self.git.path))
+
+    def normalize(self, path):
+        assert util.normcase(path) == path, 'TODO handling of case folding'
+        return path
+
+    @property
+    def _checklink(self):
+        return util.checklink(os.path.dirname(self.git.path))
+
+    def copies(self):
+        # TODO support copies?
+        return {}
+
+    # # TODO what the heck is this
+    _filecache = set()
+
+    def pendingparentchange(self):
+        # TODO: we need to implement the context manager bits and
+        # correctly stage/revert index edits.
+        return False
+
+    def write(self, tr):
+        """Write the git index, via ``tr`` when one is given."""
+        if tr:
+
+            def writeinner(category):
+                self.git.index.write()
+
+            tr.addpending('gitdirstate', writeinner)
+        else:
+            self.git.index.write()
+
+    def pathto(self, f, cwd=None):
+        if cwd is None:
+            cwd = self.getcwd()
+        # TODO core dirstate does something about slashes here
+        r = util.pathto(self._root, cwd, f)
+        return r
+
+    def matches(self, match):
+        return [x.path for x in self.git.index if match(x.path)]
+
+    def normal(self, f, parentfiledata=None):
+        """Mark a file normal and clean."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def normallookup(self, f):
+        """Mark a file normal, but possibly dirty."""
+        # TODO: for now we just let libgit2 re-stat the file. We can
+        # clearly do better.
+
+    def walk(self, match, subrepos, unknown, ignored, full=True):
+        """Return {path: os.stat result} for matching paths in git status.
+
+        Paths under .hg/ and paths missing from disk are left out.
+        """
+        # TODO: we need to use .status() and not iterate the index,
+        # because the index doesn't force a re-walk and so `hg add` of
+        # a new file without an intervening call to status will
+        # silently do nothing.
+        r = {}
+        cwd = self.getcwd()
+        for path, status in self.git.status().items():
+            if path.startswith('.hg/'):
+                continue
+            if not match(path):
+                continue
+            # TODO construct the stat info from the status object?
+            try:
+                s = os.stat(os.path.join(cwd, path))
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                continue
+            r[path] = s
+        return r
+
+    def savebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def restorebackup(self, tr, backupname):
+        # TODO: figure out a strategy for saving index backups.
+        pass
+
+    def add(self, f):
+        self.git.index.add(f)
+
+    def drop(self, f):
+        self.git.index.remove(f)
+
+    def copied(self, path):
+        # TODO: track copies?
+        return None
+
+    @contextlib.contextmanager
+    def parentchange(self):
+        # TODO: track this maybe?
+        yield
diff --git a/hgext/git/__init__.py b/hgext/git/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/__init__.py
@@ -0,0 +1,218 @@
+"""Grant Mercurial the ability to operate on Git repositories. (EXPERIMENTAL)
+
+This is currently super experimental. It probably will consume your
+firstborn a la Rumpelstiltskin, etc.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+from mercurial.i18n import _
+from mercurial import (
+    commands,
+    debugcommands,
+    error,
+    extensions,
+    hg,
+    localrepo,
+    node as nodemod,
+    store,
+)
+from mercurial.interfaces import (
+    repository,
+    util as interfaceutil,
+)
+
+from . import (
+    dirstate,
+    gitlog,
+    index,
+)
+
+import pygit2
+
+# TODO: extract an interface for this in core
+class gitstore(object):  # store.basicstore):
+    """Store object that reads history from a git repository.
+
+    The git repository is opened at ``<path>/../.git``.
+    """
+
+    def __init__(self, path, vfstype):
+        vfs = vfstype(path)
+        self.vfs = vfs
+        self.path = vfs.base
+        self.createmode = store._calcmode(vfs)
+        # above lines should go away in favor of:
+        # super(gitstore, self).__init__(path, vfstype)
+
+        gitdir = os.path.normpath(os.path.join(path, '..', '.git'))
+        self.git = pygit2.Repository(gitdir)
+        self._db = index.get_index(self.git)
+
+    def join(self, f):
+        """Fake store.join method for git repositories.
+
+        For the most part, store.join is used for @storecache
+        decorators to invalidate caches when various files
+        change. We'll map the ones we care about, and ignore the rest.
+
+        Raises NotImplementedError for any name without a mapping.
+        """
+        base = self.path
+        if f in ('00changelog.i', '00manifest.i'):
+            # This is close enough: in order for the changelog cache
+            # to be invalidated, HEAD will have to change.
+            return os.path.join(base, 'HEAD')
+        if f == 'lock':
+            # TODO: we probably want to map this to a git lock, I
+            # suspect index.lock. We should figure out what the
+            # most-alike file is in git-land. For now we're risking
+            # bad concurrency errors if another git client is used.
+            return os.path.join(base, 'hgit-bogus-lock')
+        if f in ('obsstore', 'phaseroots', 'narrowspec', 'bookmarks'):
+            return os.path.join(base, '..', '.hg', f)
+        raise NotImplementedError('Need to pick file for %s.' % f)
+
+    def changelog(self, trypending):
+        """Return a git-backed changelog; ``trypending`` is ignored."""
+        # TODO we don't have a plan for trypending in hg's git support yet
+        return gitlog.changelog(self.git, self._db)
+
+    def manifestlog(self, repo, storenarrowmatch):
+        """Return a git-backed manifestlog; both arguments are ignored."""
+        # TODO handle storenarrowmatch and figure out if we need the repo arg
+        return gitlog.manifestlog(self.git, self._db)
+
+    def invalidatecaches(self):
+        """Do nothing."""
+        return None
+
+    def write(self, tr=None):
+        """Do nothing; ``tr`` is ignored."""
+        # normally this handles things like fncache writes, which we don't have
+        return None
+
+def _makestore(orig, requirements, storebasepath, vfstype):
+    """Return a gitstore for git-backed repos, else defer to ``orig``.
+
+    A repo counts as git-backed when ``storebasepath`` holds a
+    ``this-is-git`` entry and ``storebasepath/../.git`` exists.
+    """
+    marker = os.path.join(storebasepath, 'this-is-git')
+    gitdir = os.path.join(storebasepath, '..', '.git')
+    if not (os.path.exists(marker) and os.path.exists(gitdir)):
+        return orig(requirements, storebasepath, vfstype)
+    return gitstore(storebasepath, vfstype)
+
+class gitfilestorage(object):
+    """Provides ``file()`` on top of ``self.store`` (expected: a gitstore)."""
+
+    def file(self, path):
+        """Return the git filelog for ``path``, minus one leading ``/``."""
+        relpath = path[1:] if path[0:1] == b'/' else path
+        return gitlog.filelog(self.store.git, self.store._db, relpath)
+
+def _makefilestorage(orig, requirements, features, **kwargs):
+    """Return gitfilestorage when ``kwargs['store']`` is a gitstore.
+
+    Any other store type is handed to ``orig`` untouched.
+    """
+    if not isinstance(kwargs['store'], gitstore):
+        return orig(requirements, features, **kwargs)
+    return gitfilestorage
+
+def _setupdothg(ui, path):
+    """Create the ``.hg`` bookkeeping inside the git checkout at ``path``.
+
+    If ``<path>/.hg`` already exists a warning is emitted through ``ui``;
+    otherwise the directory is created and ``.hg`` is appended to
+    ``.git/info/exclude``. Either way the ``this-is-git`` marker is
+    (re)created empty and ``requirements`` is rewritten to ``git``.
+
+    ``path`` is expected to be bytes, like the other path components here.
+    """
+    # NOTE(review): the archived listing lost its indentation; the two
+    # marker writes below are assumed to sit outside the ``else``.
+    dothg = os.path.join(path, b'.hg')
+    if os.path.exists(dothg):
+        ui.warn(_('git repo already initialized for hg\n'))
+    else:
+        os.mkdir(dothg)
+        # TODO is it ok to extend .git/info/exclude like this?
+        excludepath = os.path.join(path, b'.git', b'info', b'exclude')
+        with open(excludepath, 'ab') as exclude:
+            exclude.write(b'\n.hg\n')
+    # Binary mode: the payload is bytes, which a text-mode file rejects on
+    # Python 3 and which Windows text mode would newline-translate.
+    with open(os.path.join(dothg, b'this-is-git'), 'wb'):
+        pass
+    with open(os.path.join(dothg, b'requirements'), 'wb') as f:
+        f.write(b'git\n')
+
+# Prefix of the git refs that gitbmstore presents as bookmark names.
+_BMS_PREFIX = 'refs/heads/'
+
+class gitbmstore(object):
+    """Bookmark-store lookalike backed by refs below ``_BMS_PREFIX``."""
+
+    def __init__(self, gitrepo):
+        self.gitrepo = gitrepo
+
+    def __contains__(self, name):
+        refname = _BMS_PREFIX + name
+        return refname in self.gitrepo.references
+
+    def __iter__(self):
+        """Yield every ref under the prefix with the prefix stripped."""
+        skip = len(_BMS_PREFIX)
+        for refname in self.gitrepo.listall_references():
+            if refname.startswith(_BMS_PREFIX):
+                yield refname[skip:]
+
+    def __getitem__(self, k):
+        """Return the raw id of the object that ref ``k`` peels to."""
+        ref = self.gitrepo.references[_BMS_PREFIX + k]
+        return ref.peel().id.raw
+
+    def get(self, k, default=None):
+        """Like ``self[k]``; ``default`` if absent or an invalid spec."""
+        try:
+            return self[k] if k in self else default
+        except pygit2.InvalidSpecError:
+            return default
+
+    @property
+    def active(self):
+        """Name HEAD points at when its target is a prefixed ref, else None."""
+        target = self.gitrepo.references['HEAD'].target
+        if isinstance(target, str) and target.startswith(_BMS_PREFIX):
+            return target[len(_BMS_PREFIX):]
+        return None
+
+    @active.setter
+    def active(self, mark):
+        raise NotImplementedError
+
+    def names(self, node):
+        """Return the names of all prefixed refs that peel to ``node``."""
+        refs = self.gitrepo.references
+        return [
+            refname[len(_BMS_PREFIX):]
+            for refname in self.gitrepo.listall_references()
+            if refname.startswith(_BMS_PREFIX)
+            and refs[refname].peel().id.raw == node
+        ]
+
+    # Cleanup opportunity: this is *identical* to core's bookmarks store.
+    def expandname(self, bname):
+        """Resolve ``'.'`` to the active name; pass other names through."""
+        if bname != '.':
+            return bname
+        current = self.active
+        if current:
+            return current
+        raise error.RepoLookupError(_("no active bookmark"))
+
+    def applychanges(self, repo, tr, changes):
+        """Apply a list of changes to bookmarks
+
+        Each change is a ``(name, node)`` pair; a node of None deletes the
+        ref, anything else force-creates it. ``repo`` and ``tr`` are unused.
+        """
+        # TODO: this should respect transactions, but that's going to
+        # require enlarging the gitbmstore to know how to do in-memory
+        # temporary writes and read those back prior to transaction
+        # finalization.
+        refs = self.gitrepo.references
+        for name, node in changes:
+            refname = _BMS_PREFIX + name
+            if node is None:
+                refs.delete(refname)
+            else:
+                refs.create(refname, nodemod.hex(node), force=True)
+
+def init(orig, ui, dest='.', **opts):
+    """Wrapper around ``hg init`` that honours the ``--git`` flag.
+
+    With ``--git``, ``<dest>/.git`` is opened with pygit2, the ``.hg``
+    bookkeeping is created through ``_setupdothg`` and 0 is returned.
+    Without it the call is forwarded to ``orig`` unchanged.
+    """
+    if not opts.get('git', False):
+        return orig(ui, dest=dest, **opts)
+    path = os.path.abspath(dest)
+    # TODO: walk up looking for the git repo
+    # Opened for validation only; presumably pygit2 raises when this is
+    # not a git repository, stopping us before any .hg state is created.
+    pygit2.Repository(os.path.join(path, '.git'))
+    _setupdothg(ui, path)
+    # TODO: consider running debugcommands.debugrebuilddirstate(
+    #     ui, hg.repository(ui, path), rev='.') here.
+    return 0
+
+def reposetup(ui, repo):
+    """Swap ``repo``'s class for a git-aware subclass when it has a gitstore.
+
+    The subclass builds a gitdirstate and exposes a gitbmstore as
+    ``_bookmarks``. ``repo`` is returned in every case.
+    """
+    if not isinstance(repo.store, gitstore):
+        return repo
+
+    basecls = repo.__class__
+
+    class gitlocalrepo(basecls):
+
+        def _makedirstate(self):
+            # TODO narrow support here
+            return dirstate.gitdirstate(
+                self.ui, self.vfs.base, self.store.git)
+
+        @property
+        def _bookmarks(self):
+            return gitbmstore(self.store.git)
+
+    repo.__class__ = gitlocalrepo
+    return repo
+
+def extsetup(ui):
+    """Wrap the store and file-storage factories and the ``init`` command."""
+    extensions.wrapfunction(localrepo, 'makestore', _makestore)
+    extensions.wrapfunction(localrepo, 'makefilestorage', _makefilestorage)
+    # Inject --git flag for `hg init`
+    entry = extensions.wrapcommand(commands.table, 'init', init)
+    # entry[1] is the command's option list; each option is a
+    # (short, long, default, help) tuple.
+    entry[1].append(
+        ('', 'git', None, 'set up a git repository instead of hg'))
To: durin42, #hg-reviewers
Cc: JordiGH, hollisb, mjpieters, mercurial-devel
More information about the Mercurial-devel
mailing list