D6734: git: RFC of a new extension to _directly_ operate on git repositories
durin42 (Augie Fackler)
phabricator at mercurial-scm.org
Fri Feb 14 21:07:20 UTC 2020
durin42 edited the summary of this revision.
durin42 updated this revision to Diff 20224.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D6734?vs=20144&id=20224
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D6734/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D6734
AFFECTED FILES
hgext/git/__init__.py
hgext/git/dirstate.py
hgext/git/gitlog.py
hgext/git/gitutil.py
hgext/git/index.py
hgext/git/manifest.py
setup.py
tests/test-git-interop.t
CHANGE DETAILS
diff --git a/tests/test-git-interop.t b/tests/test-git-interop.t
new file mode 100644
--- /dev/null
+++ b/tests/test-git-interop.t
@@ -0,0 +1,248 @@
+This test requires pygit2:
+ > $PYTHON -c 'import pygit2' || exit 80
+
+Setup:
+ > GIT_AUTHOR_NAME='test'; export GIT_AUTHOR_NAME
+ > GIT_AUTHOR_EMAIL='test at example.org'; export GIT_AUTHOR_EMAIL
+ > GIT_AUTHOR_DATE="2007-01-01 00:00:00 +0000"; export GIT_AUTHOR_DATE
+ > GIT_COMMITTER_NAME="$GIT_AUTHOR_NAME"; export GIT_COMMITTER_NAME
+ > GIT_COMMITTER_EMAIL="$GIT_AUTHOR_EMAIL"; export GIT_COMMITTER_EMAIL
+ > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"; export GIT_COMMITTER_DATE
+
+ > count=10
+ > gitcommit() {
+ > GIT_AUTHOR_DATE="2007-01-01 00:00:$count +0000";
+ > GIT_COMMITTER_DATE="$GIT_AUTHOR_DATE"
+ > git commit "$@" >/dev/null 2>/dev/null || echo "git commit error"
+ > count=`expr $count + 1`
+ > }
+
+ > echo "[extensions]" >> $HGRCPATH
+ > echo "git=" >> $HGRCPATH
+
+Make a new repo with git:
+ $ mkdir foo
+ $ cd foo
+ $ git init
+ Initialized empty Git repository in $TESTTMP/foo/.git/
+Ignore the .hg directory within git:
+ $ echo .hg >> .git/info/exclude
+ $ echo alpha > alpha
+ $ git add alpha
+ $ gitcommit -am 'Add alpha'
+ $ echo beta > beta
+ $ git add beta
+ $ gitcommit -am 'Add beta'
+ $ echo gamma > gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+Without creating the .hg, hg status fails:
+ $ hg status
+ abort: no repository found in '$TESTTMP/foo' (.hg not found)!
+ [255]
+But if you run hg init --git, it works:
+ $ hg init --git
+ $ hg id --traceback
+ 3d9be8deba43 tip master
+ $ hg status
+ ? gamma
+Log works too:
+ $ hg log
+ changeset: 1:3d9be8deba43
+ bookmark: master
+ tag: tip
+ user: test <test at example.org>
+ date: Mon Jan 01 00:00:11 2007 +0000
+ summary: Add beta
+
+ changeset: 0:c5864c9d16fb
+ user: test <test at example.org>
+ date: Mon Jan 01 00:00:10 2007 +0000
+ summary: Add alpha
+
+
+
+and bookmarks:
+ $ hg bookmarks
+ * master 1:3d9be8deba43
+
+diff even works transparently in both systems:
+ $ echo blah >> alpha
+ $ git diff
+ diff --git a/alpha b/alpha
+ index 4a58007..faed1b7 100644
+ --- a/alpha
+ +++ b/alpha
+ @@ -1* +1,2 @@ (glob)
+ alpha
+ +blah
+ $ hg diff --git
+ diff --git a/alpha b/alpha
+ --- a/alpha
+ +++ b/alpha
+ @@ -1,1 +1,2 @@
+ alpha
+ +blah
+
+Remove a file, it shows as such:
+ $ rm alpha
+ $ hg status
+ ! alpha
+ ? gamma
+
+Revert works:
+ $ hg revert alpha --traceback
+ $ hg status
+ ? gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+Add shows sanely in both:
+ $ hg add gamma
+ $ hg status
+ A gamma
+ $ hg files
+ alpha
+ beta
+ gamma
+ $ git ls-files
+ alpha
+ beta
+ gamma
+ $ git status
+ On branch master
+ Changes to be committed:
+ (use "git restore --staged <file>..." to unstage)
+ new file: gamma
+
+
+forget does what it should as well:
+ $ hg forget gamma
+ $ hg status
+ ? gamma
+ $ git status
+ On branch master
+ Untracked files:
+ (use "git add <file>..." to include in what will be committed)
+ gamma
+
+ nothing added to commit but untracked files present (use "git add" to track)
+
+clean up untracked file
+ $ rm gamma
+
+hg log FILE
+
+ $ echo a >> alpha
+ $ hg ci -m 'more alpha' --traceback
+ $ echo b >> beta
+ $ hg ci -m 'more beta'
+ $ echo a >> alpha
+ $ hg ci -m 'even more alpha'
+ $ hg log -G alpha
+ @ changeset: 4:bd975ddde71c
+ : bookmark: master
+ : tag: tip
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: even more alpha
+ :
+ o changeset: 2:77f597222800
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: more alpha
+ :
+ o changeset: 0:c5864c9d16fb
+ user: test <test at example.org>
+ date: Mon Jan 01 00:00:10 2007 +0000
+ summary: Add alpha
+
+ $ hg log -G beta
+ o changeset: 3:b40d4fed5e27
+ : user: test <test>
+ : date: Thu Jan 01 00:00:00 1970 +0000
+ : summary: more beta
+ :
+ o changeset: 1:3d9be8deba43
+ | user: test <test at example.org>
+ ~ date: Mon Jan 01 00:00:11 2007 +0000
+ summary: Add beta
+
+
+hg annotate
+
+ $ hg annotate alpha
+ 0: alpha
+ 2: a
+ 4: a
+ $ hg annotate beta
+ 1: beta
+ 3: b
+
+
+Files in subdirectories. TODO: case-folding support, make this `A`
+instead of `a`.
+
+ $ mkdir a
+ $ echo "This is file mu." > a/mu
+ $ hg ci -A -m 'Introduce file a/mu'
+ adding a/mu
+
+Both hg and git agree a/mu is part of the repo
+
+ $ git ls-files
+ a/mu
+ alpha
+ beta
+ $ hg files
+ a/mu
+ alpha
+ beta
+
+Bug! alpha and beta both show as modified in git and hg, but I don't
+think they should.
+
+ $ git status
+ On branch master
+ Changes to be committed:
+ (use "git restore --staged <file>..." to unstage)
+ modified: alpha
+ modified: beta
+
+ Changes not staged for commit:
+ (use "git add <file>..." to update what will be committed)
+ (use "git restore <file>..." to discard changes in working directory)
+ modified: alpha
+ modified: beta
+
+ $ hg status
+ M alpha
+ M beta
+ $ git diff
+ diff --git a/alpha b/alpha
+ index 4a58007..d112a75 100644
+ --- a/alpha
+ +++ b/alpha
+ @@ -1 +1,3 @@
+ alpha
+ +a
+ +a
+ diff --git a/beta b/beta
+ index 65b2df8..0d750bb 100644
+ --- a/beta
+ +++ b/beta
+ @@ -1 +1,2 @@
+ beta
+ +b
+ $ hg diff --git
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -1209,6 +1209,7 @@
'hgext.fsmonitor',
'hgext.fastannotate',
'hgext.fsmonitor.pywatchman',
+ 'hgext.git',
'hgext.highlight',
'hgext.infinitepush',
'hgext.largefiles',
diff --git a/hgext/git/manifest.py b/hgext/git/manifest.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/manifest.py
@@ -0,0 +1,296 @@
+from __future__ import absolute_import
+
+import pygit2
+
+from mercurial import (
+ pathutil,
+ pycompat,
+ match as matchmod,
+ util,
+)
+from mercurial.interfaces import (
+ repository,
+ util as interfaceutil,
+)
+from . import gitutil
+
+# layers: ctx and manifest
+# implementations: memory and stored.
+
+# TODO: can we have a gitmanifestdict and a memgitmanifestdict and
+# only implement writes on the second one, which would have a builder?
+# Is that worth doing?
@interfaceutil.implementer(repository.imanifestdict)
class gitmanifestdict(object):
    """Expose git trees (and optionally a builder's overlay) as a manifestdict.

    Very similar to mercurial.manifest.treemanifest.
    """

    def __init__(self, git_repo, root_tree, pending_changes):
        """Initializer.

        Args:
          git_repo: The git_repo we're walking (required to look up child
              trees).
          root_tree: The root Git tree object for this manifest.
          pending_changes: A dict in which pending changes will be
              tracked. The enclosing memgittreemanifestctx will use this to
              construct any required Tree objects in Git during its
              `write()` method. If None, a fresh private dict is used.
        """
        self._git_repo = git_repo
        self._tree = root_tree
        if pending_changes is None:
            pending_changes = {}
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = pending_changes

    def _resolve_entry(self, path):
        """Given a path, load its node and flags, or raise KeyError if missing.

        This takes into account any pending writes in the builder: a pending
        value of None marks a deletion and is reported as KeyError.

        Raises:
          KeyError: the path is deleted in the overlay, or a lookup in a
              tree fails.
          ValueError: the entry's file mode is not blob, executable blob or
              link.
        """
        if path in self._pending_changes:
            val = self._pending_changes[path]
            if val is None:
                raise KeyError(path)
            return val
        upath = pycompat.fsdecode(path)
        comps = upath.split('/')
        t = self._tree
        # Descend one tree per directory component; each lookup must be
        # made in the tree reached so far, not in the root tree.
        for comp in comps[:-1]:
            te = t[comp]
            t = self._git_repo[te.id]
        ent = t[comps[-1]]
        if ent.filemode == pygit2.GIT_FILEMODE_BLOB:
            flags = b''
        elif ent.filemode == pygit2.GIT_FILEMODE_BLOB_EXECUTABLE:
            flags = b'x'
        elif ent.filemode == pygit2.GIT_FILEMODE_LINK:
            flags = b'l'
        else:
            raise ValueError('unsupported mode %s' % oct(ent.filemode))
        return ent.id.raw, flags

    def __getitem__(self, path):
        """Return the binary node for path, raising KeyError if missing."""
        return self._resolve_entry(path)[0]

    def find(self, path):
        """Return the (node, flags) pair for path."""
        return self._resolve_entry(path)

    def __len__(self):
        """Return the number of files, counted by a full walk."""
        return len(list(self.walk(matchmod.always())))

    def __nonzero__(self):
        """Return True if the manifest contains at least one file."""
        try:
            next(iter(self))
            return True
        except StopIteration:
            return False

    def __bool__(self):
        # Python 3 spelling of __nonzero__.
        return self.__nonzero__()

    def __contains__(self, path):
        try:
            self._resolve_entry(path)
            return True
        except KeyError:
            return False

    def iterkeys(self):
        """Iterate over all file paths in sorted order."""
        return self.walk(matchmod.always())

    def keys(self):
        return list(self.iterkeys())

    def __iter__(self):
        return self.iterkeys()

    def __setitem__(self, path, node):
        # Record the new node in the overlay, keeping whatever flags the
        # path currently has (b'' when the path is new).
        self._pending_changes[path] = node, self.flags(path)

    def __delitem__(self, path):
        # TODO: should probably KeyError for already-deleted files?
        self._pending_changes[path] = None

    def filesnotin(self, other, match=None):
        """Return the set of files in this manifest that are not in other."""
        if match is not None:
            # Silence "bad file" callbacks while walking both manifests.
            match = matchmod.badmatch(match, lambda path, msg: None)
            sm2 = set(other.walk(match))
            return {f for f in self.walk(match) if f not in sm2}
        return {f for f in self if f not in other}

    @util.propertycache
    def _dirs(self):
        # NOTE(review): computed once per instance by propertycache, so it
        # presumably does not track later pending changes - confirm.
        return pathutil.dirs(self)

    def hasdir(self, dir):
        return dir in self._dirs

    def diff(self, other, match=None, clean=False):
        # TODO
        assert False

    def setflag(self, path, flag):
        """Record a new flag for path in the overlay, keeping its node."""
        node, unused_flag = self._resolve_entry(path)
        self._pending_changes[path] = node, flag

    def get(self, path, default=None):
        """Return the node for path, or default if it cannot be resolved."""
        try:
            return self._resolve_entry(path)[0]
        except KeyError:
            return default

    def flags(self, path):
        """Return the flags for path, or b'' if it cannot be resolved."""
        try:
            return self._resolve_entry(path)[1]
        except KeyError:
            return b''

    def copy(self):
        """Return a copy sharing the git objects but with its own overlay."""
        return gitmanifestdict(
            self._git_repo, self._tree, dict(self._pending_changes)
        )

    def items(self):
        """Yield (path, node) pairs in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            yield f, self[f]

    def iteritems(self):
        return self.items()

    def iterentries(self):
        """Yield (path, node, flags) triples in sorted path order."""
        for f in self:
            # TODO: build a proper iterator version of this
            node, flags = self._resolve_entry(f)
            yield f, node, flags

    def text(self):
        assert False  # TODO can this method move out of the manifest iface?

    def _walkonetree(self, tree, match, subdir):
        """Yield matching file paths (bytes) under tree, recursing into subtrees.

        subdir is the bytes prefix (empty, or ending in b'/') prepended to
        every entry name of this tree.
        """
        for te in tree:
            # TODO: can we prune dir walks with the matcher?
            realname = subdir + pycompat.fsencode(te.name)
            # NOTE(review): this compares the entry type with the string
            # 'tree'; confirm that matches the pygit2 version in use.
            if te.type == r'tree':
                for inner in self._walkonetree(
                    self._git_repo[te.id], match, realname + b'/'
                ):
                    yield inner
                # A directory is not itself a manifest entry.
                continue
            if not match(realname):
                continue
            yield pycompat.fsencode(realname)

    def walk(self, match):
        """Return a sorted iterator of file paths accepted by match.

        Pending additions are merged in and pending deletions are removed.
        """
        # TODO: this is a very lazy way to merge in the pending
        # changes. There is absolutely room for optimization here by
        # being clever about walking over the sets...
        baseline = set(self._walkonetree(self._tree, match, b''))
        deleted = {p for p, v in self._pending_changes.items() if v is None}
        pend = {p for p in self._pending_changes if match(p)}
        return iter(sorted((baseline | pend) - deleted))
+
@interfaceutil.implementer(repository.imanifestrevisionstored)
class gittreemanifestctx(object):
    """Read-only manifest revision backed by a git tree."""

    def __init__(self, repo, gittree):
        # repo is used to look up child trees; gittree is the root tree of
        # this manifest revision.
        self._repo = repo
        self._tree = gittree

    def read(self):
        """Return a gitmanifestdict over the tree with an empty overlay."""
        return gitmanifestdict(self._repo, self._tree, None)

    def copy(self):
        # NB: it's important that we return a memgittreemanifestctx
        # because the caller expects a mutable manifest.
        return memgittreemanifestctx(self._repo, self._tree)

    def find(self, path):
        """Return the node stored for path, raising KeyError if missing."""
        return self.read()[path]
+
+
@interfaceutil.implementer(repository.imanifestrevisionwritable)
class memgittreemanifestctx(object):
    """Writable manifest revision: a git tree plus a dict of pending edits."""

    def __init__(self, repo, tree):
        self._repo = repo
        self._tree = tree
        # dict of path: Optional[Tuple(node, flags)]
        self._pending_changes = {}

    def read(self):
        """Return a gitmanifestdict that shares this context's overlay."""
        return gitmanifestdict(self._repo, self._tree, self._pending_changes)

    def copy(self):
        # TODO: if we have a builder in play, what should happen here?
        # Maybe we can shuffle copy() into the immutable interface.
        return memgittreemanifestctx(self._repo, self._tree)

    def write(self, transaction, link, p1, p2, added, removed, match=None):
        """Write the pending changes as git trees; return the root tree id.

        Only the pending-changes dict is consulted; transaction, link, p1,
        p2, added and removed are not used by this implementation.

        Returns:
          The raw (binary) id of the newly written root tree.
        """
        # We're not (for now, anyway) going to audit filenames, so we
        # can ignore added and removed.

        # TODO what does this match argument get used for? hopefully
        # just narrow?
        assert not match or isinstance(match, matchmod.alwaysmatcher)

        touched_dirs = pathutil.dirs(self._pending_changes)
        # Both dicts are keyed by b'' for the root and b'/dir/sub' for
        # nested directories.
        trees = {
            b'': self._tree,
        }
        # path: treebuilder
        builders = {
            b'': self._repo.TreeBuilder(self._tree),
        }
        # get a TreeBuilder for every tree in the touched_dirs set
        for d in sorted(touched_dirs, key=lambda x: (len(x), x)):
            if d == b'':
                # loaded root tree above
                continue
            comps = d.split(b'/')
            full = b''
            for part in comps:
                parent = trees[full]
                full += b'/' + part
                if full in builders:
                    # Already prepared while handling a shorter path.
                    continue
                try:
                    # Look the child tree up by object id, as the other
                    # tree lookups in this extension do.
                    new = self._repo[parent[pycompat.fsdecode(part)].id]
                except KeyError:
                    # new directory
                    new = None
                if new is not None:
                    # existing directory
                    trees[full] = new
                    builders[full] = self._repo.TreeBuilder(new)
                else:
                    # new directory, use an empty dict to easily
                    # generate KeyError as any nested new dirs get
                    # created.
                    trees[full] = {}
                    builders[full] = self._repo.TreeBuilder()
        for f, info in self._pending_changes.items():
            if b'/' not in f:
                dirname = b''
                basename = f
            else:
                dirname, basename = f.rsplit(b'/', 1)
                dirname = b'/' + dirname
            if info is None:
                # None marks a deletion.
                builders[dirname].remove(pycompat.fsdecode(basename))
            else:
                n, fl = info
                mode = {
                    b'': pygit2.GIT_FILEMODE_BLOB,
                    b'x': pygit2.GIT_FILEMODE_BLOB_EXECUTABLE,
                    b'l': pygit2.GIT_FILEMODE_LINK,
                }[fl]
                builders[dirname].insert(
                    pycompat.fsdecode(basename), gitutil.togitnode(n), mode
                )
        # This visits the buffered TreeBuilders in deepest-first
        # order, bubbling up the edits.
        for b in sorted(builders, key=len, reverse=True):
            if b == b'':
                # The root has the shortest key, so it sorts last.
                break
            cb = builders[b]
            dn, bn = b.rsplit(b'/', 1)
            builders[dn].insert(
                pycompat.fsdecode(bn), cb.write(), pygit2.GIT_FILEMODE_TREE
            )
        return builders[b''].write().raw
diff --git a/hgext/git/index.py b/hgext/git/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/index.py
@@ -0,0 +1,346 @@
+from __future__ import absolute_import
+
+import collections
+import os
+import sqlite3
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+ encoding,
+ error,
+ node as nodemod,
+ pycompat,
+)
+
+from . import gitutil
+
+
+_CURRENT_SCHEMA_VERSION = 1
+_SCHEMA = (
+ """
+CREATE TABLE refs (
+ -- node and name are unique together. There may be more than one name for
+ -- a given node, and there may be no name at all for a given node (in the
+ -- case of an anonymous hg head).
+ node TEXT NOT NULL,
+ name TEXT
+);
+
+-- The "possible heads" of the repository, which we use to figure out
+-- if we need to re-walk the changelog.
+CREATE TABLE possible_heads (
+ node TEXT NOT NULL
+);
+
+-- The topological heads of the changelog, which hg depends on.
+CREATE TABLE heads (
+ node TEXT NOT NULL
+);
+
+-- A total ordering of the changelog
+CREATE TABLE changelog (
+ rev INTEGER NOT NULL PRIMARY KEY,
+ node TEXT NOT NULL,
+ p1 TEXT,
+ p2 TEXT
+);
+
+CREATE UNIQUE INDEX changelog_node_idx ON changelog(node);
+CREATE UNIQUE INDEX changelog_node_rev_idx ON changelog(rev, node);
+
+-- Changed files for each commit, which lets us dynamically build
+-- filelogs.
+CREATE TABLE changedfiles (
+ node TEXT NOT NULL,
+ filename TEXT NOT NULL,
+ -- 40 zeroes for deletions
+ filenode TEXT NOT NULL,
+-- to handle filelog parentage:
+ p1node TEXT,
+ p1filenode TEXT,
+ p2node TEXT,
+ p2filenode TEXT
+);
+
+CREATE INDEX changedfiles_nodes_idx
+ ON changedfiles(node);
+
+PRAGMA user_version=%d
+"""
+ % _CURRENT_SCHEMA_VERSION
+)
+
+
def _createdb(path):
    """Open the sqlite index at path, creating the schema if it is new.

    A user_version of 0 is treated as a new database and gets the full
    schema. Any version other than 0 or the current schema version aborts.
    The connection returns text columns as bytes and uses WAL journaling.
    """
    conn = sqlite3.connect(encoding.strfromlocal(path))
    conn.text_factory = bytes

    version = conn.execute('PRAGMA user_version').fetchone()[0]

    if version not in (0, _CURRENT_SCHEMA_VERSION):
        raise error.Abort(_(b'sqlite database has unrecognized version'))

    if version == 0:
        # New database: run the schema one statement at a time.
        for stmt in _SCHEMA.split(';'):
            conn.execute(stmt.strip())
        conn.commit()

    conn.execute('PRAGMA journal_mode=WAL')

    return conn
+
+
+_OUR_ORDER = (
+ pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_TIME | pygit2.GIT_SORT_REVERSE
+)
+
+_DIFF_FLAGS = 1 << 21 # GIT_DIFF_FORCE_BINARY, which isn't exposed by pygit2
+
+
def _find_nearest_ancestor_introducing_node(
    db, gitrepo, file_path, walk_start, filenode
):
    """Find the nearest ancestor that introduces a file node.

    Args:
      db: a handle to our sqlite database.
      gitrepo: A pygit2.Repository instance.
      file_path: the path of a file in the repo
      walk_start: a pygit2.Oid that is a commit where we should start walking
                  for our nearest ancestor.
      filenode: the hexlified node id of the file version to look for.

    Returns:
      A hexlified SHA that is the commit ID of the next-nearest parent.

    Raises:
      error.ProgrammingError: no commit reachable from walk_start is
          recorded in changedfiles for this path and file node.
    """
    assert isinstance(file_path, str), 'file_path must be str, got %r' % type(
        file_path
    )
    assert isinstance(filenode, str), 'filenode must be str, got %r' % type(
        filenode
    )
    # Rows are decoded from ASCII so they compare equal to the str hex
    # ids produced by the walker below.
    parent_options = {
        row[0].decode('ascii')
        for row in db.execute(
            'SELECT node FROM changedfiles '
            'WHERE filename = ? AND filenode = ?',
            (file_path, filenode),
        )
    }
    inner_walker = gitrepo.walk(walk_start, _OUR_ORDER)
    for w in inner_walker:
        if w.id.hex in parent_options:
            return w.id.hex
    raise error.ProgrammingError(
        'Unable to find introducing commit for %s node %s from %s'
        % (file_path, filenode, walk_start)
    )
+
+
def fill_in_filelog(gitrepo, db, startcommit, path, startfilenode):
    """Populate filelog parent pointers for path, starting at a commit.

    Processes a queue of (commit, filenode) pairs. For each pair, the file
    parents are located in the commit's parents and written to the
    changedfiles row. Any parent whose own row has no p1node yet is queued.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      startcommit: a hexlified node id for the commit to start at
      path: the path of the file whose parent pointers we should fill in.
      startfilenode: the hexlified node id of the file at startcommit

    TODO: make startfilenode optional
    """
    assert isinstance(
        startcommit, str
    ), 'startcommit must be str, got %r' % type(startcommit)
    assert isinstance(
        startfilenode, str
    ), 'startfilenode must be str, got %r' % type(startfilenode)
    pending = collections.deque()
    pending.append((startcommit, startfilenode))
    components = path.split('/')
    while pending:
        cnode, filenode = pending.popleft()
        commit = gitrepo[cnode]
        fileparents = []
        for parent in commit.parents:
            obj = parent.tree
            present = True
            for comp in components:
                try:
                    obj = gitrepo[obj[comp].id]
                except KeyError:
                    # The path does not exist in this parent.
                    present = False
                    break
            if not present:
                continue
            parentfnode = obj.id.hex
            introducer = _find_nearest_ancestor_introducing_node(
                db, gitrepo, path, parent.id, parentfnode
            )
            fileparents.append((introducer, parentfnode))
        for par, parfnode in fileparents:
            row = db.execute(
                'SELECT COUNT(*) FROM changedfiles WHERE '
                'node = ? AND filename = ? AND filenode = ? AND '
                'p1node NOT NULL',
                (par, path, parfnode),
            ).fetchone()
            if int(row[0]) == 0:
                # Parentage of that ancestor is not filled in yet.
                assert par is not None
                pending.append((par, parfnode))
        nparents = len(fileparents)
        p1node = p1fnode = gitutil.nullgit
        p2node = p2fnode = gitutil.nullgit
        if nparents:
            p1node, p1fnode = fileparents[0]
        if nparents == 2:
            p2node, p2fnode = fileparents[1]
        if nparents > 2:
            raise error.ProgrammingError(
                b"git support can't handle octopus merges"
            )
        db.execute(
            'UPDATE changedfiles SET '
            'p1node = ?, p1filenode = ?, p2node = ?, p2filenode = ? '
            'WHERE node = ? AND filename = ? AND filenode = ?',
            (p1node, p1fnode, p2node, p2fnode, commit.id.hex, path, filenode),
        )
    db.commit()
+
+
def _index_repo(gitrepo, db, progress_factory=lambda *args, **kwargs: None):
    """Rebuild the sqlite changelog index from the references of gitrepo.

    Nothing is rewritten when the set of candidate head commits matches the
    one recorded by the previous run.

    Args:
      gitrepo: a pygit2.Repository
      db: a handle to our sqlite database
      progress_factory: called with a topic (b'refs', then b'commits').
          It returns either None or an object whose update(pos) and
          complete() methods are called as work proceeds.

    Raises:
      error.ProgrammingError: a commit with more than two parents is found.
    """
    # Identify all references so we can tell the walker to visit all of them.
    all_refs = gitrepo.listall_references()
    possible_heads = set()
    prog = progress_factory(b'refs')
    for pos, ref in enumerate(all_refs):
        if prog is not None:
            prog.update(pos)
        if not (
            ref.startswith('refs/heads/')  # local branch
            or ref.startswith('refs/tags/')  # tag
            or ref.startswith('refs/remotes/')  # remote branch
            or ref.startswith('refs/hg/')  # from this extension
        ):
            continue
        try:
            start = gitrepo.lookup_reference(ref).peel(pygit2.GIT_OBJ_COMMIT)
        except ValueError:
            # No commit to be found, so we don't care for hg's purposes.
            continue
        possible_heads.add(start.id)
    # Optimization: if the list of heads hasn't changed, don't
    # reindex, the changelog. This doesn't matter on small
    # repositories, but on even moderately deep histories (eg cpython)
    # this is a very important performance win.
    #
    # TODO: we should figure out how to incrementally index history
    # (preferably by detecting rewinds!) so that we don't have to do a
    # full changelog walk every time a new commit is created.
    #
    # The connection hands text back as bytes, so normalize to the native
    # str type before comparing against the hex ids from pygit2.
    cache_heads = {
        pycompat.sysstr(x[0])
        for x in db.execute('SELECT node FROM possible_heads')
    }
    cur_cache_heads = {h.hex for h in possible_heads}
    if cur_cache_heads == cache_heads:
        if prog is not None:
            prog.complete()
        return
    walker = None
    for start in possible_heads:
        if walker is None:
            walker = gitrepo.walk(start, _OUR_ORDER)
        else:
            walker.push(start)

    # Empty out the existing changelog. Even for large-ish histories
    # we can do the top-level "walk all the commits" dance very
    # quickly as long as we don't need to figure out the changed files
    # list.
    db.execute('DELETE FROM changelog')
    if prog is not None:
        prog.complete()
    prog = progress_factory(b'commits')
    # This walker is sure to visit all the revisions in history, but
    # only once. With no usable refs there is no walker and nothing to
    # visit.
    commits = walker if walker is not None else ()
    for pos, commit in enumerate(commits):
        if prog is not None:
            prog.update(pos)
        p1 = p2 = nodemod.nullhex
        if len(commit.parents) > 2:
            raise error.ProgrammingError(
                (
                    b"git support can't handle octopus merges, "
                    b"found a commit with %d parents :("
                )
                % len(commit.parents)
            )
        if commit.parents:
            p1 = commit.parents[0].id.hex
        if len(commit.parents) == 2:
            p2 = commit.parents[1].id.hex
        db.execute(
            'INSERT INTO changelog (rev, node, p1, p2) VALUES(?, ?, ?, ?)',
            (pos, commit.id.hex, p1, p2),
        )

        num_changedfiles = db.execute(
            "SELECT COUNT(*) from changedfiles WHERE node = ?",
            (commit.id.hex,),
        ).fetchone()[0]
        if not num_changedfiles:
            # I *think* we only need to check p1 for changed files
            # (and therefore linkrevs), because any node that would
            # actually have this commit as a linkrev would be
            # completely new in this rev.
            diffbase = commit.parents[0].id.hex if commit.parents else None
            if diffbase is not None:
                patchgen = gitrepo.diff(
                    diffbase, commit.id.hex, flags=_DIFF_FLAGS
                )
            else:
                patchgen = commit.tree.diff_to_tree(
                    swap=True, flags=_DIFF_FLAGS
                )
            new_files = (p.delta.new_file for p in patchgen)
            files = {
                nf.path: nf.id.hex
                for nf in new_files
                if nf.id.raw != nodemod.nullid
            }
            for p, n in files.items():
                # We intentionally set NULLs for any file parentage
                # information so it'll get demand-computed later. We
                # used to do it right here, and it was _very_ slow.
                db.execute(
                    'INSERT INTO changedfiles ('
                    'node, filename, filenode, p1node, p1filenode, p2node, '
                    'p2filenode) VALUES(?, ?, ?, ?, ?, ?, ?)',
                    (commit.id.hex, p, n, None, None, None, None),
                )
    db.execute('DELETE FROM heads')
    db.execute('DELETE FROM possible_heads')
    for hid in possible_heads:
        h = hid.hex
        db.execute('INSERT INTO possible_heads (node) VALUES(?)', (h,))
        # A candidate is a topological head only if no indexed commit
        # names it as a parent.
        haschild = db.execute(
            'SELECT COUNT(*) FROM changelog WHERE p1 = ? OR p2 = ?', (h, h)
        ).fetchone()[0]
        if not haschild:
            db.execute('INSERT INTO heads (node) VALUES(?)', (h,))

    db.commit()
    if prog is not None:
        prog.complete()
+
+
def get_index(gitrepo, progress_factory=lambda *args, **kwargs: None):
    """Open the sqlite index for gitrepo, index the repo, and return the db.

    The database lives at ``../.hg/cache/git-commits.sqlite`` relative to
    gitrepo.path; the cache directory is created if it does not exist.
    """
    gitdir = pycompat.fsencode(gitrepo.path)
    cachedir = os.path.join(gitdir, b'..', b'.hg', b'cache')
    if not os.path.exists(cachedir):
        os.makedirs(cachedir)
    index_db = _createdb(os.path.join(cachedir, b'git-commits.sqlite'))
    # TODO check against gitrepo heads before doing a full index
    # TODO thread a ui.progress call into this layer
    _index_repo(gitrepo, index_db, progress_factory)
    return index_db
diff --git a/hgext/git/gitutil.py b/hgext/git/gitutil.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitutil.py
@@ -0,0 +1,28 @@
+"""utilities to assist in working with pygit2"""
+from __future__ import absolute_import
+
+from mercurial.node import bin, hex, nullid
+
+from mercurial import pycompat
+
+
def togitnode(n):
    """Turn a 20-byte Mercurial binary node into a hexlified node string.

    pygit2 and sqlite both need nodes as strings, not bytes, so on
    Python 3 the hex digest is decoded from ASCII.
    """
    assert len(n) == 20
    hexed = hex(n)
    return hexed.decode('ascii') if pycompat.ispy3 else hexed
+
+
def fromgitnode(n):
    """Turn a 40-character hexlified node back into a binary node.

    Inverse of togitnode: on Python 3 the string is ASCII-encoded first.
    """
    assert len(n) == 40
    hexed = n.encode('ascii') if pycompat.ispy3 else n
    return bin(hexed)
+
+
+nullgit = togitnode(nullid)
diff --git a/hgext/git/gitlog.py b/hgext/git/gitlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/gitlog.py
@@ -0,0 +1,465 @@
+from __future__ import absolute_import
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+ ancestor,
+ changelog as hgchangelog,
+ dagop,
+ encoding,
+ error,
+ manifest,
+ match as matchmod,
+ node as nodemod,
+ pycompat,
+)
+from mercurial.interfaces import (
+ repository,
+ util as interfaceutil,
+)
+from mercurial.utils import stringutil
+from . import (
+ gitutil,
+ index,
+ manifest as gitmanifest,
+)
+
+
class baselog(object):  # revlog.revlog):
    """Common implementations between changelog and manifestlog."""

    def __init__(self, gr, db):
        # gr is the git repository object; db is the sqlite index handle.
        self.gitrepo = gr
        self._db = db

    def __len__(self):
        """Return the number of rows in the changelog table."""
        return int(
            self._db.execute('SELECT COUNT(*) FROM changelog').fetchone()[0]
        )

    def rev(self, n):
        """Return the revision number for binary node n.

        The null node maps to -1.

        Raises:
          error.LookupError: n is not in the changelog table.
        """
        if n == nodemod.nullid:
            return -1
        t = self._db.execute(
            'SELECT rev FROM changelog WHERE node = ?', (gitutil.togitnode(n),)
        ).fetchone()
        if t is None:
            raise error.LookupError(n, b'00changelog.i', _(b'no node'))
        return t[0]

    def node(self, r):
        """Return the binary node for revision number r.

        The null revision maps to the null node.

        Raises:
          error.LookupError: r is not in the changelog table.
        """
        if r == nodemod.nullrev:
            return nodemod.nullid
        t = self._db.execute(
            'SELECT node FROM changelog WHERE rev = ?', (r,)
        ).fetchone()
        if t is None:
            raise error.LookupError(r, b'00changelog.i', _(b'no node'))
        return nodemod.bin(t[0])

    def hasnode(self, n):
        """Return True if node n is present in the changelog table."""
        # The table stores hexlified nodes, so convert a 20-byte binary
        # node the same way rev() does. Any other value is passed through
        # unchanged.
        # NOTE(review): callers presumably pass binary nodes - confirm.
        if len(n) == 20:
            n = gitutil.togitnode(n)
        t = self._db.execute(
            'SELECT node FROM changelog WHERE node = ?', (n,)
        ).fetchone()
        return t is not None
+
+
class baselogindex(object):
    """Minimal revlog-index lookalike backed by a baselog-style object."""

    def __init__(self, log):
        self._log = log

    def has_node(self, n):
        """Return True if the log maps n to a revision other than -1.

        A node the log does not know is reported as False instead of
        letting the log's LookupError escape.
        """
        try:
            return self._log.rev(n) != -1
        except error.LookupError:
            return False

    def __len__(self):
        return len(self._log)

    def __getitem__(self, idx):
        """Return a revlog-index-style 8-tuple for revision idx."""
        p1rev, p2rev = self._log.parentrevs(idx)
        # TODO: it's messy that the index leaks so far out of the
        # storage layer that we have to implement things like reading
        # this raw tuple, which exposes revlog internals.
        return (
            # Pretend offset is just the index, since we don't really care.
            idx,
            # Same with lengths
            idx,  # length
            idx,  # rawsize
            -1,  # delta base
            idx,  # linkrev TODO is this right?
            p1rev,
            p2rev,
            self._log.node(idx),
        )
+
+
+# TODO: an interface for the changelog type?
+class changelog(baselog):
+ def __contains__(self, rev):
+ try:
+ self.node(rev)
+ return True
+ except error.LookupError:
+ return False
+
+ @property
+ def filteredrevs(self):
+ # TODO: we should probably add a refs/hg/ namespace for hidden
+ # heads etc, but that's an idea for later.
+ return set()
+
+ @property
+ def index(self):
+ return baselogindex(self)
+
+ @property
+ def nodemap(self):
+ r = {
+ nodemod.bin(v[0]): v[1]
+ for v in self._db.execute('SELECT node, rev FROM changelog')
+ }
+ r[nodemod.nullid] = nodemod.nullrev
+ return r
+
+ def tip(self):
+ t = self._db.execute(
+ 'SELECT node FROM changelog ORDER BY rev DESC LIMIT 1'
+ ).fetchone()
+ if t:
+ return nodemod.bin(t[0])
+ return nodemod.nullid
+
+ def revs(self, start=0, stop=None):
+ if stop is None:
+ stop = self.tip()
+ t = self._db.execute(
+ 'SELECT rev FROM changelog '
+ 'WHERE rev >= ? AND rev <= ? '
+ 'ORDER BY REV ASC',
+ (start, stop),
+ )
+ return (int(r[0]) for r in t)
+
+ def _partialmatch(self, id):
+ if nodemod.wdirhex.startswith(id):
+ raise error.WdirUnsupported
+ candidates = [
+ nodemod.bin(x[0])
+ for x in self._db.execute(
+ 'SELECT node FROM changelog WHERE node LIKE ?', (id + b'%',)
+ )
+ ]
+ if nodemod.nullhex.startswith(id):
+ candidates.append(nodemod.nullid)
+ if len(candidates) > 1:
+ raise error.AmbiguousPrefixLookupError(
+ id, b'00changelog.i', _(b'ambiguous identifier')
+ )
+ if candidates:
+ return candidates[0]
+ return None
+
+ def flags(self, rev):
+ return 0
+
+ def shortest(self, node, minlength=1):
+ nodehex = nodemod.hex(node)
+ for attempt in pycompat.xrange(minlength, len(nodehex) + 1):
+ candidate = nodehex[:attempt]
+ matches = int(
+ self._db.execute(
+ 'SELECT COUNT(*) FROM changelog WHERE node LIKE ?',
+ (nodehex + b'%',),
+ ).fetchone()[0]
+ )
+ if matches == 1:
+ return candidate
+ return nodehex
+
+ def headrevs(self, revs=None):
+ realheads = [
+ int(x[0])
+ for x in self._db.execute(
+ 'SELECT rev FROM changelog '
+ 'INNER JOIN heads ON changelog.node = heads.node'
+ )
+ ]
+ if revs:
+ return sorted([r for r in revs if r in realheads])
+ return sorted(realheads)
+
+ def changelogrevision(self, nodeorrev):
+ # Ensure we have a node id
+ if isinstance(nodeorrev, int):
+ n = self.node(nodeorrev)
+ else:
+ n = nodeorrev
+ # handle looking up nullid
+ if n == nodemod.nullid:
+ return hgchangelog._changelogrevision(extra={})
+ hn = gitutil.togitnode(n)
+ # We've got a real commit!
+ files = [
+ r[0]
+ for r in self._db.execute(
+ 'SELECT filename FROM changedfiles '
+ 'WHERE node = ? and filenode != ?',
+ (hn, gitutil.nullgit),
+ )
+ ]
+ filesremoved = [
+ r[0]
+ for r in self._db.execute(
+ 'SELECT filename FROM changedfiles '
+ 'WHERE node = ? and filenode = ?',
+ (hn, nodemod.nullhex),
+ )
+ ]
+ c = self.gitrepo[hn]
+ return hgchangelog._changelogrevision(
+ manifest=n, # pretend manifest the same as the commit node
+ user=b'%s <%s>'
+ % (c.author.name.encode('utf8'), c.author.email.encode('utf8')),
+ # TODO: a fuzzy memory from hg-git hacking says this should be -offset
+ date=(c.author.time, c.author.offset),
+ files=files,
+ # TODO filesadded in the index
+ filesremoved=filesremoved,
+ description=c.message.encode('utf8'),
+ # TODO do we want to handle extra? how?
+ extra={b'branch': b'default'},
+ )
+
+ def ancestors(self, revs, stoprev=0, inclusive=False):
+ revs = list(revs)
+ tip = self.rev(self.tip())
+ for r in revs:
+ if r > tip:
+ raise IndexError(b'Invalid rev %r' % r)
+ return ancestor.lazyancestors(
+ self.parentrevs, revs, stoprev=stoprev, inclusive=inclusive
+ )
+
+ # Cleanup opportunity: this is *identical* to the revlog.py version
+ def descendants(self, revs):
+ return dagop.descendantrevs(revs, self.revs, self.parentrevs)
+
+ def reachableroots(self, minroot, heads, roots, includepath=False):
+ return dagop._reachablerootspure(
+ self.parentrevs, minroot, roots, heads, includepath
+ )
+
+ # Cleanup opportunity: this is *identical* to the revlog.py version
+ def isancestor(self, a, b):
+ a, b = self.rev(a), self.rev(b)
+ return self.isancestorrev(a, b)
+
+ # Cleanup opportunity: this is *identical* to the revlog.py version
+ def isancestorrev(self, a, b):
+ if a == nodemod.nullrev:
+ return True
+ elif a == b:
+ return True
+ elif a > b:
+ return False
+ return bool(self.reachableroots(a, [b], [a], includepath=False))
+
+ def parentrevs(self, rev):
+ n = self.node(rev)
+ hn = gitutil.togitnode(n)
+ c = self.gitrepo[hn]
+ p1 = p2 = nodemod.nullrev
+ if c.parents:
+ p1 = self.rev(c.parents[0].id.raw)
+ if len(c.parents) > 2:
+ raise error.Abort(b'TODO octopus merge handling')
+ if len(c.parents) == 2:
+ p2 = self.rev(c.parents[0].id.raw)
+ return p1, p2
+
+ # Private method is used at least by the tags code.
+ _uncheckedparentrevs = parentrevs
+
+ def commonancestorsheads(self, a, b):
+ """Return the nodes that head the common ancestors of ``a`` and ``b``.
+
+ Both arguments are nodes; the computation runs on revision numbers
+ and the resulting revisions are mapped back to nodes.
+ """
+ # TODO the revlog version of this has a C path, so we probably
+ # need to optimize this...
+ a, b = self.rev(a), self.rev(b)
+ return [
+ self.node(n)
+ for n in ancestor.commonancestorsheads(self.parentrevs, a, b)
+ ]
+
+ def branchinfo(self, rev):
+ """Git doesn't do named branches, so just put everything on default."""
+ # ``rev`` is ignored; the second element is always False.
+ # NOTE(review): presumably the "branch closed" flag - confirm.
+ return b'default', False
+
+ def delayupdate(self, tr):
+ """Do nothing; ``tr`` is ignored."""
+ # TODO: I think we can elide this because we're just dropping
+ # an object in the git repo?
+ pass
+
+ def add(
+ self,
+ manifest,
+ files,
+ desc,
+ transaction,
+ p1,
+ p2,
+ user,
+ date=None,
+ extra=None,
+ p1copies=None,
+ p2copies=None,
+ filesadded=None,
+ filesremoved=None,
+ ):
+ """Create a git commit and return its binary id.
+
+ ``manifest`` is converted with ``gitutil.togitnode`` and passed as
+ the commit's tree; ``p1``/``p2`` become the git parents when they
+ are not the null id. ``user`` is split into name and email for the
+ signature, which is used for both author and committer. ``date``
+ must be a ``(timestamp, tz)`` pair. ``files``, ``transaction``,
+ ``extra``, the copy arguments, ``filesadded`` and ``filesremoved``
+ are accepted but not used here.
+ """
+ parents = []
+ hp1, hp2 = gitutil.togitnode(p1), gitutil.togitnode(p2)
+ if p1 != nodemod.nullid:
+ parents.append(hp1)
+ if p2 and p2 != nodemod.nullid:
+ parents.append(hp2)
+ assert date is not None
+ timestamp, tz = date
+ sig = pygit2.Signature(
+ encoding.unifromlocal(stringutil.person(user)),
+ encoding.unifromlocal(stringutil.email(user)),
+ timestamp,
+ tz,
+ )
+ # The first argument (the ref to update) is None, so no branch moves.
+ oid = self.gitrepo.create_commit(
+ None, sig, sig, desc, gitutil.togitnode(manifest), parents
+ )
+ # Set up an internal reference to force the commit into the
+ # changelog. Hypothetically, we could even use this refs/hg/
+ # namespace to allow for anonymous heads on git repos, which
+ # would be neat.
+ self.gitrepo.references.create(
+ 'refs/hg/internal/latest-commit', oid, force=True
+ )
+ # Reindex now to pick up changes. We omit the progress
+ # callback because this will be very quick.
+ index._index_repo(self.gitrepo, self._db)
+ return oid.raw
+
+
+class manifestlog(baselog):
+ """Manifest access where each manifest is the tree of a git commit."""
+
+ def __getitem__(self, node):
+ """Return the root manifest context for ``node``."""
+ return self.get(b'', node)
+
+ def get(self, relpath, node):
+ """Return a manifest context for directory ``relpath`` at ``node``.
+
+ The null node yields an in-memory tree manifest context. Otherwise
+ ``node`` is looked up as a commit and its tree is descended one
+ ``/``-separated component of ``relpath`` at a time.
+ """
+ if node == nodemod.nullid:
+ # TODO: this should almost certainly be a memgittreemanifestctx
+ return manifest.memtreemanifestctx(self, relpath)
+ commit = self.gitrepo[gitutil.togitnode(node)]
+ t = commit.tree
+ if relpath:
+ parts = relpath.split(b'/')
+ for p in parts:
+ # Look up the tree entry, then load the object it points at.
+ te = t[p]
+ t = self.gitrepo[te.id]
+ return gitmanifest.gittreemanifestctx(self.gitrepo, t)
+
+
+@interfaceutil.implementer(repository.ifilestorage)
+class filelog(baselog):
+ """Storage for one tracked path, read from git blobs and the index db."""
+
+ def __init__(self, gr, db, path):
+ # path must be bytes; the queries below decode it with fsdecode.
+ super(filelog, self).__init__(gr, db)
+ assert isinstance(path, bytes)
+ self.path = path
+
+ def read(self, node):
+ """Return the blob data for ``node``; the null node reads as empty."""
+ if node == nodemod.nullid:
+ return b''
+ return self.gitrepo[gitutil.togitnode(node)].data
+
+ def lookup(self, node):
+ """Resolve ``node`` to a binary node that exists in the git repo.
+
+ Accepts a 20-byte binary or 40-character hex node. Anything of
+ another length is converted with ``int()``, which is not supported
+ yet. Raises ``error.LookupError`` when the object is not present.
+ """
+ if len(node) not in (20, 40):
+ node = int(node)
+ if isinstance(node, int):
+ # NOTE(review): with assertions disabled this falls through to
+ # len(node) on an int - confirm that is acceptable.
+ assert False, b'todo revnums for nodes'
+ if len(node) == 40:
+ node = nodemod.bin(node)
+ hnode = gitutil.togitnode(node)
+ if hnode in self.gitrepo:
+ return node
+ raise error.LookupError(self.path, node, _(b'no match found'))
+
+ def cmp(self, node, text):
+ """Returns True if text is different than content at `node`."""
+ return self.read(node) != text
+
+ def add(self, text, meta, transaction, link, p1=None, p2=None):
+ """Store ``text`` as a git blob and return its binary id.
+
+ ``transaction``, ``link``, ``p1`` and ``p2`` are not used.
+ """
+ assert not meta # Should we even try to handle this?
+ return self.gitrepo.create_blob(text).raw
+
+ def __iter__(self):
+ """Yield changelog revisions that changed this path.
+
+ Rows whose filenode equals ``gitutil.nullgit`` are excluded.
+ """
+ for clrev in self._db.execute(
+ '''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode != ?
+ ''',
+ (pycompat.fsdecode(self.path), gitutil.nullgit),
+ ):
+ yield clrev[0]
+
+ def linkrev(self, fr):
+ """Return ``fr`` unchanged."""
+ # __iter__ and rev() hand out changelog revisions, so a file revision
+ # already is its own linkrev.
+ return fr
+
+ def rev(self, node):
+ """Return the changelog revision that introduced filenode ``node``.
+
+ Raises ``error.LookupError`` when the index has no matching row.
+ """
+ row = self._db.execute(
+ '''
+SELECT rev FROM changelog
+INNER JOIN changedfiles ON changelog.node = changedfiles.node
+WHERE changedfiles.filename = ? AND changedfiles.filenode = ?''',
+ (pycompat.fsdecode(self.path), gitutil.togitnode(node)),
+ ).fetchone()
+ if row is None:
+ raise error.LookupError(self.path, node, _(b'no such node'))
+ return int(row[0])
+
+ def node(self, rev):
+ """Return the binary filenode recorded for this path at ``rev``.
+
+ Raises IndexError when the index has no row for that revision.
+ """
+ maybe = self._db.execute(
+ '''SELECT filenode FROM changedfiles
+INNER JOIN changelog ON changelog.node = changedfiles.node
+WHERE changelog.rev = ? AND filename = ?
+''',
+ (rev, pycompat.fsdecode(self.path)),
+ ).fetchone()
+ if maybe is None:
+ raise IndexError('gitlog %r out of range %d' % (self.path, rev))
+ return nodemod.bin(maybe[0])
+
+ def parents(self, node):
+ """Return the binary parent filenodes of ``node`` as a list.
+
+ If either parent column is still NULL in the index, the filelog
+ data for this path is filled in and the lookup is retried.
+ """
+ gn = gitutil.togitnode(node)
+ gp = pycompat.fsdecode(self.path)
+ ps = []
+ # NOTE(review): fetchone() returns None when no row matches, and
+ # iterating None raises TypeError - confirm callers never pass an
+ # unindexed node.
+ for p in self._db.execute(
+ '''SELECT p1filenode, p2filenode FROM changedfiles
+WHERE filenode = ? AND filename = ?
+''',
+ (gn, gp),
+ ).fetchone():
+ if p is None:
+ commit = self._db.execute(
+ "SELECT node FROM changedfiles "
+ "WHERE filenode = ? AND filename = ?",
+ (gn, gp),
+ ).fetchone()[0]
+ # This filelog is missing some data. Build the
+ # filelog, then recurse (which will always find data).
+ if pycompat.ispy3:
+ commit = commit.decode('ascii')
+ index.fill_in_filelog(self.gitrepo, self._db, commit, gp, gn)
+ return self.parents(node)
+ else:
+ ps.append(nodemod.bin(p))
+ return ps
+
+ def renamed(self, node):
+ """Report that ``node`` is not a rename (always False for now)."""
+ # TODO: renames/copies
+ return False
diff --git a/hgext/git/dirstate.py b/hgext/git/dirstate.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/dirstate.py
@@ -0,0 +1,290 @@
+from __future__ import absolute_import
+
+import contextlib
+import errno
+import os
+
+import pygit2
+
+from mercurial import (
+ error,
+ extensions,
+ match as matchmod,
+ node as nodemod,
+ pycompat,
+ scmutil,
+ util,
+)
+from mercurial.interfaces import (
+ dirstate as intdirstate,
+ util as interfaceutil,
+)
+
+from . import gitutil
+
+
+def readpatternfile(orig, filepath, warn, sourceinfo=False):
+ if not (b'info/exclude' in filepath or filepath.endswith(b'.gitignore')):
+ return orig(filepath, warn, sourceinfo=False)
+ result = []
+ warnings = []
+ with open(filepath, b'rb') as fp:
+ for l in fp:
+ l = l.strip()
+ if not l or l.startswith(b'#'):
+ continue
+ if l.startswith(b'!'):
+ # on reflection, I think /foo is just glob:
+ warnings.append(b'unsupported ignore pattern %s' % l)
+ continue
+ if l.startswith(b'/'):
+ result.append(b'glob:' + l[1:])
+ else:
+ result.append(b'relglob:' + l)
+ return result, warnings
+
+
+extensions.wrapfunction(matchmod, b'readpatternfile', readpatternfile)
+
+
+_STATUS_MAP = {
+ pygit2.GIT_STATUS_CONFLICTED: b'm',
+ pygit2.GIT_STATUS_CURRENT: b'n',
+ pygit2.GIT_STATUS_IGNORED: b'?',
+ pygit2.GIT_STATUS_INDEX_DELETED: b'r',
+ pygit2.GIT_STATUS_INDEX_MODIFIED: b'n',
+ pygit2.GIT_STATUS_INDEX_NEW: b'a',
+ pygit2.GIT_STATUS_INDEX_RENAMED: b'a',
+ pygit2.GIT_STATUS_INDEX_TYPECHANGE: b'n',
+ pygit2.GIT_STATUS_WT_DELETED: b'r',
+ pygit2.GIT_STATUS_WT_MODIFIED: b'n',
+ pygit2.GIT_STATUS_WT_NEW: b'?',
+ pygit2.GIT_STATUS_WT_RENAMED: b'a',
+ pygit2.GIT_STATUS_WT_TYPECHANGE: b'n',
+ pygit2.GIT_STATUS_WT_UNREADABLE: b'?',
+ pygit2.GIT_STATUS_INDEX_MODIFIED | pygit2.GIT_STATUS_WT_MODIFIED: 'm',
+}
+
+
+ at interfaceutil.implementer(intdirstate.idirstate)
+class gitdirstate(object):
+ def __init__(self, ui, root, gitrepo):
+ self._ui = ui
+ self._root = os.path.dirname(root)
+ self.git = gitrepo
+
+ def p1(self):
+ return self.git.head.peel().id.raw
+
+ def p2(self):
+ # TODO: MERGE_HEAD? something like that, right?
+ return nodemod.nullid
+
+ def setparents(self, p1, p2=nodemod.nullid):
+ assert p2 == nodemod.nullid, b'TODO merging support'
+ self.git.head.set_target(gitutil.togitnode(p1))
+
+ @util.propertycache
+ def identity(self):
+ self.identity = util.filestat.frompath(
+ os.path.join(self.root, b'.git', b'index')
+ )
+
+ def branch(self):
+ return b'default'
+
+ def parents(self):
+ # TODO how on earth do we find p2 if a merge is in flight?
+ return self.p1(), nodemod.nullid
+
+ def __iter__(self):
+ return (pycompat.fsencode(f.path) for f in self.git.index)
+
+ def items(self):
+ for ie in self.git.index:
+ yield ie.path, None # value should be a dirstatetuple
+
+ # py2,3 compat forward
+ iteritems = items
+
+ def __getitem__(self, filename):
+ try:
+ gs = self.git.status_file(filename)
+ except KeyError:
+ return b'?'
+ return _STATUS_MAP[gs]
+
+ def __contains__(self, filename):
+ try:
+ gs = self.git.status_file(filename)
+ return _STATUS_MAP[gs] != b'?'
+ except KeyError:
+ return False
+
+ def status(self, match, subrepos, ignored, clean, unknown):
+ # TODO handling of clean files - can we get that from git.status()?
+ modified, added, removed, deleted, unknown, ignored, clean = (
+ [],
+ [],
+ [],
+ [],
+ [],
+ [],
+ [],
+ )
+ gstatus = self.git.status()
+ for path, status in gstatus.items():
+ path = pycompat.fsencode(path)
+ if status == pygit2.GIT_STATUS_IGNORED:
+ if path.endswith(b'/'):
+ continue
+ ignored.append(path)
+ elif status in (
+ pygit2.GIT_STATUS_WT_MODIFIED,
+ pygit2.GIT_STATUS_INDEX_MODIFIED,
+ pygit2.GIT_STATUS_WT_MODIFIED
+ | pygit2.GIT_STATUS_INDEX_MODIFIED,
+ ):
+ modified.append(path)
+ elif status == pygit2.GIT_STATUS_INDEX_NEW:
+ added.append(path)
+ elif status == pygit2.GIT_STATUS_WT_NEW:
+ unknown.append(path)
+ elif status == pygit2.GIT_STATUS_WT_DELETED:
+ deleted.append(path)
+ elif status == pygit2.GIT_STATUS_INDEX_DELETED:
+ removed.append(path)
+ else:
+ raise error.Abort(
+ b'unhandled case: status for %r is %r' % (path, status)
+ )
+
+ # TODO are we really always sure of status here?
+ return (
+ False,
+ scmutil.status(
+ modified, added, removed, deleted, unknown, ignored, clean
+ ),
+ )
+
+ def flagfunc(self, buildfallback):
+ # TODO we can do better
+ return buildfallback()
+
+ def getcwd(self):
+ # TODO is this a good way to do this?
+ return os.path.dirname(
+ os.path.dirname(pycompat.fsencode(self.git.path))
+ )
+
+ def normalize(self, path):
+ normed = util.normcase(path)
+ assert normed == path, b"TODO handling of case folding: %s != %s" % (
+ normed,
+ path,
+ )
+ return path
+
+ @property
+ def _checklink(self):
+ return util.checklink(os.path.dirname(pycompat.fsencode(self.git.path)))
+
+ def copies(self):
+ # TODO support copies?
+ return {}
+
+ # # TODO what the heck is this
+ _filecache = set()
+
+ def pendingparentchange(self):
+ # TODO: we need to implement the context manager bits and
+ # correctly stage/revert index edits.
+ return False
+
+ def write(self, tr):
+
+ if tr:
+
+ def writeinner(category):
+ self.git.index.write()
+
+ tr.addpending(b'gitdirstate', writeinner)
+ else:
+ self.git.index.write()
+
+ def pathto(self, f, cwd=None):
+ if cwd is None:
+ cwd = self.getcwd()
+ # TODO core dirstate does something about slashes here
+ assert isinstance(f, bytes)
+ r = util.pathto(self._root, cwd, f)
+ return r
+
+ def matches(self, match):
+ for x in self.git.index:
+ p = pycompat.fsencode(x.path)
+ if match(p):
+ yield p
+
+ def normal(self, f, parentfiledata=None):
+ """Mark a file normal and clean."""
+ # TODO: for now we just let libgit2 re-stat the file. We can
+ # clearly do better.
+
+ def normallookup(self, f):
+ """Mark a file normal, but possibly dirty."""
+ # TODO: for now we just let libgit2 re-stat the file. We can
+ # clearly do better.
+
+ def walk(self, match, subrepos, unknown, ignored, full=True):
+ # TODO: we need to use .status() and not iterate the index,
+ # because the index doesn't force a re-walk and so `hg add` of
+ # a new file without an intervening call to status will
+ # silently do nothing.
+ r = {}
+ cwd = self.getcwd()
+ for path, status in self.git.status().items():
+ if path.startswith('.hg/'):
+ continue
+ path = pycompat.fsencode(path)
+ if not match(path):
+ continue
+ # TODO construct the stat info from the status object?
+ try:
+ s = os.stat(os.path.join(cwd, path))
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ raise
+ continue
+ r[path] = s
+ return r
+
+ def savebackup(self, tr, backupname):
+ # TODO: figure out a strategy for saving index backups.
+ pass
+
+ def restorebackup(self, tr, backupname):
+ # TODO: figure out a strategy for saving index backups.
+ pass
+
+ def add(self, f):
+ self.git.index.add(pycompat.fsdecode(f))
+
+ def drop(self, f):
+ self.git.index.remove(pycompat.fsdecode(f))
+
+ def remove(self, f):
+ self.git.index.remove(pycompat.fsdecode(f))
+
+ def copied(self, path):
+ # TODO: track copies?
+ return None
+
+ @contextlib.contextmanager
+ def parentchange(self):
+ # TODO: track this maybe?
+ yield
+
+ def clearbackup(self, tr, backupname):
+ # TODO
+ pass
diff --git a/hgext/git/__init__.py b/hgext/git/__init__.py
new file mode 100644
--- /dev/null
+++ b/hgext/git/__init__.py
@@ -0,0 +1,251 @@
+"""grant Mercurial the ability to operate on Git repositories. (EXPERIMENTAL)
+
+This is currently super experimental. It probably will consume your
+firstborn a la Rumpelstiltskin, etc.
+"""
+
+from __future__ import absolute_import
+
+import os
+
+import pygit2
+
+from mercurial.i18n import _
+
+from mercurial import (
+ commands,
+ error,
+ extensions,
+ localrepo,
+ pycompat,
+ store,
+)
+
+from . import (
+ dirstate,
+ gitlog,
+ gitutil,
+ index,
+)
+
+
+# TODO: extract an interface for this in core
+class gitstore(object): # store.basicstore):
+ """Store object that wraps a pygit2 repository found next to ``.hg``."""
+
+ def __init__(self, path, vfstype):
+ self.vfs = vfstype(path)
+ self.path = self.vfs.base
+ self.createmode = store._calcmode(self.vfs)
+ # above lines should go away in favor of:
+ # super(gitstore, self).__init__(path, vfstype)
+
+ # The git directory is the sibling ``.git`` of the store path.
+ self.git = pygit2.Repository(
+ os.path.normpath(os.path.join(path, b'..', b'.git'))
+ )
+ # No-op placeholder; reposetup() replaces it with ui.makeprogress.
+ self._progress_factory = lambda *args, **kwargs: None
+ self._db_handle = None
+
+ @property
+ def _db(self):
+ """The index database handle, created on first access."""
+ # We lazy-create the database because we want to thread a
+ # progress callback down to the indexing process if it's
+ # required, and we don't have a ui handle in makestore().
+ if self._db_handle is None:
+ self._db_handle = index.get_index(self.git, self._progress_factory)
+ return self._db_handle
+
+ def join(self, f):
+ """Fake store.join method for git repositories.
+
+ For the most part, store.join is used for @storecache
+ decorators to invalidate caches when various files
+ change. We'll map the ones we care about, and ignore the rest.
+
+ Raises NotImplementedError for any name without a mapping.
+ """
+ if f in (b'00changelog.i', b'00manifest.i'):
+ # This is close enough: in order for the changelog cache
+ # to be invalidated, HEAD will have to change.
+ return os.path.join(self.path, b'HEAD')
+ elif f == b'lock':
+ # TODO: we probably want to map this to a git lock, I
+ # suspect index.lock. We should figure out what the
+ # most-alike file is in git-land. For now we're risking
+ # bad concurrency errors if another git client is used.
+ return os.path.join(self.path, b'hgit-bogus-lock')
+ elif f in (b'obsstore', b'phaseroots', b'narrowspec', b'bookmarks'):
+ return os.path.join(self.path, b'..', b'.hg', f)
+ raise NotImplementedError(b'Need to pick file for %s.' % f)
+
+ def changelog(self, trypending):
+ """Return a git-backed changelog; ``trypending`` is ignored."""
+ # TODO we don't have a plan for trypending in hg's git support yet
+ return gitlog.changelog(self.git, self._db)
+
+ def manifestlog(self, repo, storenarrowmatch):
+ """Return a git-backed manifestlog; both arguments are ignored."""
+ # TODO handle storenarrowmatch and figure out if we need the repo arg
+ return gitlog.manifestlog(self.git, self._db)
+
+ def invalidatecaches(self):
+ """Do nothing."""
+ pass
+
+ def write(self, tr=None):
+ """Do nothing; there is no store-level state to write."""
+ # normally this handles things like fncache writes, which we don't have
+ pass
+
+
+def _makestore(orig, requirements, storebasepath, vfstype):
+ """Wrapper for ``localrepo.makestore`` that can return a ``gitstore``.
+
+ The git store is used only when the store path contains a
+ ``this-is-git`` marker and a sibling ``.git`` exists; otherwise the
+ wrapped ``orig`` builds the store.
+ """
+ if os.path.exists(
+ os.path.join(storebasepath, b'this-is-git')
+ ) and os.path.exists(os.path.join(storebasepath, b'..', b'.git')):
+ return gitstore(storebasepath, vfstype)
+ return orig(requirements, storebasepath, vfstype)
+
+
+class gitfilestorage(object):
+ """File storage provider that hands out git-backed filelogs."""
+
+ def file(self, path):
+ """Return a ``gitlog.filelog`` for ``path``, minus one leading slash."""
+ if path[0:1] == b'/':
+ path = path[1:]
+ return gitlog.filelog(self.store.git, self.store._db, path)
+
+
+def _makefilestorage(orig, requirements, features, **kwargs):
+ """Wrapper for ``localrepo.makefilestorage``.
+
+ Returns the ``gitfilestorage`` class when the ``store`` keyword
+ argument is a ``gitstore``; otherwise defers to ``orig``.
+ """
+ store = kwargs['store']
+ if isinstance(store, gitstore):
+ return gitfilestorage
+ return orig(requirements, features, **kwargs)
+
+
+def _setupdothg(ui, path):
+ """Set up a ``.hg`` directory inside the git working copy at ``path``.
+
+ Warns if ``.hg`` already exists; otherwise creates it and appends
+ ``.hg`` to ``.git/info/exclude``. Also writes an empty ``this-is-git``
+ marker and a ``requirements`` file containing ``git``.
+ """
+ dothg = os.path.join(path, b'.hg')
+ if os.path.exists(dothg):
+ ui.warn(_(b'git repo already initialized for hg\n'))
+ else:
+ os.mkdir(os.path.join(path, b'.hg'))
+ # TODO is it ok to extend .git/info/exclude like this?
+ with open(
+ os.path.join(path, b'.git', b'info', b'exclude'), 'ab'
+ ) as exclude:
+ exclude.write(b'\n.hg\n')
+ # Opening in 'wb' mode creates (or truncates) the empty marker file.
+ with open(os.path.join(dothg, b'this-is-git'), 'wb') as f:
+ pass
+ with open(os.path.join(dothg, b'requirements'), 'wb') as f:
+ f.write(b'git\n')
+
+
+# Git references under this prefix are exposed as bookmarks.
+_BMS_PREFIX = 'refs/heads/'
+
+
+class gitbmstore(object):
+ """Bookmark store that maps bookmark names onto ``refs/heads/*``."""
+
+ def __init__(self, gitrepo):
+ self.gitrepo = gitrepo
+
+ def __contains__(self, name):
+ """Return True if a reference for bookmark ``name`` exists."""
+ return (
+ _BMS_PREFIX + pycompat.fsdecode(name)
+ ) in self.gitrepo.references
+
+ def __iter__(self):
+ """Yield bookmark names as bytes, with the ref prefix stripped."""
+ for r in self.gitrepo.listall_references():
+ if r.startswith(_BMS_PREFIX):
+ yield pycompat.fsencode(r[len(_BMS_PREFIX) :])
+
+ def __getitem__(self, k):
+ """Return the binary id that bookmark ``k`` peels to."""
+ return (
+ self.gitrepo.references[_BMS_PREFIX + pycompat.fsdecode(k)]
+ .peel()
+ .id.raw
+ )
+
+ def get(self, k, default=None):
+ """Return the node for bookmark ``k``, or ``default``.
+
+ ``default`` is returned both when the bookmark is missing and when
+ pygit2 raises ``InvalidSpecError`` for the name.
+ """
+ try:
+ if k in self:
+ return self[k]
+ return default
+ except pygit2.InvalidSpecError:
+ return default
+
+ @property
+ def active(self):
+ """The bookmark HEAD points at, as bytes, or None.
+
+ None is returned when HEAD's target is not a str or does not start
+ with the bookmark prefix.
+ """
+ h = self.gitrepo.references['HEAD']
+ if not isinstance(h.target, str) or not h.target.startswith(
+ _BMS_PREFIX
+ ):
+ return None
+ return pycompat.fsencode(h.target[len(_BMS_PREFIX) :])
+
+ @active.setter
+ def active(self, mark):
+ # Changing the active bookmark is not implemented.
+ raise NotImplementedError
+
+ def names(self, node):
+ """Return the bookmark names (bytes) whose ref peels to ``node``."""
+ r = []
+ for ref in self.gitrepo.listall_references():
+ if not ref.startswith(_BMS_PREFIX):
+ continue
+ if self.gitrepo.references[ref].peel().id.raw != node:
+ continue
+ r.append(pycompat.fsencode(ref[len(_BMS_PREFIX) :]))
+ return r
+
+ # Cleanup opportunity: this is *identical* to core's bookmarks store.
+ def expandname(self, bname):
+ """Expand ``b'.'`` to the active bookmark; return other names as is.
+
+ Raises ``error.RepoLookupError`` when ``b'.'`` is given and no
+ bookmark is active.
+ """
+ if bname == b'.':
+ if self.active:
+ return self.active
+ raise error.RepoLookupError(_(b"no active bookmark"))
+ return bname
+
+ def applychanges(self, repo, tr, changes):
+ """Apply a list of changes to bookmarks
+
+ ``changes`` is iterated as ``(name, node)`` pairs: a ``None`` node
+ deletes the reference, anything else force-creates it. ``repo``
+ and ``tr`` are not used.
+ """
+ # TODO: this should respect transactions, but that's going to
+ # require enlarging the gitbmstore to know how to do in-memory
+ # temporary writes and read those back prior to transaction
+ # finalization.
+ for name, node in changes:
+ if node is None:
+ self.gitrepo.references.delete(
+ _BMS_PREFIX + pycompat.fsdecode(name)
+ )
+ else:
+ self.gitrepo.references.create(
+ _BMS_PREFIX + pycompat.fsdecode(name),
+ gitutil.togitnode(node),
+ force=True,
+ )
+
+
+def init(orig, ui, dest=b'.', **opts):
+ """Wrapper for ``hg init`` that understands the ``--git`` flag.
+
+ With ``--git``, a ``.hg`` directory is set up at ``dest`` and 0 is
+ returned; without it, the wrapped command runs unchanged.
+ """
+ if opts.get('git', False):
+ path = os.path.abspath(dest)
+ # TODO: walk up looking for the git repo
+ _setupdothg(ui, path)
+ return 0
+ return orig(ui, dest=dest, **opts)
+
+
+def reposetup(ui, repo):
+ """Swap in git-aware dirstate and bookmarks for git-backed repos.
+
+ Applies only when ``repo.store`` is a ``gitstore``: the store gets the
+ ui's progress factory and the repo's class is replaced by a subclass
+ of its current class.
+ """
+ if isinstance(repo.store, gitstore):
+ orig = repo.__class__
+ repo.store._progress_factory = repo.ui.makeprogress
+
+ class gitlocalrepo(orig):
+ def _makedirstate(self):
+ # TODO narrow support here
+ return dirstate.gitdirstate(
+ self.ui, self.vfs.base, self.store.git
+ )
+
+ @property
+ def _bookmarks(self):
+ # A new gitbmstore is built on every access.
+ return gitbmstore(self.store.git)
+
+ repo.__class__ = gitlocalrepo
+ return repo
+
+
+def extsetup(ui):
+ """Install the store and file-storage wrappers and the --git flag."""
+ extensions.wrapfunction(localrepo, b'makestore', _makestore)
+ extensions.wrapfunction(localrepo, b'makefilestorage', _makefilestorage)
+ # Inject --git flag for `hg init`
+ entry = extensions.wrapcommand(commands.table, b'init', init)
+ # entry[1] is extended with the new flag definition.
+ entry[1].extend(
+ [(b'', b'git', None, b'setup up a git repository instead of hg')]
+ )
To: durin42, #hg-reviewers
Cc: sluongng, tom.prince, sheehan, rom1dep, JordiGH, hollisb, mjpieters, mercurial-devel
More information about the Mercurial-devel
mailing list