[PATCH 2 of 2] revlog: switch to a C version of headrevs
Bryan O'Sullivan
bos at serpentine.com
Tue Apr 17 21:26:27 UTC 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1334697949 25200
# Node ID 496191a81119bb8ec8f46f6f1da485a10f3e8733
# Parent 083221aa0d705cafa8b2588af149b6dabf0fcf28
revlog: switch to a C version of headrevs
The C implementation is more than 100 times faster than the Python
version (which is still available as a fallback). The result of the
headrevs calculation is cached, because it's still somewhat expensive,
and headrevs is called several times when the tag cache is stale
or missing.
In practice, in a repo with 330,000 revs and a stale .hg/cache/tags
file, this patch improves the performance of "hg tip" from 2.2 to
1.6 seconds.
diff -r 083221aa0d70 -r 496191a81119 mercurial/parsers.c
--- a/mercurial/parsers.c Tue Apr 17 14:19:54 2012 -0700
+++ b/mercurial/parsers.c Tue Apr 17 14:25:49 2012 -0700
@@ -244,6 +244,7 @@
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
PyObject *added; /* populated on demand */
+ PyObject *headrevs; /* cache, invalidated on changes */
nodetree *nt; /* base-16 trie */
int ntlength; /* # nodes in use */
int ntcapacity; /* # nodes allocated */
@@ -461,6 +462,11 @@
if (self->nt)
nt_insert(self, node, (int)offset);
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
+
Py_RETURN_NONE;
}
@@ -484,6 +490,10 @@
free(self->nt);
self->nt = NULL;
}
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
}
static PyObject *index_clearcaches(indexObject *self)
@@ -534,6 +544,100 @@
return NULL;
}
+/*
+ * When we cache a list, we want to be sure the caller can't mutate
+ * the cached copy.
+ */
+static PyObject *list_copy(PyObject *list)
+{
+ Py_ssize_t len = PyList_GET_SIZE(list);
+ PyObject *newlist = PyList_New(len);
+ Py_ssize_t i;
+
+ if (newlist == NULL)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ PyObject *obj = PyList_GET_ITEM(list, i);
+ Py_INCREF(obj);
+ PyList_SET_ITEM(newlist, i, obj);
+ }
+
+ return newlist;
+}
+
+static PyObject *index_headrevs(indexObject *self)
+{
+ Py_ssize_t i, len, addlen;
+ PyObject *heads;
+ char *ishead;
+
+ if (self->headrevs)
+ return list_copy(self->headrevs);
+
+ len = index_length(self) - 1;
+ heads = PyList_New(0);
+ if (heads == NULL)
+ goto bail;
+ if (len == 0) {
+ PyObject *nullid = PyInt_FromLong(-1);
+ if (nullid == NULL || PyList_Append(heads, nullid) == -1)
+ goto bail;
+ goto done;
+ }
+
+ ishead = alloca(len);
+ memset(ishead, 1, len);
+
+ for (i = 0; i < self->raw_length; i++) {
+ const char *data = index_deref(self, i);
+ int parent_1 = getbe32(data + 24);
+ int parent_2 = getbe32(data + 28);
+ if (parent_1 >= 0)
+ ishead[parent_1] = 0;
+ if (parent_2 >= 0)
+ ishead[parent_2] = 0;
+ }
+
+ addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+
+ for (i = 0; i < addlen; i++) {
+ PyObject *rev = PyList_GET_ITEM(self->added, i);
+ PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+ PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+ long parent_1, parent_2;
+
+ if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+ PyErr_SetString(PyExc_TypeError,
+ "revlog parents are invalid");
+ goto bail;
+ }
+ parent_1 = PyInt_AS_LONG(p1);
+ parent_2 = PyInt_AS_LONG(p2);
+ if (parent_1 >= 0)
+ ishead[parent_1] = 0;
+ if (parent_2 >= 0)
+ ishead[parent_2] = 0;
+ }
+
+ for (i = 0; i < len; i++) {
+ PyObject *head;
+
+ if (!ishead[i])
+ continue;
+ head = PyInt_FromLong(i);
+ if (head == NULL || PyList_Append(heads, head) == -1)
+ goto bail;
+ }
+
+done:
+ self->headrevs = heads;
+ return list_copy(self->headrevs);
+bail:
+ Py_XDECREF(heads);
+ return NULL;
+}
+
static inline int nt_level(const char *node, int level)
{
int v = node[level>>1];
@@ -829,6 +933,7 @@
{
Py_ssize_t start, stop, step, slicelength;
Py_ssize_t length = index_length(self);
+ int ret = 0;
if (PySlice_GetIndicesEx((PySliceObject*)item, length,
&start, &stop, &step, &slicelength) < 0)
@@ -874,7 +979,9 @@
self->ntrev = (int)start;
}
self->length = start + 1;
- return 0;
+ if (start < self->raw_length)
+ self->raw_length = start;
+ goto done;
}
if (self->nt) {
@@ -882,10 +989,15 @@
if (self->ntrev > start)
self->ntrev = (int)start;
}
- return self->added
- ? PyList_SetSlice(self->added, start - self->length + 1,
- PyList_GET_SIZE(self->added), NULL)
- : 0;
+ if (self->added)
+ ret = PyList_SetSlice(self->added, start - self->length + 1,
+ PyList_GET_SIZE(self->added), NULL);
+done:
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
+ return ret;
}
/*
@@ -965,6 +1077,7 @@
self->cache = NULL;
self->added = NULL;
+ self->headrevs = NULL;
self->offsets = NULL;
self->nt = NULL;
self->ntlength = self->ntcapacity = 0;
@@ -1041,6 +1154,8 @@
"clear the index caches"},
{"get", (PyCFunction)index_m_get, METH_VARARGS,
"get an index entry"},
+ {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
+ "get head revisions"},
{"insert", (PyCFunction)index_insert, METH_VARARGS,
"insert an index entry"},
{"stats", (PyCFunction)index_stats, METH_NOARGS,
diff -r 083221aa0d70 -r 496191a81119 mercurial/revlog.py
--- a/mercurial/revlog.py Tue Apr 17 14:19:54 2012 -0700
+++ b/mercurial/revlog.py Tue Apr 17 14:25:49 2012 -0700
@@ -635,15 +635,18 @@
return (orderedout, roots, heads)
def headrevs(self):
- count = len(self)
- if not count:
- return [nullrev]
- ishead = [1] * (count + 1)
- index = self.index
- for r in xrange(count):
- e = index[r]
- ishead[e[5]] = ishead[e[6]] = 0
- return [r for r in xrange(count) if ishead[r]]
+ try:
+ return self.index.headrevs()
+ except AttributeError:
+ count = len(self)
+ if not count:
+ return [nullrev]
+ ishead = [1] * (count + 1)
+ index = self.index
+ for r in xrange(count):
+ e = index[r]
+ ishead[e[5]] = ishead[e[6]] = 0
+ return [r for r in xrange(count) if ishead[r]]
def heads(self, start=None, stop=None):
"""return the list of all nodes that have no children
More information about the Mercurial-devel
mailing list