[PATCH 4 of 6 V3] revsbetween: add a C implementation
Siddharth Agarwal
sid at less-broken.com
Fri Aug 7 18:12:07 UTC 2015
On 8/7/15 10:38 AM, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon at fb.com>
> # Date 1438921725 25200
> # Thu Aug 06 21:28:45 2015 -0700
> # Branch stable
> # Node ID cf26c47bd9344439d5b97b42df7d64330468c5c6
> # Parent 4d1b9f4def5070ac3a09dbeb396b745237a49417
> revsbetween: add a C implementation
Looks good to me in general. A couple of nits below.
>
> This patch is part of a series of patches to speed up the computation of
> revset.revsbetween by introducing a C implementation. The main motivation is to
> speed up smartlog on big repositories. At the end of the series, on our big
> repositories the computation of revsbetween is 10-50x faster and smartlog on is
> 2x-5x faster.
>
> This patch introduces a C implementation for revsbetween following closely the
> Python implementation but optimized by using C data structures.
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1105,6 +1105,141 @@
> phases[i] = phases[parent_2];
> }
>
> +static PyObject *revsbetween(indexObject *self, PyObject *args)
> +{
> +
> + /* Input */
> + long minroot;
> + PyObject *includepatharg = NULL;
> + int includepath = 0;
> + PyObject *heads = NULL;
> + Py_ssize_t numheads;
> + PyObject *roots = NULL;
> + PyObject *reachable = NULL;
> +
> + PyObject *val;
> + Py_ssize_t len = index_length(self) - 1;
> + long revnum;
> + Py_ssize_t k;
> + Py_ssize_t i;
> + int r;
> + int minidx;
> + int parents[2];
> +
> + /* Internal data structure:
> + * tovisit: array of length len+1 (all revs + nullrev), filled upto lentovisit
> + * seen: array of length len+1 (all revs + nullrev) 0: not seen, 1 seen*/
> + int *tovisit = NULL;
> + long lentovisit = 0;
> + char *seen = NULL;
> +
> + /* Get arguments */
> + if (!PyArg_ParseTuple(args, "lOOO", &minroot, &heads, &roots,
> + &includepatharg)) {
Use the O! syntax for this: https://docs.python.org/2/c-api/arg.html
> + goto bail;
> + }
> + if (roots == NULL || !PySet_Check(roots))
> + goto bail;
> + if (heads == NULL || !PyList_Check(heads))
> + goto bail;
> + if (includepatharg == NULL || !PyBool_Check(includepatharg))
> + goto bail;
> + if (includepatharg == Py_True)
> + includepath = 1;
> +
> + /* Initialize return set */
> + reachable = PySet_New(0);
> + if (reachable == NULL)
> + goto bail;
> +
> + /* Initialize internal datastructures */
> + tovisit = (int *)malloc((len + 1) * sizeof(int));
> + if (tovisit == NULL) {
> + PyErr_SetNone(PyExc_MemoryError);
> + goto release_reachable;
> + }
> +
> + seen = (char *)calloc(len+1, 1);
> + if (seen == NULL) {
> + PyErr_SetNone(PyExc_MemoryError);
> + goto release_tovisit;
> + }
Something that would be interesting to check is whether a bitmap is faster.
> +
> + /* Populate tovisit with all the heads */
> + numheads = PyList_GET_SIZE(heads);
> + for (i = 0; i < numheads; i++) {
> + revnum = PyInt_AS_LONG(PyList_GET_ITEM(heads, i));
> + if (seen[revnum+1] == 0) {
> + tovisit[lentovisit++] = revnum;
> + seen[revnum+1]=1;
> + }
> + }
> +
> + /* Visit the tovisit list and find the reachable roots */
> + k = 0;
> + while (k < lentovisit) {
> + /* Add the node to reachable if it is a root*/
> + revnum = tovisit[k++];
> + val = PyInt_FromLong(revnum);
> + if (PySet_Contains(roots, val) == 1) {
> + PySet_Add(reachable, val);
> + if (includepath == 0) {
> + Py_XDECREF(val);
> + continue;
> + }
> + }
> + Py_XDECREF(val);
> +
> + /* And its parents to the list of nodes to visit */
> + if (revnum != -1) {
> + r = index_get_parents(self, revnum, parents, (int)len - 1);
> + if (r < 0)
> + goto release_seen;
> +
> + for (i = 0; i < 2; i++) {
> + if (seen[parents[i]+1] == 0 && parents[i] >= minroot) {
> + tovisit[lentovisit++] = parents[i];
> + seen[parents[i]+1]=1;
Spaces around + and =
seen[parents[i] + 1] = 1;
> + }
> + }
> + }
> + }
> +
> + /* Find all the nodes in between the roots we found and the heads
> + * and add them to the reachable set */
> + if (includepath == 1) {
> + minidx = minroot;
> + if (minidx < 0)
> + minidx = 0;
> + for (i = minidx; i < len; i++) {
> + if (seen[i+1] == 1) {
Spaces around +
> + r = index_get_parents(self, i, parents, (int)len - 1);
> + /* Corrupted index file, error is set from index_get_parents */
> + if (r < 0)
> + goto release_seen;
> + for (k = 0; k < 2; k++) {
> + PyObject *p = PyInt_FromLong(parents[k]);
> + if (PySet_Contains(reachable, p) == 1)
> + PySet_Add(reachable, PyInt_FromLong(i));
> + Py_XDECREF(p);
> + }
> + }
> + }
> + }
> +
> +release_seen:
> + free(seen);
> +release_tovisit:
> + free(tovisit);
You can combine these two since free(NULL) is well-defined.
> + return reachable;
> +release_reachable:
> + Py_XDECREF(reachable);
> +bail:
> + val = Py_None;
> + Py_INCREF(Py_None);
> + return val;
> +}
> +
> static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
> {
> PyObject *roots = Py_None;
> @@ -2279,6 +2414,8 @@
> "get an index entry"},
> {"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
> METH_VARARGS, "compute phases"},
> + {"revsbetween", (PyCFunction)revsbetween, METH_VARARGS,
> + "revsbetween"},
> {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
> "get head revisions"}, /* Can do filtering since 3.2 */
> {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list