[PATCH] parse_manifest: rewrite to use memchr
Benoit Boissinot
bboissin at gmail.com
Tue Sep 10 19:52:59 UTC 2013
On Sun, Sep 8, 2013 at 5:07 AM, Siddharth Agarwal <sid0 at fb.com> wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0 at fb.com>
> # Date 1378536479 25200
> # Fri Sep 06 23:47:59 2013 -0700
> # Node ID 3219c8821869d631bd6dab602a66019c4c43639c
> # Parent 6638dd23999d66a95ce28816f035a1d051963290
> parse_manifest: rewrite to use memchr
>
> memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc
> 2.12
> on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from
> 0.116
> seconds to 0.095 seconds.
>
Looks good.
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -77,7 +77,7 @@
> static PyObject *parse_manifest(PyObject *self, PyObject *args)
> {
> PyObject *mfdict, *fdict;
> - char *str, *cur, *start, *zero;
> + char *str, *start, *end, *zero, *newline;
> int len;
>
> if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
> @@ -86,21 +86,24 @@
> &str, &len))
> goto quit;
>
> - for (start = cur = str, zero = NULL; cur < str + len; cur++) {
> + start = str;
> + end = str + len;
> + while (start < end) {
> PyObject *file = NULL, *node = NULL;
> PyObject *flags = NULL;
> ptrdiff_t nlen;
>
you can now move the declaration of zero and newline here (and initialize).
>
> - if (!*cur) {
> - zero = cur;
> - continue;
> - }
> - else if (*cur != '\n')
> - continue;
> -
> + zero = memchr(start, '\0', end - start);
> if (!zero) {
> PyErr_SetString(PyExc_ValueError,
> - "manifest entry has no separator");
> + "manifest contains trailing
> garbage");
> + goto quit;
> + }
> +
> + newline = memchr(zero + 1, '\n', end - (zero + 1));
> + if (!newline) {
> + PyErr_SetString(PyExc_ValueError,
> + "manifest contains trailing
> garbage");
> goto quit;
> }
>
> @@ -109,7 +112,7 @@
> if (!file)
> goto bail;
>
> - nlen = cur - zero - 1;
> + nlen = newline - zero - 1;
>
> node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
> if (!node)
> @@ -128,8 +131,7 @@
> if (PyDict_SetItem(mfdict, file, node) == -1)
> goto bail;
>
> - start = cur + 1;
> - zero = NULL;
> + start = newline + 1;
>
> Py_XDECREF(flags);
> Py_XDECREF(node);
> @@ -142,12 +144,6 @@
> goto quit;
> }
>
> - if (len > 0 && *(cur - 1) != '\n') {
> - PyErr_SetString(PyExc_ValueError,
> - "manifest contains trailing garbage");
> - goto quit;
> - }
> -
> Py_INCREF(Py_None);
> return Py_None;
> quit:
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-devel/attachments/20130910/4cb38f97/attachment-0002.html>
More information about the Mercurial-devel
mailing list