[PATCH 3 of 3] [mq]: revlog_c.patch

Bernhard Leiner mailinglists.bleiner at gmail.com
Mon Sep 15 19:14:05 UTC 2008


# HG changeset patch
# User Bernhard Leiner <bleiner at gmail.com>
# Date 1221505550 -7200
# Node ID 6e7cf56886364bfd41d509a48506ed3e4ef31f14
# Parent  9804622ce53c838d310454d3d6c35331ecefd590
[mq]: revlog_c.patch

diff --git a/mercurial/revlog_c.c b/mercurial/revlog_c.c
new file mode 100644
--- /dev/null
+++ b/mercurial/revlog_c.c
@@ -0,0 +1,251 @@
+/*
+ revlog_c.c - C implementation of revlog functions
+
+ Copyright 2008 Bernhard Leiner <bleiner at gmail.com>
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+
+*/
+#include <Python.h>
+
+#if defined __hpux || defined __SUNPRO_C || defined _AIX
+# define inline
+#endif
+
+#ifdef _WIN32
+#ifdef _MSC_VER
+#define inline __inline
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+#else
+#include <stdint.h>
+#endif
+static uint32_t ntohl(uint32_t x)
+{
+	return ((x & 0x000000ffUL) << 24) |
+		((x & 0x0000ff00UL) <<  8) |
+		((x & 0x00ff0000UL) >>  8) |
+		((x & 0xff000000UL) >> 24);
+}
+#else
+#include <sys/types.h>
+#ifdef __BEOS__
+#include <ByteOrder.h>
+#else
+#include <arpa/inet.h>
+#endif
+#include <inttypes.h>
+#endif
+
+static PyObject *revlog_c_Error;
+
+static inline uint64_t ntohll(uint64_t x)
+{
+	return (((uint64_t)ntohl((uint32_t)x)) << 32) |
+		(uint64_t)ntohl((uint32_t)(x >> 32));
+}
+
+
+/* RevlogNG format (all in big endian, data may be inlined):
+ *    6 bytes: offset
+ *    2 bytes: flags
+ *    4 bytes: compressed length
+ *    4 bytes: uncompressed length
+ *    4 bytes: base revision
+ *    4 bytes: link revision
+ *    4 bytes: parent 1 revision
+ *    4 bytes: parent 2 revision
+ *   32 bytes: nodeid (only 20 bytes used)
+ */
+static int parseindex_ng (const char *data, int size, int inlined,
+			  PyObject *index, PyObject *nodemap)
+{
+	PyObject *e = NULL; /* init to circumvent compiler warning */
+	PyObject *py_obj, *py_obj2;
+	const char *end = data + size;
+	uint32_t comp_len;
+	uint64_t offset_flags;
+	int i;
+	int first = 1;
+	int n = 0;
+
+	/* TODO DRY! 
+	 * if possible, do not hardcode node.nullid and node.nullrev here */
+	char nullrev[20] = {'\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
+			    '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0'};
+	int nullid = -1;
+
+	while (data < end) {
+		/* create new revlog entry */
+		e = PyTuple_New(8);
+		if (!e)
+			goto fail_entry;
+
+		/* get offset and flags */
+		offset_flags = ntohll(*((uint64_t *)data));;
+		if (first) {
+			offset_flags &= 0xFFFF; /* mask out version number */
+			first = 0;
+		}
+		py_obj = PyLong_FromUnsignedLongLong(offset_flags);
+		if (!py_obj)
+			goto fail_obj;
+		PyTuple_SET_ITEM(e, 0, py_obj);
+		data += 8;
+
+		/* 6 values, each of them 4 bytes. The first one is the
+		 * compressed length which will be needed later on if data
+		 * is inlined */
+		comp_len = ntohl(*((uint32_t *)data));
+		py_obj = PyInt_FromLong(comp_len);
+		if (!py_obj)
+			goto fail_obj;
+		PyTuple_SET_ITEM(e, 1, py_obj);
+		data += 4;
+
+		/* the remaining five 4 byte values:  */
+		for (i = 2; i < 7; i++) {
+			py_obj = PyInt_FromLong(ntohl(*((uint32_t *)data)));
+			if (!py_obj)
+				goto fail_obj;
+			PyTuple_SET_ITEM(e, i, py_obj);
+			data += 4;
+		}
+
+		/* get node id, set in nodemap dict and in 'e' */
+		py_obj = PyString_FromStringAndSize(data, 20);
+		if (!py_obj)
+			goto fail_obj;
+		py_obj2 = PyInt_FromLong(n);
+		if (!py_obj2)
+			goto fail_obj2;
+		if (PyDict_SetItem(nodemap, py_obj, py_obj2) != 0)
+			goto fail_dict;
+		PyTuple_SET_ITEM(e, 7, py_obj);
+
+		/* append to the index list and release the object */
+		PyList_Append(index, e);
+		Py_DECREF(e);
+
+		/* set data pointer to start of next reventry and increase 
+		 * counter */
+		data += 32 + (inlined ? comp_len : 0);
+		n++;
+	}
+	if (data > end) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(revlog_c_Error, "corrupt index file");
+		return 0;
+	}
+
+	/* add the nullid/nullrev entry to the nodemap */
+	py_obj = PyString_FromStringAndSize(nullrev, 20);
+	if (!py_obj)
+		goto fail_obj;
+	py_obj2 = PyInt_FromLong(nullid);
+	if (!py_obj2) {
+		goto fail_obj2;
+	}
+	if (PyDict_SetItem(nodemap, py_obj, py_obj2) != 0)
+		goto fail_obj2;
+
+	return 1;
+
+fail_dict:
+fail_obj2:
+	Py_XDECREF(py_obj2);
+fail_obj:
+	Py_XDECREF(py_obj);
+fail_entry:
+	Py_XDECREF(e);
+	return 0;
+}
+
+/* --- callable from python ------------------------------------------------- */
+
+/* This functions parses a index file and returns a Python tuple of the
+ * following format: (index, nodemap, cache)
+ *
+ * index: a list of tuples containing the RevlogNG elements
+ * nodemap: a dict mapping node ids to indices in the index list
+ * cache: if data is inlined, a tuple (index_file_content, 0) else None
+ */
+static PyObject *parseindex(PyObject *self, PyObject *args)
+{
+	const char *data;
+	int size, inlined;
+	PyObject *rval, *index, *nodemap, *cache, *integer;
+	PyObject *data_obj, *inlined_obj;
+
+	if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
+		return NULL;
+	inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
+
+	/* create Python objects to be returned */
+	rval = PyTuple_New(3);
+	if (!rval)
+		goto fail_rval;
+	index = PyList_New(0);
+	if (!index)
+		goto fail_index;
+	nodemap = PyDict_New();
+	if (!nodemap)
+		goto fail_nodemap;
+	if (inlined) {
+		cache = PyTuple_New(2);
+		if (!cache)
+			goto fail_cache;
+		integer = PyInt_FromLong(0);
+		if (!integer)
+			goto fail_integer;
+		PyTuple_SET_ITEM(cache, 0, integer);
+
+		data_obj = PyTuple_GET_ITEM(args, 0);
+		/* change borrowed reference into owned one */
+		Py_INCREF(data_obj);
+		PyTuple_SET_ITEM(cache, 1, data_obj);
+	} else {
+		cache = Py_None;
+		Py_INCREF(Py_None);
+	}
+	
+	/* actually populate the index and the nodemap with data */
+	if (!parseindex_ng (data, size, inlined, index, nodemap))
+		goto fail_parse;
+
+	/* set values for the rval tuple */
+	PyTuple_SET_ITEM(rval, 0, index);
+	PyTuple_SET_ITEM(rval, 1, nodemap);
+	PyTuple_SET_ITEM(rval, 2, cache);
+
+	return rval;
+
+fail_integer:
+	Py_XDECREF(integer);
+fail_parse:
+fail_cache:
+	Py_XDECREF(cache);
+fail_nodemap:
+	Py_XDECREF(nodemap);
+fail_index:
+	Py_XDECREF(index);
+fail_rval:
+	Py_XDECREF(rval);
+	return NULL;
+}
+
+static char revlog_c_doc[] = "Efficient implementation of revlog functions.";
+
+static PyMethodDef methods[] = {
+	{"parseindex", parseindex, METH_VARARGS, 
+	 "Fast implementation of parseindex\n"},
+	{NULL, NULL}
+};
+
+PyMODINIT_FUNC initrevlog_c(void)
+{
+	Py_InitModule3("revlog_c", methods, revlog_c_doc);
+	revlog_c_Error = PyErr_NewException("revlog_c.revlog_c_Error",
+					    NULL, NULL);
+}



More information about the Mercurial mailing list