[PATCH 3 of 3] imported patch revlog_c_parseindex.patch

Bernhard Leiner mailinglists.bleiner at gmail.com
Mon Oct 13 22:03:01 UTC 2008


# HG changeset patch
# User Bernhard Leiner <bleiner at gmail.com>
# Date 1223927239 -7200
# Node ID 5466b1b19da562209d5a815607a07d10c7b81fc5
# Parent  617ee65a661886079337ecf115c931ebf10d432b
imported patch revlog_c_parseindex.patch

diff --git a/mercurial/revlog_c.c b/mercurial/revlog_c.c
new file mode 100644
--- /dev/null
+++ b/mercurial/revlog_c.c
@@ -0,0 +1,259 @@
+/*
+ revlog_c.c - C implementation of revlog functions
+
+ Copyright 2008 Bernhard Leiner <bleiner at gmail.com>
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+
+*/
+#include <Python.h>
+
+#if defined __hpux || defined __SUNPRO_C || defined _AIX
+# define inline
+#endif
+
+#ifdef _WIN32
+#ifdef _MSC_VER
+#define inline __inline
+typedef unsigned long uint32_t;
+typedef unsigned long long uint64_t;
+#else
+#include <stdint.h>
+#endif
+static uint32_t ntohl(uint32_t x)
+{
+	return ((x & 0x000000ffUL) << 24) |
+		((x & 0x0000ff00UL) <<  8) |
+		((x & 0x00ff0000UL) >>  8) |
+		((x & 0xff000000UL) >> 24);
+}
+#else
+#include <sys/types.h>
+#ifdef __BEOS__
+#include <ByteOrder.h>
+#else
+#include <arpa/inet.h>
+#endif
+#include <inttypes.h>
+#endif
+
+static PyObject *revlog_c_Error;
+
+static inline uint64_t ntohll(uint64_t x)
+{
+	return (((uint64_t)ntohl((uint32_t)x)) << 32) |
+		(uint64_t)ntohl((uint32_t)(x >> 32));
+}
+
+
+/* RevlogNG format (all in big endian, data may be inlined):
+ *    6 bytes: offset
+ *    2 bytes: flags
+ *    4 bytes: compressed length
+ *    4 bytes: uncompressed length
+ *    4 bytes: base revision
+ *    4 bytes: link revision
+ *    4 bytes: parent 1 revision
+ *    4 bytes: parent 2 revision
+ *   32 bytes: nodeid (only 20 bytes used)
+ */
+static int parseindex_ng (const char *data, int size, int inlined,
+			  PyObject *index, PyObject *nodemap)
+{
+	PyObject *r = NULL; /* init to circumvent compiler warning */
+	PyObject *py_obj, *py_obj2;
+	const char *end = data + size;
+	uint32_t comp_len;
+	uint64_t offset_flags;
+	int i;
+	int n = 0;
+
+	/* TODO DRY!
+	 * if possible, do not hardcode node.nullid and node.nullrev here */
+	char nullrev[20] = {'\0','\0','\0','\0','\0','\0','\0','\0','\0','\0',
+			    '\0','\0','\0','\0','\0','\0','\0','\0','\0','\0'};
+	int nullid = -1;
+
+	while (data < end) {
+		/* create new revlog record */
+		r = PyTuple_New(8);
+		if (!r)
+			goto fail_record;
+
+		/* get offset and flags */
+		offset_flags = ntohll(*((uint64_t *)data));
+		if (n == 0) {
+			offset_flags &= 0xFFFF; /* mask out version number */
+		}
+		py_obj = PyLong_FromUnsignedLongLong(offset_flags);
+		if (!py_obj)
+			goto fail_obj;
+		PyTuple_SET_ITEM(r, 0, py_obj);
+		data += 8;
+
+		/* now there are six 4 byte values. The first one is the
+		 * compressed length which will be needed later on if data
+		 * is inlined */
+		comp_len = ntohl(*((uint32_t *)data));
+		py_obj = PyInt_FromLong(comp_len);
+		if (!py_obj)
+			goto fail_obj;
+		PyTuple_SET_ITEM(r, 1, py_obj);
+		data += 4;
+
+		/* the remaining five 4 byte values */
+		for (i = 2; i < 7; i++) {
+			py_obj = PyInt_FromLong(ntohl(*((uint32_t *)data)));
+			if (!py_obj)
+				goto fail_obj;
+			PyTuple_SET_ITEM(r, i, py_obj);
+			data += 4;
+		}
+
+		/* get node id, set in nodemap dict and in 'r' */
+		py_obj = PyString_FromStringAndSize(data, 20);
+		if (!py_obj)
+			goto fail_obj;
+		py_obj2 = PyInt_FromLong(n);
+		if (!py_obj2)
+			goto fail_obj2;
+		if (PyDict_SetItem(nodemap, py_obj, py_obj2) != 0)
+			goto fail_dict;
+		PyTuple_SET_ITEM(r, 7, py_obj);
+
+		/* append to or set value in the index list */
+		if (inlined) {
+			PyList_Append(index, r);
+			Py_DECREF(r);
+		} else {
+			PyList_SET_ITEM(index, n, r);
+		}
+
+		/* set data pointer to start of next revlog record and increase
+		 * counter */
+		data += 32 + (inlined ? comp_len : 0);
+		n++;
+	}
+	if (data > end) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(revlog_c_Error, "corrupt index file");
+		return 0;
+	}
+
+	/* add the nullid/nullrev entry to the nodemap */
+	py_obj = PyString_FromStringAndSize(nullrev, 20);
+	if (!py_obj)
+		goto fail_obj;
+	py_obj2 = PyInt_FromLong(nullid);
+	if (!py_obj2) {
+		goto fail_obj2;
+	}
+	if (PyDict_SetItem(nodemap, py_obj, py_obj2) != 0)
+		goto fail_obj2;
+
+	return 1;
+
+fail_dict:
+fail_obj2:
+	Py_XDECREF(py_obj2);
+fail_obj:
+	Py_XDECREF(py_obj);
+fail_record:
+	Py_XDECREF(r);
+	return 0;
+}
+
+/* --- callable from python ------------------------------------------------- */
+
+/* This function parses a index file and returns a Python tuple of the
+ * following format: (index, nodemap, cache)
+ *
+ * index: a list of tuples containing the RevlogNG records
+ * nodemap: a dict mapping node ids to indices in the index list
+ * cache: if data is inlined, a tuple (index_file_content, 0) else None
+ */
+static PyObject *parseindex(PyObject *self, PyObject *args)
+{
+	const char *data;
+	int size, inlined;
+	PyObject *rval, *index, *nodemap, *cache, *integer;
+	PyObject *data_obj, *inlined_obj;
+
+	if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
+		return NULL;
+	inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
+
+	/* create Python objects to be returned */
+	rval = PyTuple_New(3);
+	if (!rval)
+		goto fail_rval;
+	if (inlined) {
+		index = PyList_New(0);
+	} else {
+		/* In this case we know the size of the index list in advance:
+		 * size divided by size of one one revlog record (64 bytes) */  
+		index = PyList_New(size / 64);
+	}
+	if (!index)
+		goto fail_index;
+	nodemap = PyDict_New();
+	if (!nodemap)
+		goto fail_nodemap;
+	if (inlined) {
+		cache = PyTuple_New(2);
+		if (!cache)
+			goto fail_cache;
+		integer = PyInt_FromLong(0);
+		if (!integer)
+			goto fail_integer;
+		PyTuple_SET_ITEM(cache, 0, integer);
+
+		data_obj = PyTuple_GET_ITEM(args, 0);
+		/* change borrowed reference into owned one */
+		Py_INCREF(data_obj);
+		PyTuple_SET_ITEM(cache, 1, data_obj);
+	} else {
+		cache = Py_None;
+		Py_INCREF(Py_None);
+	}
+
+	/* actually populate the index and the nodemap with data */
+	if (!parseindex_ng (data, size, inlined, index, nodemap))
+		goto fail_parse;
+
+	/* set values for the rval tuple */
+	PyTuple_SET_ITEM(rval, 0, index);
+	PyTuple_SET_ITEM(rval, 1, nodemap);
+	PyTuple_SET_ITEM(rval, 2, cache);
+
+	return rval;
+
+fail_integer:
+	Py_XDECREF(integer);
+fail_parse:
+fail_cache:
+	Py_XDECREF(cache);
+fail_nodemap:
+	Py_XDECREF(nodemap);
+fail_index:
+	Py_XDECREF(index);
+fail_rval:
+	Py_XDECREF(rval);
+	return NULL;
+}
+
+static char revlog_c_doc[] = "Efficient implementation of revlog functions.";
+
+static PyMethodDef methods[] = {
+	{"parseindex", parseindex, METH_VARARGS,
+	 "Fast implementation of parseindex\n"},
+	{NULL, NULL}
+};
+
+PyMODINIT_FUNC initrevlog_c(void)
+{
+	Py_InitModule3("revlog_c", methods, revlog_c_doc);
+	revlog_c_Error = PyErr_NewException("revlog_c.revlog_c_Error",
+					    NULL, NULL);
+}



More information about the Mercurial-devel mailing list