D2630: xdiff: move hashtable calculation to a separate function
quark (Jun Wu)
phabricator at mercurial-scm.org
Sun Mar 4 02:51:52 UTC 2018
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
Before running the main diff algorithm, xdiff will "prepare" the contexts
for both files. That includes splitting, hashing all the lines, and building
hash tables for those lines. The hash table building process could be
expensive. Moving it out so it can be optimized separately.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D2630
AFFECTED FILES
mercurial/thirdparty/xdiff/xprepare.c
CHANGE DETAILS
diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -157,36 +157,25 @@
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
xdlclassifier_t *cf, xdfile_t *xdf) {
- unsigned int hbits;
- long nrec, hsize, bsize;
+ long nrec, bsize;
unsigned long hav;
char const *blk, *cur, *top, *prev;
xrecord_t *crec;
xrecord_t **recs, **rrecs;
- xrecord_t **rhash;
unsigned long *ha;
char *rchg;
long *rindex;
ha = NULL;
rindex = NULL;
rchg = NULL;
- rhash = NULL;
recs = NULL;
if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
goto abort;
if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
goto abort;
- {
- hbits = xdl_hashbits((unsigned int) narec);
- hsize = 1 << hbits;
- if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
- goto abort;
- memset(rhash, 0, hsize * sizeof(xrecord_t *));
- }
-
nrec = 0;
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
for (top = blk + bsize; cur < top; ) {
@@ -204,9 +193,6 @@
crec->size = (long) (cur - prev);
crec->ha = hav;
recs[nrec++] = crec;
-
- if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
- goto abort;
}
}
@@ -221,27 +207,60 @@
xdf->nrec = nrec;
xdf->recs = recs;
- xdf->hbits = hbits;
- xdf->rhash = rhash;
xdf->rchg = rchg + 1;
xdf->rindex = rindex;
xdf->nreff = 0;
xdf->ha = ha;
xdf->dstart = 0;
xdf->dend = nrec - 1;
+ /* use xdl_prepare_hashtable to set them */
+ xdf->hbits = 0;
+ xdf->rhash = NULL;
+
return 0;
abort:
xdl_free(ha);
xdl_free(rindex);
xdl_free(rchg);
- xdl_free(rhash);
xdl_free(recs);
xdl_cha_free(&xdf->rcha);
return -1;
}
+/*
+ * Adjust hash values for records (lines) in a file so the hash values become
+ * unique. This makes future calculation faster - they can just compare "ha"
+ * instead of comparing line content.
+ */
+static int xdl_prepare_hashtable(unsigned int pass, mmfile_t *mf,
+ xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) {
+ xrecord_t **rhash = NULL;
+ long nrec = xdf->nrec;
+ unsigned int hbits;
+ long hsize;
+ long i;
+
+ hbits = xdl_hashbits((unsigned int) nrec);
+ hsize = 1 << hbits;
+ if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+ goto abort;
+ memset(rhash, 0, hsize * sizeof(xrecord_t *));
+
+ for (i = 0; i < nrec; ++i) {
+ if (xdl_classify_record(pass, cf, rhash, hbits, xdf->recs[i]) < 0)
+ goto abort;
+ }
+
+ xdf->hbits = hbits;
+ xdf->rhash = rhash;
+
+ return 0;
+abort:
+ xdl_free(rhash);
+ return -1;
+}
static void xdl_free_ctx(xdfile_t *xdf) {
@@ -288,6 +307,19 @@
return -1;
}
+ if (xdl_prepare_hashtable(1, mf1, xpp, &cf, &xe->xdf1) < 0) {
+ xdl_free_ctx(&xe->xdf1);
+ xdl_free_ctx(&xe->xdf2);
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+ if (xdl_prepare_hashtable(2, mf2, xpp, &cf, &xe->xdf2) < 0) {
+ xdl_free_ctx(&xe->xdf1);
+ xdl_free_ctx(&xe->xdf2);
+ xdl_free_classifier(&cf);
+ return -1;
+ }
+
if (xdl_cleanup_records(&cf, &xe->xdf1, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf2);
xdl_free_ctx(&xe->xdf1);
To: quark, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list