[PATCH] issue1610: Add tests for convert's topological sort algorithm
Greg Ward
greg-hg at gerg.ca
Fri Apr 17 22:20:12 UTC 2009
# HG changeset patch
# User Greg Ward <greg-hg at gerg.ca>
# Date 1240006756 14400
# Node ID 7f37d0256d61578cc808c4247c80dc9820fc3993
# Parent 70f0eb571ea4958a2fa54dd103da6a2406543b6e
issue1610: Add tests for convert's topological sort algorithm.
diff --git a/tests/test-convert-toposort b/tests/test-convert-toposort
new file mode 100755
--- /dev/null
+++ b/tests/test-convert-toposort
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+
+# Unit tests for hgext.convert.convcmd.converter.toposort() method.
+#
+# toposort() has a couple of goals to satisfy:
+# correctness: if it doesn't generate a valid topological sort,
+# what's the point?
+# efficiency: the order in which we traverse the source DAG can
+# have a dramatic effect on the size of the manifest file
+# in the resulting Mercurial repository. Generally, toposort()
+# wants to stick to one linear chain of changesets for as long
+# as possible, rather than hop from branch to branch.
+# aesthetic appeal: in a nutshell, we want old branches in the source
+# repository to appear early in the target repository, and recent
+# branches to appear late
+#
+# Testing for efficiency is not too hard. Specifically, assert_chains()
+# lets us test that the topo sort follows linear chains of changesets for as
+# long as reasonable, only breaking at branch points. Note that
+# assert_chains() is quite dumb: the intelligence comes from the person
+# writing tests and deciding which chains must be followed.
+#
+# Testing for correctness and aesthetic appeal are both done with
+# assert_toposort(), which just takes a list of "valid" sorts. Obviously
+# we don't list all correct topo sorts for a given DAG. We don't even
+# bother to list all correct and appealing topo sorts, except for the
+# simpler test cases. So, it's possible to modify toposort() such that it
+# still generates correct and appealing sorts, but fails a test. In that
+# case, you need to verify that the resulting sort really is correct and
+# meets the "old branches stay old, recent branches stay recent"
+# requirement and, if so, add it to the list of accepted sorts passed to
+# assert_toposort().
+#
+# Currently, toposort() meets the correctness and efficiency goals, but
+# sometimes fails on aesthetic appeal. Thus, several test cases pass a
+# correct-but-unappealing sort to assert_toposort() in order that the tests
+# can pass. Once toposort() is fixed to take aesthetics into account,
+# those sorts (they are commented "yuck" below) can be removed from the
+# list of acceptable sorts.
+
+import sys, os
+sys.path.insert(
+ 0, os.path.dirname(os.path.dirname(os.path.abspath(sys.argv[0]))))
+
+import unittest
+from hgext.convert import convcmd, common
+
+class TestTopoSort(unittest.TestCase):
+ def setUp(self):
+ self.converter = convcmd.converter(
+ dummy_ui(), None, dummy_sink(), None, {})
+
+ def assert_chains(self, chains, parents):
+ actual = " ".join(self.converter.toposort(parents))
+ for chain in chains:
+ #lchain = " ".split(chain)
+ self.assert_(
+ chain in actual,
+ "expected chain %s in sort %s" % (chain, actual))
+
+ def assert_toposort(self, valid_sorts, parents):
+ # valid_sorts should really be the set of correct, efficient,
+ # aesthetically appealing sorts: i.e. we're not just looking for
+ # correctness, we're looking for a topo sort that yields a
+ # space-efficient Mercurial repository by sticking to one branch as
+ # long as it can; and that keeps old branches old, and recent
+ # braches recent.
+ actual = self.converter.toposort(parents)
+ actual = " ".join(actual)
+ self.assert_(actual in valid_sorts,
+ "expected one of:\n " +
+ ("\n ".join(map(str, valid_sorts))) +
+ "\nbut got:\n " + str(actual))
+
+ def test_simple_linear(self):
+ # simple linear chain of changesets (mainly to see if I can unit test
+ # this code)
+ parents = { '1': [], '2': ['1'], '3': ['2'] }
+ self.assert_toposort(('1 2 3',),
+ parents)
+
+ def test_one_branch(self):
+ # 1 -- 2 -- 4
+ # \
+ # +-- 3 -- 5
+ parents = {
+ '1': [], '2': ['1'], '4': ['2'],
+ '3': ['2'], '5': ['3'] }
+ self.assert_chains(['1 2', '3 5', '4'], parents)
+ self.assert_toposort(
+ ('1 2 4 3 5',
+ '1 2 3 5 4',),
+ parents)
+
+ def test_one_merged_branch(self):
+ # 1 -- 2 -- 4 ------- 6
+ # \ /
+ # +-- 3 -- 5 -+
+ parents = {
+ '1': [], '2': ['1'], '3': ['2'], '4': ['2'], '5': ['3'],
+ '6': ['4', '5'] }
+ # XXX this is currently broken: toposort() returns 1 2 4 3 5 6,
+ # i.e. it goes as far along the trunk as it can before it has
+ # to backup to include the ancestors of 6
+ #self.assert_chains(
+ # ['1 2', '4 6', '3 5'],
+ # parents)
+ self.assert_toposort(
+ ('1 2 4 3 5 6',
+ '1 2 3 5 4 6'),
+ parents)
+
+ def test_two_open_branches(self):
+ # 1 -- 2 -- 4 ------- 6 -- 7
+ # \ \
+ # +-- 3 -- 5 +--- 8
+ parents = {
+ '1': [], '2': ['1'], '4': ['2'], '6': ['4'], '7': ['6'],
+ '3': ['2'], '5': ['3'],
+ '8': ['6'] }
+ self.assert_chains(
+ ['1 2', '3 5', '4 6', '8', '7'],
+ parents)
+ self.assert_toposort(
+ ('1 2 4 6 8 7 3 5', # yuck
+ '1 2 3 5 4 6 7 8',
+ '1 2 3 5 4 6 8 7',),
+ parents)
+
+ def test_many_branches(self):
+ # 1 -- 2 -- 4 -- 6 -- 7 -- 9 -- 11 -- 13 -- 17 -- 19
+ # \ \ \
+ # 3 --- 5 --- 8 \ 14 -- 16
+ # \
+ # 10 -- 12 -- 15 -- 18 -- 20
+ # \
+ # 21 -- 22
+ parents = {
+ '1': [], '2': ['1'], '4': ['2'], '6': ['4'], '7': ['6'],
+ '9': ['7'], '11': ['9'], '13': ['11'], '17': ['13'], '19': ['17'],
+ '3': ['2'], '5': ['3'], '8': ['5'],
+ '10': ['9'], '12': ['10'], '15': ['12'], '18': ['15'], '20': ['18'],
+ '14': ['13'], '16': ['14'],
+ '21': ['18'], '22': ['21'] }
+ self.assert_chains(
+ ['1 2', '3 5 8', '4 6 7 9', '10 12 15 18', '21 22', '20',
+ '11 13', '14 16', '17 19'],
+ parents)
+ self.assert_toposort(
+ ('1 2 4 6 7 9 10 12 15 18 21 22 20 11 13 17 19 14 16 3 5 8', # yuck
+ '1 2 3 5 8 4 6 7 9 10 12 15 18 21 22 20 11 13 14 16 17 19',
+ '1 2 3 5 8 4 6 7 9 10 12 15 18 20 11 13 14 16 17 19 21 22'),
+ parents)
+
+ def test_multiple_children(self):
+ # one node has many children
+ # 1 -- 2 -- 9
+ # \-- 3 -- 5
+ # \-- 4 -- 7 -- 8
+ # \-- 6
+ parents = { '1': [], '2': ['1'], '9': ['2'],
+ '3': ['2'], '5': ['3'],
+ '4': ['2'], '7': ['4'], '8': ['7'],
+ '6': ['2'] }
+ expect_chains = ['1 2', '3 5', '4 7 8']
+ self.assert_chains(expect_chains, parents)
+ self.assert_toposort(
+ ('1 2 9 6 4 7 8 3 5', # yuck
+ '1 2 3 5 4 7 8 6 9',),
+ parents)
+
+ # add some merges to spice things up:
+ # 1 -- 2 -- 9 ----------------- 11
+ # \-- 3 -- 5 /
+ # \-- 4 -- 7 -- 8 -- 10
+ # \-- 6 ------------/
+ parents['10'] = ['8', '6']
+ parents['11'] = ['9', '10']
+ self.assert_chains(expect_chains, parents)
+ self.assert_toposort(
+ ('1 2 9 6 4 7 8 10 11 3 5', # yuck
+ '1 2 3 5 6 4 7 8 10 9 11',
+ '1 2 6 4 7 8 10 3 5 9 11',
+ '1 2 3 5 4 7 8 6 9 10 11',
+ '1 2 3 5 4 7 8 6 10 9 11',),
+ parents)
+
+ def test_real_life_1(self):
+ # This one came from a real-live CVS repository (stripped down
+ # to bare essentials, obviously).
+ # 741 -- 742 -- 948 -- 949 -- 1023 -- 1025 -- 1260 -- 1261
+ # \ \ \
+ # \ 1024 1698
+ # \
+ # 1413 -- 1417 -- 1434
+ parents = {
+ '741': [], '742': ['741'],
+ '948': ['742'], '949': ['948'],
+ '1023': ['949'], '1025': ['1023'],
+ '1260': ['1025'], '1261': ['1260'],
+ '1413': ['741'], '1417': ['1413'], '1434': ['1417'],
+ '1024': ['948'],
+ '1698': ['1260'],
+ }
+ self.assert_chains(
+ ['1413 1417 1434',
+ '949 1023 1025 1260'],
+ parents)
+ self.assert_toposort(
+ ('741 742 948 1413 1417 1434 1024 949 1023 1025 1260 1699 1261',
+ '741 1413 1417 1434 742 948 1024 949 1023 1025 1260 1698 1261',),
+ parents)
+
+
+class dummy_ui(object):
+ status = warn = note = debug = lambda self, *msg: None
+
+class dummy_sink(common.converter_sink):
+ def __init__(self):
+ pass
+
+# argh: PyUnit writes to stderr
+os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
+unittest.main()
diff --git a/tests/test-convert-toposort.out b/tests/test-convert-toposort.out
new file mode 100644
--- /dev/null
+++ b/tests/test-convert-toposort.out
@@ -0,0 +1,5 @@
+.......
+----------------------------------------------------------------------
+Ran 7 tests in 0.003s
+
+OK
More information about the Mercurial-devel
mailing list