D10225: perf-helpers: add a search-discovery-case script
marmoute (Pierre-Yves David)
phabricator at mercurial-scm.org
Mon Mar 15 23:01:16 UTC 2021
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
This is a small script I built to look for interesting discovery cases. It is
fairly basic but could be useful in various situations, so let's put it in the
main repository.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10225
AFFECTED FILES
contrib/perf-utils/search-discovery-case
CHANGE DETAILS
diff --git a/contrib/perf-utils/search-discovery-case b/contrib/perf-utils/search-discovery-case
new file mode 100755
--- /dev/null
+++ b/contrib/perf-utils/search-discovery-case
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+# Search for interesting discovery instances
+#
+# search-discovery-case REPO [REPO]...
+#
+# This uses a subsetmaker extension (next to this script) to generate a
+# stream of random discovery instances. When interesting cases are
+# discovered, information about them is printed on stdout.
+from __future__ import print_function
+
+import json
+import os
+import queue
+import random
+import signal
+import subprocess
+import sys
+import threading
+
+# Locate the Mercurial checkout this script belongs to (two directories above
+# the script's own directory) and the `hg` executable at its root.
+this_script = os.path.abspath(sys.argv[0])
+this_dir = os.path.dirname(this_script)
+hg_dir = os.path.join(this_dir, os.pardir, os.pardir)
+HG_REPO = os.path.normpath(hg_dir)
+HG_BIN = os.path.join(HG_REPO, 'hg')
+
+# Number of worker threads to start; taken from the NUMBER_OF_PROCESSORS
+# environment variable when it is set, 8 otherwise.
+JOB = int(os.getenv('NUMBER_OF_PROCESSORS', 8))
+
+
+# Kinds of subset specification that pick_one() chooses from.
+SLICING = ('scratch', 'randomantichain', 'rev')
+
+
+def nb_revs(repo_path):
+    """Return the revision number of `tip` in the repository at `repo_path`.
+
+    Runs `hg log --template {rev} --rev tip` using the in-tree `hg`
+    executable (HG_BIN) and converts its output to an int.
+
+    Raises subprocess.CalledProcessError if `hg` exits with a non-zero
+    status, and ValueError if its output is not an integer.
+    """
+    cmd = [
+        HG_BIN,
+        '--repository',
+        repo_path,
+        'log',
+        '--template',
+        '{rev}',
+        '--rev',
+        'tip',
+    ]
+    # check_output() raises on a failing command, so a bad repository path
+    # is reported as such instead of as an unrelated int() parsing error on
+    # empty output.
+    out = subprocess.check_output(cmd)
+    return int(out)
+
+
+# (path, nb_revs(path)) for every repository named on the command line.
+repos = [(path, nb_revs(path)) for path in sys.argv[1:]]
+
+if not repos:
+    # Bail out before any worker thread is started: with an empty list the
+    # generator loop would crash in random.choice() and leave the workers
+    # blocked on the case queue forever.
+    print('usage: search-discovery-case REPO [REPO]...', file=sys.stderr)
+    sys.exit(2)
+
+
+def pick_one(repo):
+    """Randomly build one subset specification for `repo`.
+
+    `repo` is a `(path, size)` pair as stored in `repos`; only the size
+    (`repo[1]`) is used here.
+
+    Returns one of:
+
+    * ('scratch', nb, seed) with nb between 30% and 70% of the size,
+    * ('randomantichain', seed),
+    * ('rev', rev) with rev between 30% of the size and the size.
+    """
+    size = repo[1]
+    kind = random.choice(SLICING)
+    # The seed is always drawn, even for the 'rev' kind that does not use it.
+    seed = random.randint(0, 100000)
+    if kind == 'scratch':
+        count = random.randint(int(size * 0.3), int(size * 0.7))
+        return (kind, count, seed)
+    if kind == 'randomantichain':
+        return (kind, seed)
+    if kind == 'rev':
+        return (kind, random.randint(int(size * 0.3), int(size)))
+    assert False
+
+
+# Set by end() (the SIGINT handler); checked by the generator loop and by the
+# workers to know when to stop.
+done = threading.Event()
+# Cases waiting to be processed by the workers; bounded to 10 entries per
+# worker thread.
+cases = queue.Queue(maxsize=10 * JOB)
+# Values returned by process(), queued by the workers and read back by the
+# main loop.
+results = queue.Queue()
+
+
+def worker():
+ """Worker thread body: process queued cases until told to stop.
+
+ Reads cases from `cases`, runs `process()` on them and puts what it
+ returns on `results`. Returns when `done` is set or when a `None`
+ sentinel is read from the queue. Exceptions raised by `process()` are
+ reported on stderr instead of propagating.
+ """
+ while not done.is_set():
+ c = cases.get()
+ if c is None:
+ return
+ try:
+ res = process(c)
+ # NOTE(review): only the result is queued, not the case that produced
+ # it, so the reader of `results` cannot tell which case it belongs to.
+ results.put(res)
+ except Exception as exc:
+ print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
+ # Same repository, with the left and right sides swapped.
+ # NOTE(review): indentation was lost in this copy of the patch;
+ # presumably the swapped case is processed for every case rather than
+ # only after a failure -- confirm against the committed file.
+ c = (c[0], c[2], c[1])
+ try:
+ res = process(c)
+ results.put(res)
+ except Exception as exc:
+ print('processing-failed: %s %s' % (c, exc), file=sys.stderr)
+
+
+# Path of the subsetmaker extension; presumably it provides the scratch()
+# and randomantichain() revsets that to_revsets() emits -- confirm.
+SUBSET_PATH = os.path.join(HG_REPO, 'contrib', 'perf-utils', 'subsetmaker.py')
+
+
+# Common prefix of every `hg debugdiscovery` invocation: JSON output, with
+# the subsetmaker extension enabled.
+CMD_BASE = (
+ HG_BIN,
+ 'debugdiscovery',
+ '--template',
+ 'json',
+ '--config',
+ 'extensions.subset=%s' % SUBSET_PATH,
+)
+# process() appends the per-case arguments: `-R REPO`, then
+# `--local-as-revs REVSET` and `--remote-as-revs REVSET`.
+
+
+def to_revsets(case):
+    """Translate a subset specification into a revset string.
+
+    `case` is a tuple whose first item names the kind of subset, in the
+    shapes returned by pick_one():
+
+    * ('scratch', nb, seed)      -> 'not scratch(all(), nb, "seed")'
+    * ('randomantichain', seed)  -> '::randomantichain(all(), "seed")'
+    * ('rev', rev)               -> '::rev'
+
+    Any other kind trips an assertion.
+    """
+    kind = case[0]
+    if kind == 'scratch':
+        nb, seed = case[1], case[2]
+        return 'not scratch(all(), %d, "%d")' % (nb, seed)
+    if kind == 'randomantichain':
+        return '::randomantichain(all(), "%d")' % case[1]
+    if kind == 'rev':
+        return '::%d' % case[1]
+    assert False
+
+
+def process(case):
+    """Run `hg debugdiscovery` for one case and return its decoded output.
+
+    `case` is a `(repo, left, right)` triple: `repo[0]` is the repository
+    path, `left` and `right` are subset specifications that to_revsets()
+    turns into the `--local-as-revs` and `--remote-as-revs` arguments.
+
+    Returns the first element of the JSON document printed by the command.
+    """
+    repo, left, right = case
+    cmd = list(CMD_BASE) + [
+        '-R',
+        repo[0],
+        '--local-as-revs',
+        to_revsets(left),
+        '--remote-as-revs',
+        to_revsets(right),
+    ]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
+    stdout, _ = proc.communicate()
+    return json.loads(stdout)[0]
+
+
+def interesting_boundary(res):
+    """Decide whether a discovery result is worth reporting.
+
+    For now we are mostly interested in cases where discovery needs multiple
+    round-trips and where the boundary is somewhere in the middle of the
+    undecided set.
+
+    Ideally this would be configurable, but that is not a focus for now.
+
+    Return None, or (round-trips, undecided-common, undecided-missing).
+    """
+    roundtrips = res["total-roundtrips"]
+    if roundtrips <= 1:
+        return None
+    common = res["nb-ini_und-common"]
+    missing = res["nb-ini_und-missing"]
+    # A boundary at either end of the undecided set is not interesting.
+    if common == 0 or missing == 0:
+        return None
+    return (roundtrips, common, missing)
+
+
+def end(*_args, **_kwargs):
+    """Set the `done` event; installed as the SIGINT handler.
+
+    All arguments are accepted and ignored.
+    """
+    done.set()
+
+
+def format_case(case):
+    """Render a subset specification as its items joined with '-'."""
+    return '-'.join(map(str, case))
+
+
+# Ctrl-C asks the generator loop and the workers to stop.
+signal.signal(signal.SIGINT, end)
+
+for _ in range(JOB):
+    threading.Thread(target=worker).start()
+
+# Generate random cases until interrupted, reporting interesting results as
+# the workers produce them.
+nb_cases = 0
+while not done.is_set():
+    repo = random.choice(repos)
+    left = pick_one(repo)
+    right = pick_one(repo)
+    # Use a timeout instead of blocking forever: once `done` is set the
+    # workers stop taking cases, so a put() on a full queue would never
+    # return and the script could not exit.
+    try:
+        cases.put((repo, left, right), timeout=1)
+    except queue.Full:
+        queued = False
+    else:
+        queued = True
+    while not results.empty():
+        # results has a single reader so this is fine
+        res = results.get_nowait()
+        boundary = interesting_boundary(res)
+        if boundary is not None:
+            # NOTE(review): `repo`, `left` and `right` are the case that was
+            # just generated, not necessarily the one that produced `res`;
+            # fixing this requires worker() to queue the case with its result.
+            print(repo[0], format_case(left), format_case(right), *boundary)
+
+    if queued:
+        nb_cases += 1
+        if not nb_cases % 100:
+            print('[%d cases generated]' % nb_cases, file=sys.stderr)
+
+# Wake up workers blocked on an empty queue. If the queue is full the
+# sentinel is not needed: workers check `done` before taking another case.
+for _ in range(JOB):
+    try:
+        cases.put_nowait(None)
+    except queue.Full:
+        pass
+
+print('[%d cases generated]' % nb_cases, file=sys.stderr)
+# The original applied `% nb_cases` to a string without any placeholder,
+# which raises TypeError.
+print('[output generation is over]', file=sys.stderr)
To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
More information about the Mercurial-devel
mailing list