[PATCH] largefiles: use multiple threads for fetching largefiles remotely
Mads Kiilerich
mads at kiilerich.com
Fri Oct 10 00:59:53 UTC 2014
# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1412902786 -7200
# Fri Oct 10 02:59:46 2014 +0200
# Node ID 483463c1d99ba5e5979b756fc3d1255f0a7bd854
# Parent a1eb21f5caea4366310e32aa85248791d5bbfa0c
largefiles: use multiple threads for fetching largefiles remotely
Largefiles are currently fetched with one request per file. That adds a
constant per-file overhead, which results in poor network utilization.
To mitigate that, run multiple worker threads when fetching largefiles remotely.
The default is 2 threads, but it can be tweaked with the undocumented config
setting largefiles._remotegetthreads.
Some numbers with a slow server and 50 small files:
1 thread 36 s
2 threads 20 s
3 threads 15 s
4 threads 12 s
diff --git a/hgext/largefiles/basestore.py b/hgext/largefiles/basestore.py
--- a/hgext/largefiles/basestore.py
+++ b/hgext/largefiles/basestore.py
@@ -8,7 +8,7 @@
'''base class for store implementations and store-related utility code'''
-import re
+import re, threading
from mercurial import util, node, hg
from mercurial.i18n import _
@@ -37,6 +37,7 @@ class basestore(object):
self.ui = ui
self.repo = repo
self.url = url
+ self.threads = 0
def put(self, source, hash):
'''Put source file into the store so it can be retrieved by hash.'''
@@ -60,24 +61,43 @@ class basestore(object):
missing = []
ui = self.ui
- at = 0
available = self.exists(set(hash for (_filename, hash) in files))
- for filename, hash in files:
- ui.progress(_('getting largefiles'), at, unit='lfile',
- total=len(files))
- at += 1
- ui.note(_('getting %s:%s\n') % (filename, hash))
+ tasks = list(enumerate(reversed(files)))
- if not available.get(hash):
- ui.warn(_('%s: largefile %s not available from %s\n')
- % (filename, hash, util.hidepassword(self.url)))
- missing.append(filename)
- continue
+ def worker():
+ while True:
+ try:
+ task = tasks.pop()
+ except IndexError:
+ return
+ at, (filename, hash) = task
+ ui.progress(_('getting largefiles'), at, unit='lfile',
+ total=len(files))
+ ui.note(_('getting %s:%s\n') % (filename, hash))
- if self._gethash(filename, hash):
- success.append((filename, hash))
- else:
- missing.append(filename)
+ if available.get(hash):
+ if self._gethash(filename, hash):
+ success.append((filename, hash))
+ else:
+ missing.append(filename)
+ else:
+ ui.warn(_('%s: largefile %s not available from %s\n')
+ % (filename, hash, util.hidepassword(self.url)))
+ missing.append(filename)
+
+ if self.threads > 1:
+ running = []
+ for i in range(self.threads):
+ t = threading.Thread(target=worker)
+ t.setDaemon(True)
+ t.start()
+ running.append(t)
+
+ for t in running:
+ while t.isAlive():
+ t.join(0.1)
+ else:
+ worker()
ui.progress(_('getting largefiles'), None)
return (success, missing)
diff --git a/hgext/largefiles/remotestore.py b/hgext/largefiles/remotestore.py
--- a/hgext/largefiles/remotestore.py
+++ b/hgext/largefiles/remotestore.py
@@ -18,6 +18,7 @@ class remotestore(basestore.basestore):
'''a largefile store accessed over a network'''
def __init__(self, ui, repo, url):
super(remotestore, self).__init__(ui, repo, url)
+ self.threads = ui.configint(lfutil.longname, '_remotegetthreads', 2)
def put(self, source, hash):
if self.sendfile(source, hash):
More information about the Mercurial-devel
mailing list