[PATCH 1 of 1 v4] win32lfn: allow manipulating files with long names on Windows
Mads Kiilerich
mads at kiilerich.com
Mon Jan 24 23:37:04 UTC 2011
Aaron Cohen wrote, On 01/24/2011 07:11 AM:
> I've created a bitbucket repo:http://bitbucket.org/remleduff/win32lfn
As mentioned before, I propose you make that the home of this extension
and let it prove its worth there.
A next step could be to wait for users to demand its inclusion in the
TortoiseHg installers.
> # HG changeset patch
> # User Aaron Cohen<aaron at assonance.org>
> # Date 1295848528 18000
> # Node ID fad7e6eb443a32183d9a4d6e15d89eda767bc538
> # Parent 77351c88dd1098476c587d8fbf535963c42f809e
> win32lfn: allow manipulating files with long names on Windows
>
> Windows by default has a MAX_PATH of 260 characters. A while ago the
> "fncache" format was added which allows repositories on Windows to
> contain very long paths. At the time, a patch was proposed,
> "longpath.patch" which enabled handling of those files in the working
> copy but it was tabled.
>
> From http://mercurial.selenic.com/bts/issue839, I infer that the
> reason for this is that many tools on Windows don't handle long file
> names gracefully. Time has passed though and more programs now work,
> including all Java programs.
>
> This extension transparently uses so-called Universal Naming
> Convention (UNC) paths which allow 32768 character filenames in Windows. It does this through fairly extensive monkeypatching of the os, and util modules, similar to the win32mbcs extension.
>
> diff -r 77351c88dd10 -r fad7e6eb443a hgext/win32lfn.py
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/hgext/win32lfn.py Mon Jan 24 00:55:28 2011 -0500
> @@ -0,0 +1,317 @@
> +# Copyright 2011 Aaron Cohen<aaron at assonance.org>
> +#
> +# This software may be used and distributed according to the terms of the
> +# GNU General Public License version 2 or any later version.
> +
> +'''Allow manipulating long file names
> +
> +=== Overview ===
> +
> +Allows creating working copy files whose path is longer than 260 characters on
> + Windows (up to ~32768 characters).
> +
> +Caveats:
These are all other reasons why long file names might be problematic on
Windows, right? You might want to make it clear that they aren't
limitations of the extension.
> + - Some filesystems may have their own pathname restrictions, such as some FAT
> + filesystems. Use NTFS or a newer FAT.
> +
> + - cmd.exe has trouble manipulating long pathnames (del, move, rename will all
> + fail). Use powershell.
> +
> + - Many legacy Windows programs will have difficulty opening files with long
> + pathnames, though most java and 64-bit programs will work fine.
> +
> + - explorer.exe may have trouble manipulating directories with long paths,
> + with dialogs like, "The source file name(s) are larger than is supported
> + by the file system. Try moving to a location which has a shorter path name."
> + To address this, use a tool other than explorer.exe or delete the affected
> + files using :hg:`lfn --clean`.
> +
> + - Things get more complicated if the root of your repository is more than 244
> + characters long, including directory separators.
> +
> + - There is no way in Windows to "cd" into a directory that long. As a
> + result, to use hg with the repo, you will have to use
> + :hg:`-R` or :hg:`--repository`. Mercurial works fairly well in this case
> + and this extension does its best to workaround the lack, but there may be
> + some loss of functionality. For instance, if Mercurial launches an external
> + program such as the commit editor, it may or may not work.
> +
> + - When Mercurial first starts up, it will not be able to find the
> + ".hg" directory in such a repository until this extension is loaded.
> + This implies that this extension must be configured in either the
> + system-wide or user hgrc or mercurial.ini, not the per-repository
> + ".hg/hgrc".
> +
> +=== Configuration ===
> +
> +Enable the extension in the configuration file (mercurial.ini)::
> +
> + [extensions]
> + win32lfn=
> +'''
> +
> +import __builtin__, os, errno
> +
> +_errmap = None
What is the purpose of initializing to None? Any use before its real
initialization will fail anyway.
> +from mercurial import util, osutil
> +from mercurial.i18n import _
> +
> +_win32 = False
> +try:
> + import win32api, win32file, winerror, pywintypes
> +
> + _errmap = {
> + winerror.ERROR_ALREADY_EXISTS: errno.EEXIST,
> + winerror.ERROR_PATH_NOT_FOUND: errno.ENOENT
> + }
> + _win32 = True
> +except ImportError:
> + pass
> +
> +_uncprefix = "\\\\?\\"
> +
> +_deviceprefix = "\\\\.\\"
> +
> +_maxpath = 260
> +
> +# Windows directory paths are limited to MAX_PATH - the dos file size (8.3)
> +_maxdirpath = 248
> +
> +_warned = False
> +
> +_cwd = None
> +
> +# UNC filenames require different normalization than mercurial and python want
> +def unc(path):
It seems like "uncabspath" would be a more descriptive name.
> + if not path.startswith(_uncprefix) and not path.startswith(_deviceprefix):
> + path = os.path.abspath(path)
> + # path may now be UNC after abspath
It could be made clearer to the casual reader that os.path.abspath here
is the wrapped version installed by uisetup, not the stock one.
Perhaps wrapabspath should be defined before this use - it seems like
that is the most fundamental operation.
> + if not path.startswith(_uncprefix):
> + if path.startswith("\\\\"):
> + # path is a UNC network path (ie, \\server\share\path)
> + # Believe it or not, the literal UNC here is part of
> + # Microsoft's API
> + path = _uncprefix + "UNC" + path[1:]
> + else:
> + path = _uncprefix + path
> + return path
> +
> +def wrap1(method):
> + def fn(*args, **kwargs):
> + path = unc(args[0])
> + return method(path, *args[1:], **kwargs)
> +
> + return fn
> +
> +def wrap2(method):
> + def fn(*args, **kwargs):
> + src = unc(args[0])
> + dst = unc(args[1])
> + return method(src, dst, *args[2:], **kwargs)
> +
> + return fn
> +
> +def lfnlistdir(path):
> + '''Wrap os.listdir with a version that handles long UNC paths.
> + The original version handles UNC format ok, but breaks if the path is
> + longer than MAX_PATH.
> + Contrary to the documentation available on the web, the use of long-UNC
> + paths has been possible with both the FindFiles and FindFilesW families of
> + functions since Windows XP, so we use the 1-character wide functions to be
> + consistent with the rest of Hg by not requiring unicode.
> + This may cause Windows95/2000 to still throw FileNameTooLong exceptions.'''
> + path = unc(path)
> + if not os.path.exists(path) or not os.path.isdir(path):
> + return []
> + files = win32file.FindFilesIterator(os.path.join(path, "*.*"))
> + result = []
> + for f in files:
> + file = f[8]
> + if not file == ".." and not file == ".":
> + result.append(file)
> + return result
> +
> +def lfnmkdir(path, mode=None):
> + '''Wrap os.mkdir with a version that handles long UNC paths.
> + The original version handles UNC format ok, but breaks if the path is
> + longer than MAX_PATH.
> + Contrary to the documentation available on the web, the use of long-UNC
> + paths has been possible with both the CreateDirectory and CreateDirectoryW
> + families of functions since Windows XP, so we use the 1-character wide
> + functions to be consistent with the rest of Hg by not requiring unicode.
> + This may cause Windows95/2000 to still throw FileNameTooLong exceptions.'''
> + path = unc(path)
> + try:
> + # second parameter is a security descriptor, mapping it up to our
> + # "mode" parameter is non-trivial and hopefully unnecessary
> + win32file.CreateDirectory(path, None)
> + except pywintypes.error, err:
> + if err.winerror in _errmap:
> + pyerrno = _errmap[err.winerror]
> + raise OSError(pyerrno, err.strerror)
> + raise
> +
> +def wrapabspath(abspath):
> + '''Wrap os.path.abspath with a version that handles long UNC paths.
> + The original version handles UNC format ok, but breaks if the path is
> + longer than MAX_PATH (it returns a relative path instead of an absolute
> + one).
> + In other words, with the original version:
> +>>> os.path.abspath(30 * "123456789\\")
> +30 * "123456789\\"'''
> +
> + def lfnabspath(path):
> + result = path
> + if not os.path.isabs(result):
> + result = os.path.join(os.getcwd(), result)
> + result = os.path.normpath(result)
> + return result
> +
> + return lfnabspath
You can use extensions.wrapfunction to avoid repeating this pattern.
Mercurial only uses it to monkeypatch Mercurial, but it should work on
os.path as well.
> +
> +def _addmissingbackslash(path):
> + if path.endswith(":"):
> + path += "\\"
> + return path
Today's opinion: This function is only used once and doesn't improve
readability. It might be better to just inline it.
> +def wrapsplit(split):
> + '''Wrap os.path.split with a version that handles UNC paths.
> + The original version mostly handles UNC format ok, but breaks at the root
> + of a drive.
> + In other words, with the original version:
> +>>> os.path.split('\\\\?\\C:\\')
> +('\\\\?\\C:', '')
> + This is a problem, because it means join and split aren't inverses of each
> + other for UNC paths. Fixing this also fixes dirname which has the same
> + problem.'''
Another rationale could be that the result of
os.path.split('\\\\?\\C:\\') is very different from
os.path.split('C:\\'). This wrapper fixes that.
> + def lfnsplit(path):
> + result = split(path)
> + result = (_addmissingbackslash(result[0]), result[1])
> + return result
> +
> + return lfnsplit
> +
> +def wrapchdir(ui, chdir):
> + '''Wrap os.chdir with a version that handles long paths.
> + The Windows API has no SetCurrentDirectory function that takes a long
> + UNC path, so we emulate it internally. See:
> +http://social.msdn.microsoft.com/Forums/en/windowsgeneraldevelopmentissues/thread/7998d7ec-cf5a-4b5e-a554-13fa855e4a3d
> +
> + Where possible, we still call the original chdir function, but if we get a
> + long path we emit a warning and then emulate the chdir.
> + This is possible because all wrapped functions go through unc()
> + which converts the path to an absolute path.'''
> +
> + def lfnchdir(path):
> + path = os.path.abspath(path)
> + if len(path)>= _maxdirpath:
Is this method reliable or not? If it is reliable then why not always
use it? If it isn't reliable then why use it at all?
> + global _warned
> + if not _warned:
> + ui.warn(_("Warning, your repository contains a dir, %s, which \
> +is longer than fully supported by the OS. Attempting to workaround this. \
> +Please report any problems you encounter after seeing this message to \
> +aaron at assonance.org or the mercurial mailing list.") % path)
Warnings are generally lowercase, start with "warning:", and are less
than 80 characters.
I guess the problem isn't that the path is too long but that we chdir to it.
The warning also shows the path. Only showing the first path could be
confusing. I think it is better to show all the problems and not just
the first warning.
> + _warned = True
> + path = unc(path)
> + else:
> + chdir(path)
> + if os.path.exists(path):
> + # I'd like to use an environment variable so subprocesses get the
> + # correct cwd, but python environ can't store environment vars in
> + # a different encoding from the "current". This is probably never
> + # going to work correctly for subprocess invocations anyway.
> + global _cwd
> + _cwd = path
> + else:
> + raise OSError(errno.ENOENT, _("Directory doesn't exist: %s") % path)
> +
> + return lfnchdir
> +
> +def wrapgetcwd(getcwd):
> + '''Wrap os.getcwd() to support our emulation of os.chdir
> + for long paths. Windows provides no API function to set the current
> + directory to a long path.'''
> +
> + def lfngetcwd():
> + if _cwd:
> + result = _cwd
> + else:
> + result = getcwd()
> + # Should I un-UNC long directories here?
> + return result
> +
> + return lfngetcwd
> +
> +def uisetup(ui):
> + if not _win32:
> + ui.warn(_("win32lfn: This extension requires the pywin32 extension\n"))
> + return
> + os.listdir = lfnlistdir
> + os.mkdir = lfnmkdir
> + os.path.abspath = wrapabspath(os.path.abspath)
> + os.path.split = wrapsplit(os.path.split)
> +
> + # No wrapping needed for os.makedirs
> +
> + os.chdir = wrapchdir(ui, os.chdir)
> + os.getcwd = wrapgetcwd(os.getcwd)
> +
> + os.stat = wrap1(os.stat)
> + os.lstat = wrap1(os.lstat)
> + os.open = wrap1(os.open)
> + os.chmod = wrap1(os.chmod)
> + os.remove = wrap1(os.remove)
> + os.unlink = wrap1(os.unlink)
> + os.rmdir = wrap1(os.rmdir)
> + os.removedirs = wrap1(os.removedirs)
> + os.rename = wrap2(os.rename)
> + os.renames = wrap2(os.renames)
> + __builtin__.open = wrap1(__builtin__.open)
> +
> + osutil.listdir = wrap1(osutil.listdir)
> + osutil.posixfile = wrap1(osutil.posixfile)
> +
> + util.posixfile = wrap1(util.posixfile)
> + util.makedirs = wrap1(util.makedirs)
> + util.rename = wrap2(util.rename)
> + util.copyfile = wrap2(util.copyfile)
> + util.copyfiles = wrap2(util.copyfiles)
> + util.unlinkpath = wrap1(util.unlinkpath)
> + util.unlink = wrap1(util.unlink)
> +
> +def list(ui, repo):
> + for root, _ignored, files in os.walk(repo.root):
> + for file in files:
> + if len(os.path.join(root, file))>= _maxpath:
> + ui.write(os.path.join(root, file) + "\n")
> +
> +def clean(ui, repo, force=False):
> + for root, _ignored, files in os.walk(repo.root):
> + for file in files:
> + if len(os.path.join(root, file))>= _maxpath:
> + path = os.path.join(root, file)
> + c = ui.promptchoice(_("Delete %s (yn)?") % path,
> + (_("&No"), _("&Yes")), 0)
> + if c or force:
> + util.unlinkpath(path)
> +
> +def lfn(ui, repo, clean=None):
> + '''Search for or delete files in the working copy that are longer than
> + MAX_PATH (260) characters.
> +
> + This may make it easier to deal with such files, since many Windows
> + programs are unable to.'''
> + if clean:
> + clean(ui, repo)
> + else:
> + list(ui, repo)
> +
> +cmdtable = {
> + "lfn": (lfn,
> + [('c', 'clean', None,
> + _('Prompt to delete files longer than MAX_PATH.'))],
No force option?
> + _('hg lfn [--clean]')),
> +}
> diff -r 77351c88dd10 -r fad7e6eb443a tests/test-win32lfn.py
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/tests/test-win32lfn.py Mon Jan 24 00:55:28 2011 -0500
> @@ -0,0 +1,237 @@
> +#coding: UTF8
> +import os, errno, re
> +
> +from mercurial import ui, hg, util
> +
> +from hgext import win32lfn
> +
> +win32lfn.uisetup(ui.ui())
> +
> +cwd = os.getcwd()
> +
> +# 299 chars, mixed slashes
> +name = "123456789\\" + 28 * "123456789/" + "123456789"
> +
> +uncname = win32lfn.unc(name)
> +
> +convolutedpath = "C:\\d/d\\d/././.\\..\\\\//."
> +
> +shortpath = "C:\\"
> +
> +# This is not a unicode string, it's a byte string
> +utfpath = (30 * u"123456789\\" + u"aאהרון").encode("windows-1255")
> +
> +def testcanonization():
> + expectedpath = "\\\\?\\" + cwd + 30 * "\\123456789"
> + assert uncname == expectedpath
> +
> + canonpath = win32lfn.unc(convolutedpath)
> + expected = "\\\\?\\C:\\d\\d"
> + assert canonpath == expected
> +
> + normpath = os.path.normpath(convolutedpath)
> + expected = "C:\\d\\d"
> + assert normpath == expected
> +
> + shortunc = win32lfn.unc(shortpath)
> + expected = "\\\\?\\C:\\"
> + assert shortunc == expected
> +
> + assert os.path.dirname(shortunc) == shortunc
> +
> + head, tail = os.path.split(shortunc)
> + expected = "\\\\?\\C:\\"
> + assert head == expected, tail == ""
> +
> + # Tempting as it is, make sure we don't touch paths that were already UNC
> + assert win32lfn.unc("\\\\?\\" + convolutedpath) == \
> + "\\\\?\\" + convolutedpath
> +
> +def testbadchdir():
> + thrown = False
> + try:
> + os.chdir("Blargh")
> + except OSError, err:
> + assert err.errno == errno.ENOENT
> + thrown = True
> + assert thrown
> +
> +def computetestpaths():
> + parent = os.path.normpath(os.path.join(name, ".."))
> +
> + testpaths = [(parent, "123456789"), (cwd, name), (cwd, uncname), (cwd, utfpath)]
> +
> + # Verify "\\servername\share\repo" works, using \\localhost\c$
> + # This only works if the test is being run from a local drive, not a
> + # network share
> + index = uncname.find(":")
> + if index != -1:
> + drive, tail = re.findall("\\\\?\\\(.):(.*)", uncname)[0]
> + sharename = r"\\localhost\%s$%s" % (drive, tail)
> + testpaths.append((cwd, sharename))
> + else:
> + print "Skipping \\localhost\c$ tests, cwd is not on a local drive"
> +
> + return testpaths
> +
> +def cleanup(d):
> + if not os.path.exists(d):
> + return
> + for root, dirs, files in os.walk(d):
> + for file in files:
> + f = os.path.join(root, file)
> + if os.path.isfile(f):
> + os.unlink(f)
> + if not dirs:
> + os.removedirs(root)
> +
> +def testos(root, d):
> + print "Running os tests for %s, %s" % (root, d)
> + f1 = os.path.join(d, "f1")
> + f2 = os.path.join(d, "f2")
> +
> + assert os.path.split(f1) == (d, "f1")
> + assert os.path.dirname(f2) == d
> + assert os.path.basename(f2) == "f2"
> +
> + # Make the root if it doesn't exist and chdir into it, to test chdir
> + if not os.path.exists(root):
> + os.makedirs(root)
> + os.chdir(root)
> +
> + os.makedirs(d)
> +
> + assert os.path.exists(d)
> + assert os.path.isdir(d)
> +
> + os.rmdir(d)
> + assert not os.path.exists(d)
> +
> + # os.mkdir must raise EEXIST if the directory exists
> + os.mkdir(d)
> + thrown = False
> + try:
> + os.mkdir(d)
> + except OSError, err:
> + thrown = True
> + assert err.errno == errno.EEXIST
> + assert thrown
> +
> + f = open(f1, 'w')
> + f.write("Test")
> + f.close()
> +
> + f = open(f1, 'r')
> + assert f.readline() == "Test"
> + f.close()
> +
> + os.stat(f1)
> + os.lstat(f1)
> + os.chmod(f1, 660)
> + assert os.path.isfile(f1)
> +
> + files = os.listdir(d)
> + assert len(files) == 1
> + assert os.path.basename(f1) in files
> +
> + os.rename(f1, f2)
> + os.stat(f2)
> + assert not os.path.exists(f1)
> +
> + fd = os.open(f1, os.O_CREAT | os.O_BINARY | os.O_RDWR)
> + os.write(fd, "Test2")
> + os.close(fd)
> +
> + os.remove(f1)
> + os.unlink(f2)
> + os.removedirs(d)
> + assert not os.path.exists(os.path.join(cwd, "123456789"))
> +
> + os.chdir(cwd)
> +
> +def testutil(root, d):
> + from mercurial import util, osutil
> +
> + print "Running util tests for %s, %s" % (root, d)
> +
> + if not os.path.exists(root):
> + util.makedirs(root)
> + os.chdir(root)
> +
> + util.makedirs(d)
> + assert os.path.exists(d)
> +
> + f1 = os.path.join(d, "f1")
> + f2 = os.path.join(d, "f2")
> + f3 = os.path.join(d, "f3")
> +
> + f = util.posixfile(f1, 'w')
> + f.write("Test")
> + f.close()
> +
> + util.copyfile(f1, f2)
> +
> + f = osutil.posixfile(f3, 'w')
> + f.write("Test")
> + f.close()
> +
> + files = os.listdir(d)
> + assert len(files) == 3
> + assert os.path.basename(f1) in files and\
> + os.path.basename(f2) in files and\
> + os.path.basename(f3) in files
> +
> + files = osutil.listdir(d)
> + assert len(files) == 3
> + j = 1
> + for file in files:
> + assert file[0] == "f" + str(j)
> + j += 1
> +
> + util.copyfiles("123456789", "d123456789")
> +
> + os.remove(f1)
> + os.unlink(f2)
> + util.unlinkpath(f3)
> + assert not os.path.exists("123456789")
> +
> + # All the files still there after the copy?
> + os.rename("d123456789", "123456789")
> + files = os.listdir(d)
> + assert len(files) == 3
> + assert os.path.basename(f1) in files and\
> + os.path.basename(f2) in files and\
> + os.path.basename(f3) in files
> +
> + util.unlink(f1)
> + os.unlink(f2)
> + util.unlinkpath(f3)
> + assert not os.path.exists("123456789")
> +
> +def testclean():
> + print 'Testing "hg lfn clean"'
> +
> + util.makedirs(uncname)
> + f = util.posixfile(os.path.join(uncname, "f"), 'w')
> + f.write("Test")
> + f.close()
> +
> + u = ui.ui()
> + r = hg.repository(u, "123456789", create=True)
> +
> + win32lfn.clean(u, r, force=True)
> +
> + # The project root will still exist but everything else should be gone
> + assert not os.path.exists("123456789/123456789")
> + cleanup("123456789")
> +
> +
> +testcanonization()
> +testbadchdir()
> +# If the tests get interrupted or fail, this will cleanup
> +#cleanup("123456789")
> +#cleanup("d123456789")
> +for test in computetestpaths():
> + testos(*test)
> + testutil(*test)
> +testclean()
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list