[PATCH 1 of 2 V3] fixed compare filelog rev with file content, besides meta-info content
alexrayne
alexraynepe196 at gmail.com
Sun Sep 26 20:31:34 UTC 2021
# HG changeset patch
# User alexrayne <alexraynepe196 at gmail.com>
# Date 1632687557 -10800
# Sun Sep 26 23:19:17 2021 +0300
# Branch stable
# Node ID bf4231a64fb22e0268aea344f03fbab1708ca3a5
# Parent faeb0ef5079e5faeac35283873aab02c84b5cdb2
Fix comparing a filelog rev with file content, apart from meta-info content.
(patch for issue6588)
* If the meta-info of files can vary, comparing the text bodies of such revs
becomes non-trivial. Status detection of files in the WC relies on this
comparison, so this patch provides a compare that reliably ignores meta and
gives the right comparison of the text body.
* filelog.size() now returns the length of the rev text body, without meta-info
* Provide the storageutil.FilerevContent handle, intended to speed up access to
filerev text and meta. filelog caches this rev info for size and meta access.
* filelog.meta(node) provides easy access to rev meta-info via FilerevContent
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -980,14 +980,9 @@
)
if fctx._filenode is None:
- if self._repo._encodefilterpats:
- # can't rely on size() because wdir content may be decoded
- return self._filelog.cmp(self._filenode, fctx.data())
- if self.size() - 4 == fctx.size():
- # size() can match:
- # if file data starts with '\1\n', empty metadata block is
- # prepended, which adds 4 bytes to filelog.size().
- return self._filelog.cmp(self._filenode, fctx.data())
+ # since fctx have no metadata yet we compare file-content only
+ return self._filelog.cmp(self._filenode, fctx.data())
+
if self.size() == fctx.size() or self.flags() == b'l':
# size() matches: need to compare content
# issue6456: Always compare symlinks because size can represent
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -40,12 +40,26 @@
self.nullid = self._revlog.nullid
opts = opener.options
self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
+ self._infocache = {}
def __len__(self):
return len(self._revlog)
def __iter__(self):
return self._revlog.__iter__()
+
+ def _revinfo(self, node):
+ if node in self._infocache:
+ return self._infocache[node]
+ info = storageutil.FilerevContent(self._revlog, node)
+ self._infocache[node] = info
+ return info
+
+ def _revinfoinvalidate(self, node = None):
+ if node:
+ del self._infocache[node]
+ else:
+ self._infocache = {}
def hasnode(self, node):
if node in (self.nullid, nullrev):
@@ -177,16 +191,21 @@
return self._revlog.getstrippoint(minlink)
def strip(self, minlink, transaction):
+ self._revinfoinvalidate()
return self._revlog.strip(minlink, transaction)
def censorrevision(self, tr, node, tombstone=b''):
+ self._revinfoinvalidate(node)
return self._revlog.censorrevision(tr, node, tombstone=tombstone)
def files(self):
return self._revlog.files()
def read(self, node):
- return storageutil.filtermetadata(self.revision(node))
+ return self._revinfo(node).text()
+
+ def meta(self, node):
+ return self._revinfo(node).meta
def add(self, text, meta, transaction, link, p1=None, p2=None):
if meta or text.startswith(b'\1\n'):
@@ -195,27 +214,22 @@
return self.node(rev)
def renamed(self, node):
- return storageutil.filerevisioncopied(self, node)
+ return self._revinfo(node).filerevisioncopied()
def size(self, rev):
"""return the size of a given revision"""
- # for revisions with renames, we have to go the slow way
- node = self.node(rev)
- if self.renamed(node):
- return len(self.read(node))
if self.iscensored(rev):
return 0
- # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
- return self._revlog.size(rev)
+ return self._revinfo(self.node(rev)).textsize();
def cmp(self, node, text):
"""compare text with a given file revision
returns True if text is different than what is stored.
"""
- return not storageutil.filedataequivalent(self, node, text)
+ return not self._revinfo(node).filedataequivalent(text)
def verifyintegrity(self, state):
return self._revlog.verifyintegrity(state)
diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py
--- a/mercurial/utils/storageutil.py
+++ b/mercurial/utils/storageutil.py
@@ -105,6 +105,87 @@
return text[offset + 2 :]
+class FilerevContent(object):
+ node = None
+ meta = None
+
+ # meta_offset = None
+ textoffs = 0
+ textlen = 0
+ raw = None
+
+ def __init__(self, store, nodeorrev):
+ self.store = store
+ if isinstance(nodeorrev, int):
+ self.node = store.node(nodeorrev)
+ else:
+ self.node = nodeorrev
+
+ self.raw = store.revision(nodeorrev)
+ if self.raw.startswith(b'\x01\n'):
+ offset = self.raw.index(b'\x01\n', 2)
+ self.textoffs = offset+2
+ self.meta = parsemeta(self.raw)[0]
+ self.textlen = len(self.raw) - self.textoffs
+
+ def text(self):
+ return self.raw[self.textoffs :]
+
+ def textsize(self):
+ return self.textlen
+
+ def issametext(self, data):
+ return self.raw.endswith(data, self.textoffs)
+
+ def iscopied(self):
+ if not self.meta:
+ return False
+ return (b'copy' in self.meta and b'copyrev' in self.meta)
+
+ def filerevisioncopied(self):
+ if self.iscopied():
+ # copy and copyrev occur in pairs. In rare cases due to old bugs,
+ # one can occur without the other. So ensure both are present to flag
+ # as a copy.
+ return self.meta[b'copy'], bin(self.meta[b'copyrev'])
+ return False
+
+ def iscensoreddata(self):
+ if self.meta:
+ return ( b'censored' in self.meta )
+ return False
+
+ def iscensorednode(self):
+ return self.store.iscensored( self.store.rev(self.node) );
+
+ def filedataequivalent(self, filedata):
+ """Determines whether file data is equivalent to a stored node.
+
+ Returns True if the passed file data would hash to the same value
+ as a stored revision and False otherwise.
+
+ When a stored revision is censored, filedata must be empty to have
+ equivalence.
+
+ When a stored revision has copy metadata, it is ignored as part
+ of the compare.
+ """
+ if self.textlen == len(filedata):
+ # calculating cache too expensive if we alredy have raw data to compare
+ if self.issametext(filedata):
+ return True
+
+ # Censored files compare against the empty file.
+ if self.iscensoreddata() or self.iscensorednode():
+ return filedata == b''
+
+ # Renaming a file produces a different hash, even if the data
+ # remains unchanged. Check if that's the case.
+ if self.iscopied():
+ return self.issametext(filedata)
+
+ return False
+
def filerevisioncopied(store, node):
"""Resolve file revision copy metadata.
@@ -126,41 +207,8 @@
def filedataequivalent(store, node, filedata):
- """Determines whether file data is equivalent to a stored node.
-
- Returns True if the passed file data would hash to the same value
- as a stored revision and False otherwise.
-
- When a stored revision is censored, filedata must be empty to have
- equivalence.
-
- When a stored revision has copy metadata, it is ignored as part
- of the compare.
- """
-
- if filedata.startswith(b'\x01\n'):
- revisiontext = b'\x01\n\x01\n' + filedata
- else:
- revisiontext = filedata
-
- p1, p2 = store.parents(node)
-
- computednode = hashrevisionsha1(revisiontext, p1, p2)
-
- if computednode == node:
- return True
-
- # Censored files compare against the empty file.
- if store.iscensored(store.rev(node)):
- return filedata == b''
-
- # Renaming a file produces a different hash, even if the data
- # remains unchanged. Check if that's the case.
- if store.renamed(node):
- return store.read(node) == filedata
-
- return False
-
+ info = FilerevContent(store, node)
+ return info.filedataequivalent(filedata)
def iterrevs(storelen, start=0, stop=None):
"""Iterate over revision numbers in a store."""
@@ -233,7 +281,6 @@
raise error.LookupError(fileid, identifier, _(b'no match found'))
-
def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
"""Resolve information needed to strip revisions.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mercurial-1.patch
Type: text/x-patch
Size: 9158 bytes
Desc: not available
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20210926/7fb1943f/attachment.bin>
More information about the Mercurial-devel
mailing list