[PATCH 1 of 2 V3] fixed compare filelog rev with file content, besides meta-info content

alexrayne alexraynepe196 at gmail.com
Sun Sep 26 20:31:34 UTC 2021


# HG changeset patch
# User alexrayne <alexraynepe196 at gmail.com>
# Date 1632687557 -10800
#      Sun Sep 26 23:19:17 2021 +0300
# Branch stable
# Node ID bf4231a64fb22e0268aea344f03fbab1708ca3a5
# Parent  faeb0ef5079e5faeac35283873aab02c84b5cdb2
fixed comparison of a filelog rev with file content, ignoring meta-info content.
        (patch for issue6588)

* If the meta-info of file revisions can vary, comparing the text body of such
  revisions becomes non-trivial. Status detection of files in the working copy
  relies on this comparison, so this patch provides a comparison that always
  ignores the meta-info and compares only the text body.

* filelog.size() now returns the length of the revision's text body, without meta-info.

* Added storageutil.FilerevContent, a handle intended to speed up access to a
        file revision's text and meta-info. filelog caches these per-revision
        handles and uses them for size and meta access.

* filelog.meta(node) provides easy access to a revision's meta-info via FilerevContent.

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -980,14 +980,9 @@
             )
 
         if fctx._filenode is None:
-            if self._repo._encodefilterpats:
-                # can't rely on size() because wdir content may be decoded
-                return self._filelog.cmp(self._filenode, fctx.data())
-            if self.size() - 4 == fctx.size():
-                # size() can match:
-                # if file data starts with '\1\n', empty metadata block is
-                # prepended, which adds 4 bytes to filelog.size().
-                return self._filelog.cmp(self._filenode, fctx.data())
+            # since fctx has no metadata yet, we compare file content only
+            return self._filelog.cmp(self._filenode, fctx.data())
+            
         if self.size() == fctx.size() or self.flags() == b'l':
             # size() matches: need to compare content
             # issue6456: Always compare symlinks because size can represent
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -40,12 +40,26 @@
         self.nullid = self._revlog.nullid
         opts = opener.options
         self._fix_issue6528 = opts.get(b'issue6528.fix-incoming', True)
+        self._infocache = {}
 
     def __len__(self):
         return len(self._revlog)
 
     def __iter__(self):
         return self._revlog.__iter__()
+    
+    def _revinfo(self, node):
+        if node in self._infocache:
+            return self._infocache[node]
+        info = storageutil.FilerevContent(self._revlog, node)
+        self._infocache[node] = info
+        return info
+    
+    def _revinfoinvalidate(self, node = None):
+        if node:
+            del self._infocache[node]
+        else:
+            self._infocache = {}
 
     def hasnode(self, node):
         if node in (self.nullid, nullrev):
@@ -177,16 +191,21 @@
         return self._revlog.getstrippoint(minlink)
 
     def strip(self, minlink, transaction):
+        self._revinfoinvalidate()
         return self._revlog.strip(minlink, transaction)
 
     def censorrevision(self, tr, node, tombstone=b''):
+        self._revinfoinvalidate(node)
         return self._revlog.censorrevision(tr, node, tombstone=tombstone)
 
     def files(self):
         return self._revlog.files()
 
     def read(self, node):
-        return storageutil.filtermetadata(self.revision(node))
+        return self._revinfo(node).text()
+    
+    def meta(self, node):
+        return self._revinfo(node).meta
 
     def add(self, text, meta, transaction, link, p1=None, p2=None):
         if meta or text.startswith(b'\1\n'):
@@ -195,27 +214,22 @@
         return self.node(rev)
 
     def renamed(self, node):
-        return storageutil.filerevisioncopied(self, node)
+        return self._revinfo(node).filerevisioncopied()
 
     def size(self, rev):
         """return the size of a given revision"""
 
-        # for revisions with renames, we have to go the slow way
-        node = self.node(rev)
-        if self.renamed(node):
-            return len(self.read(node))
         if self.iscensored(rev):
             return 0
 
-        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
-        return self._revlog.size(rev)
+        return self._revinfo(self.node(rev)).textsize();
 
     def cmp(self, node, text):
         """compare text with a given file revision
 
         returns True if text is different than what is stored.
         """
-        return not storageutil.filedataequivalent(self, node, text)
+        return not self._revinfo(node).filedataequivalent(text)
 
     def verifyintegrity(self, state):
         return self._revlog.verifyintegrity(state)
diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py
--- a/mercurial/utils/storageutil.py
+++ b/mercurial/utils/storageutil.py
@@ -105,6 +105,87 @@
     return text[offset + 2 :]
 
 
+class FilerevContent(object):
+    node = None
+    meta = None
+    
+    # meta_offset = None
+    textoffs   = 0
+    textlen    = 0
+    raw = None
+    
+    def __init__(self, store, nodeorrev):
+        self.store = store
+        if isinstance(nodeorrev, int):
+            self.node = store.node(nodeorrev)
+        else:
+            self.node = nodeorrev
+        
+        self.raw = store.revision(nodeorrev)
+        if self.raw.startswith(b'\x01\n'):
+            offset = self.raw.index(b'\x01\n', 2)
+            self.textoffs  = offset+2
+            self.meta = parsemeta(self.raw)[0]
+        self.textlen = len(self.raw) - self.textoffs
+
+    def text(self):
+        return self.raw[self.textoffs :]
+
+    def textsize(self):
+        return self.textlen
+
+    def issametext(self, data):
+        return self.raw.endswith(data, self.textoffs)
+    
+    def iscopied(self):
+        if not self.meta:
+            return False
+        return (b'copy' in self.meta and b'copyrev' in self.meta)
+    
+    def filerevisioncopied(self):
+        if self.iscopied():
+            # copy and copyrev occur in pairs. In rare cases due to old bugs,
+            # one can occur without the other. So ensure both are present to flag
+            # as a copy.
+            return self.meta[b'copy'], bin(self.meta[b'copyrev'])
+        return False
+
+    def iscensoreddata(self):
+        if self.meta:
+            return ( b'censored' in self.meta )
+        return False 
+    
+    def iscensorednode(self):
+        return self.store.iscensored( self.store.rev(self.node) );
+    
+    def filedataequivalent(self, filedata):
+        """Determines whether file data is equivalent to a stored node.
+    
+        Returns True if the passed file data would hash to the same value
+        as a stored revision and False otherwise.
+    
+        When a stored revision is censored, filedata must be empty to have
+        equivalence.
+    
+        When a stored revision has copy metadata, it is ignored as part
+        of the compare.
+        """
+        if self.textlen == len(filedata):
+            # calculating cache is too expensive if we already have raw data to compare
+            if self.issametext(filedata): 
+                return True
+
+        # Censored files compare against the empty file.
+        if self.iscensoreddata() or self.iscensorednode():
+            return filedata == b''
+    
+        # Renaming a file produces a different hash, even if the data
+        # remains unchanged. Check if that's the case.
+        if self.iscopied():
+            return self.issametext(filedata)
+    
+        return False
+
 def filerevisioncopied(store, node):
     """Resolve file revision copy metadata.
 
@@ -126,41 +207,8 @@
 
 
 def filedataequivalent(store, node, filedata):
-    """Determines whether file data is equivalent to a stored node.
-
-    Returns True if the passed file data would hash to the same value
-    as a stored revision and False otherwise.
-
-    When a stored revision is censored, filedata must be empty to have
-    equivalence.
-
-    When a stored revision has copy metadata, it is ignored as part
-    of the compare.
-    """
-
-    if filedata.startswith(b'\x01\n'):
-        revisiontext = b'\x01\n\x01\n' + filedata
-    else:
-        revisiontext = filedata
-
-    p1, p2 = store.parents(node)
-
-    computednode = hashrevisionsha1(revisiontext, p1, p2)
-
-    if computednode == node:
-        return True
-
-    # Censored files compare against the empty file.
-    if store.iscensored(store.rev(node)):
-        return filedata == b''
-
-    # Renaming a file produces a different hash, even if the data
-    # remains unchanged. Check if that's the case.
-    if store.renamed(node):
-        return store.read(node) == filedata
-
-    return False
-
+    info = FilerevContent(store, node)
+    return info.filedataequivalent(filedata)
 
 def iterrevs(storelen, start=0, stop=None):
     """Iterate over revision numbers in a store."""
@@ -233,7 +281,6 @@
 
     raise error.LookupError(fileid, identifier, _(b'no match found'))
 
-
 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
     """Resolve information needed to strip revisions.
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: mercurial-1.patch
Type: text/x-patch
Size: 9158 bytes
Desc: not available
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20210926/7fb1943f/attachment.bin>


More information about the Mercurial-devel mailing list