[Request] [+--- ] D10834: rust: Parse "subinclude"d files along the way, not later
SimonSapin
phabricator at mercurial-scm.org
Fri Jun 4 09:03:30 UTC 2021
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.
REVISION SUMMARY
When parsing a `.hgignore` file and encountering an `include:` line,
the included file is parsed recursively right then in a depth-first fashion.
With `subinclude:` however included files were parsed (recursively) much later.
This changes it to be expanded during parsing, like `.hgignore`.
The motivation for this is an upcoming changeset that needs to detect changes
in which files are ignored or not. The plan is to hash all ignore files while
they are being read, and store that hash in the dirstate (in v2 format).
In order to allow a potential alternative implementations to read that format,
the algorithm to compute that hash must be documented. Having a well-defined
depth-first ordering for the tree of (sub-)included files makes that easier.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D10834
AFFECTED FILES
rust/hg-core/src/filepatterns.rs
rust/hg-core/src/matchers.rs
rust/hg-cpython/src/dirstate/status.rs
CHANGE DETAILS
diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -211,12 +211,9 @@
.collect();
let ignore_patterns = ignore_patterns?;
- let mut all_warnings = vec![];
- let (matcher, warnings) =
- IncludeMatcher::new(ignore_patterns, &root_dir)
- .map_err(|e| handle_fallback(py, e.into()))?;
- all_warnings.extend(warnings);
+ let matcher = IncludeMatcher::new(ignore_patterns)
+ .map_err(|e| handle_fallback(py, e.into()))?;
let (status_res, warnings) = dmap
.status(
@@ -234,9 +231,7 @@
)
.map_err(|e| handle_fallback(py, e))?;
- all_warnings.extend(warnings);
-
- build_response(py, status_res, all_warnings)
+ build_response(py, status_res, warnings)
}
e => Err(PyErr::new::<ValueError, _>(
py,
diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -11,7 +11,7 @@
dirstate::dirs_multiset::DirsChildrenMultiset,
filepatterns::{
build_single_regex, filter_subincludes, get_patterns_from_file,
- PatternFileWarning, PatternResult, SubInclude,
+ PatternFileWarning, PatternResult,
},
utils::{
files::find_dirs,
@@ -341,8 +341,8 @@
/// Returns the regex pattern and a function that matches an `HgPath` against
/// said regex formed by the given ignore patterns.
-fn build_regex_match<'a>(
- ignore_patterns: &'a [&'a IgnorePattern],
+fn build_regex_match(
+ ignore_patterns: &[IgnorePattern],
) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
let mut regexps = vec![];
let mut exact_set = HashSet::new();
@@ -478,32 +478,25 @@
/// Returns a function that checks whether a given file (in the general sense)
/// should be matched.
fn build_match<'a, 'b>(
- ignore_patterns: &'a [IgnorePattern],
- root_dir: &Path,
-) -> PatternResult<(
- Vec<u8>,
- Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
- Vec<PatternFileWarning>,
-)> {
+ ignore_patterns: Vec<IgnorePattern>,
+) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
// For debugging and printing
let mut patterns = vec![];
- let mut all_warnings = vec![];
- let (subincludes, ignore_patterns) =
- filter_subincludes(ignore_patterns, root_dir)?;
+ let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
if !subincludes.is_empty() {
// Build prefix-based matcher functions for subincludes
let mut submatchers = FastHashMap::default();
let mut prefixes = vec![];
- for SubInclude { prefix, root, path } in subincludes.into_iter() {
- let (match_fn, warnings) =
- get_ignore_function(vec![path.to_path_buf()], &root)?;
- all_warnings.extend(warnings);
- prefixes.push(prefix.to_owned());
- submatchers.insert(prefix.to_owned(), match_fn);
+ for sub_include in subincludes {
+ let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
+ let match_fn =
+ Box::new(move |path: &HgPath| matcher.matches(path));
+ prefixes.push(sub_include.prefix.clone());
+ submatchers.insert(sub_include.prefix.clone(), match_fn);
}
let match_subinclude = move |filename: &HgPath| {
@@ -556,14 +549,13 @@
}
Ok(if match_funcs.len() == 1 {
- (patterns, match_funcs.remove(0), all_warnings)
+ (patterns, match_funcs.remove(0))
} else {
(
patterns,
Box::new(move |f: &HgPath| -> bool {
match_funcs.iter().any(|match_func| match_func(f))
}),
- all_warnings,
)
})
}
@@ -588,8 +580,7 @@
all_patterns.extend(patterns.to_owned());
all_warnings.extend(warnings);
}
- let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
- all_warnings.extend(warnings);
+ let matcher = IncludeMatcher::new(all_patterns)?;
Ok((
Box::new(move |path: &HgPath| matcher.matches(path)),
all_warnings,
@@ -597,34 +588,26 @@
}
impl<'a> IncludeMatcher<'a> {
- pub fn new(
- ignore_patterns: Vec<IgnorePattern>,
- root_dir: &Path,
- ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
- let (patterns, match_fn, warnings) =
- build_match(&ignore_patterns, root_dir)?;
+ pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
let RootsDirsAndParents {
roots,
dirs,
parents,
} = roots_dirs_and_parents(&ignore_patterns)?;
-
let prefix = ignore_patterns.iter().any(|k| match k.syntax {
PatternSyntax::Path | PatternSyntax::RelPath => true,
_ => false,
});
+ let (patterns, match_fn) = build_match(ignore_patterns)?;
- Ok((
- Self {
- patterns,
- match_fn,
- prefix,
- roots,
- dirs,
- parents,
- },
- warnings,
- ))
+ Ok(Self {
+ patterns,
+ match_fn,
+ prefix,
+ roots,
+ dirs,
+ parents,
+ })
}
fn get_all_parents_children(&self) -> DirsChildrenMultiset {
@@ -810,14 +793,11 @@
#[test]
fn test_includematcher() {
// VisitchildrensetPrefix
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::RelPath,
- b"dir/subdir",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::RelPath,
+ b"dir/subdir",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
@@ -848,14 +828,11 @@
);
// VisitchildrensetRootfilesin
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::RootFiles,
- b"dir/subdir",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::RootFiles,
+ b"dir/subdir",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
@@ -886,14 +863,11 @@
);
// VisitchildrensetGlob
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::Glob,
- b"dir/z*",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::Glob,
+ b"dir/z*",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -41,7 +41,7 @@
/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
/// A regular expression
Regexp,
@@ -65,6 +65,14 @@
Include,
/// A file of patterns to match against files under the same directory
SubInclude,
+ /// SubInclude with the result of parsing the included file
+ ///
+ /// Note: there is no ExpandedInclude because that expansion can be done
+ /// in place by replacing the Include pattern by the included patterns.
+ /// SubInclude requires more handling.
+ ///
+ /// Note: `Box` is used to minimize size impact on other enum variants
+ ExpandedSubInclude(Box<SubInclude>),
}
/// Transforms a glob pattern into a regex
@@ -218,7 +226,9 @@
PatternSyntax::Glob | PatternSyntax::RootGlob => {
[glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
}
- PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
+ PatternSyntax::Include
+ | PatternSyntax::SubInclude
+ | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
}
}
@@ -441,10 +451,10 @@
pub type PatternResult<T> = Result<T, PatternError>;
/// Wrapper for `read_pattern_file` that also recursively expands `include:`
-/// patterns.
+/// and `subinclude:` patterns.
///
-/// `subinclude:` is not treated as a special pattern here: unraveling them
-/// needs to occur in the "ignore" phase.
+/// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
+/// is used for the latter to form a tree of patterns.
pub fn get_patterns_from_file(
pattern_file: &Path,
root_dir: &Path,
@@ -453,18 +463,35 @@
let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
- let IgnorePattern {
- syntax, pattern, ..
- } = &entry;
- Ok(match syntax {
+ Ok(match &entry.syntax {
PatternSyntax::Include => {
let inner_include =
- root_dir.join(get_path_from_bytes(&pattern));
+ root_dir.join(get_path_from_bytes(&entry.pattern));
let (inner_pats, inner_warnings) =
get_patterns_from_file(&inner_include, root_dir)?;
warnings.extend(inner_warnings);
inner_pats
}
+ PatternSyntax::SubInclude => {
+ let mut sub_include = SubInclude::new(
+ &root_dir,
+ &entry.pattern,
+ &entry.source,
+ )?;
+ let (inner_patterns, inner_warnings) =
+ get_patterns_from_file(
+ &sub_include.path,
+ &sub_include.root,
+ )?;
+ sub_include.included_patterns = inner_patterns;
+ warnings.extend(inner_warnings);
+ vec![IgnorePattern {
+ syntax: PatternSyntax::ExpandedSubInclude(Box::new(
+ sub_include,
+ )),
+ ..entry
+ }]
+ }
_ => vec![entry],
})
})
@@ -475,6 +502,7 @@
}
/// Holds all the information needed to handle a `subinclude:` pattern.
+#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
@@ -484,6 +512,8 @@
pub path: PathBuf,
/// Folder in the filesystem where this it applies
pub root: PathBuf,
+
+ pub included_patterns: Vec<IgnorePattern>,
}
impl SubInclude {
@@ -513,29 +543,25 @@
})?,
path: path.to_owned(),
root: new_root.to_owned(),
+ included_patterns: Vec::new(),
})
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
-pub fn filter_subincludes<'a>(
- ignore_patterns: &'a [IgnorePattern],
- root_dir: &Path,
-) -> Result<(Vec<SubInclude>, Vec<&'a IgnorePattern>), HgPathError> {
+pub fn filter_subincludes(
+ ignore_patterns: Vec<IgnorePattern>,
+) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
let mut subincludes = vec![];
let mut others = vec![];
- for ignore_pattern in ignore_patterns.iter() {
- let IgnorePattern {
- syntax,
- pattern,
- source,
- } = ignore_pattern;
- if *syntax == PatternSyntax::SubInclude {
- subincludes.push(SubInclude::new(root_dir, pattern, &source)?);
+ for pattern in ignore_patterns {
+ if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
+ {
+ subincludes.push(sub_include);
} else {
- others.push(ignore_pattern)
+ others.push(pattern)
}
}
Ok((subincludes, others))
To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mercurial-scm.org/pipermail/mercurial-patches/attachments/20210604/94694961/attachment-0001.html>
More information about the Mercurial-patches
mailing list