[Updated] D10834: rust: Parse "subinclude"d files along the way, not later
SimonSapin
phabricator at mercurial-scm.org
Mon Jun 7 07:45:58 UTC 2021
Closed by commit rHGf6bb181c75f8: rust: Parse "subinclude"d files along the way, not later (authored by SimonSapin).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D10834?vs=28436&id=28467
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D10834/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D10834
AFFECTED FILES
rust/hg-core/src/filepatterns.rs
rust/hg-core/src/matchers.rs
rust/hg-cpython/src/dirstate/status.rs
CHANGE DETAILS
diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -211,12 +211,9 @@
.collect();
let ignore_patterns = ignore_patterns?;
- let mut all_warnings = vec![];
- let (matcher, warnings) =
- IncludeMatcher::new(ignore_patterns, &root_dir)
- .map_err(|e| handle_fallback(py, e.into()))?;
- all_warnings.extend(warnings);
+ let matcher = IncludeMatcher::new(ignore_patterns)
+ .map_err(|e| handle_fallback(py, e.into()))?;
let (status_res, warnings) = dmap
.status(
@@ -234,9 +231,7 @@
)
.map_err(|e| handle_fallback(py, e))?;
- all_warnings.extend(warnings);
-
- build_response(py, status_res, all_warnings)
+ build_response(py, status_res, warnings)
}
e => Err(PyErr::new::<ValueError, _>(
py,
diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -11,7 +11,7 @@
dirstate::dirs_multiset::DirsChildrenMultiset,
filepatterns::{
build_single_regex, filter_subincludes, get_patterns_from_file,
- PatternFileWarning, PatternResult, SubInclude,
+ PatternFileWarning, PatternResult,
},
utils::{
files::find_dirs,
@@ -237,7 +237,7 @@
/// ///
/// let ignore_patterns =
/// vec![IgnorePattern::new(PatternSyntax::RootGlob, b"this*", Path::new(""))];
-/// let (matcher, _) = IncludeMatcher::new(ignore_patterns, "".as_ref()).unwrap();
+/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
/// ///
/// assert_eq!(matcher.matches(HgPath::new(b"testing")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
@@ -341,8 +341,8 @@
/// Returns the regex pattern and a function that matches an `HgPath` against
/// said regex formed by the given ignore patterns.
-fn build_regex_match<'a>(
- ignore_patterns: &'a [&'a IgnorePattern],
+fn build_regex_match(
+ ignore_patterns: &[IgnorePattern],
) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
let mut regexps = vec![];
let mut exact_set = HashSet::new();
@@ -478,32 +478,25 @@
/// Returns a function that checks whether a given file (in the general sense)
/// should be matched.
fn build_match<'a, 'b>(
- ignore_patterns: &'a [IgnorePattern],
- root_dir: &Path,
-) -> PatternResult<(
- Vec<u8>,
- Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
- Vec<PatternFileWarning>,
-)> {
+ ignore_patterns: Vec<IgnorePattern>,
+) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
// For debugging and printing
let mut patterns = vec![];
- let mut all_warnings = vec![];
- let (subincludes, ignore_patterns) =
- filter_subincludes(ignore_patterns, root_dir)?;
+ let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
if !subincludes.is_empty() {
// Build prefix-based matcher functions for subincludes
let mut submatchers = FastHashMap::default();
let mut prefixes = vec![];
- for SubInclude { prefix, root, path } in subincludes.into_iter() {
- let (match_fn, warnings) =
- get_ignore_function(vec![path.to_path_buf()], &root)?;
- all_warnings.extend(warnings);
- prefixes.push(prefix.to_owned());
- submatchers.insert(prefix.to_owned(), match_fn);
+ for sub_include in subincludes {
+ let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
+ let match_fn =
+ Box::new(move |path: &HgPath| matcher.matches(path));
+ prefixes.push(sub_include.prefix.clone());
+ submatchers.insert(sub_include.prefix.clone(), match_fn);
}
let match_subinclude = move |filename: &HgPath| {
@@ -556,14 +549,13 @@
}
Ok(if match_funcs.len() == 1 {
- (patterns, match_funcs.remove(0), all_warnings)
+ (patterns, match_funcs.remove(0))
} else {
(
patterns,
Box::new(move |f: &HgPath| -> bool {
match_funcs.iter().any(|match_func| match_func(f))
}),
- all_warnings,
)
})
}
@@ -588,8 +580,7 @@
all_patterns.extend(patterns.to_owned());
all_warnings.extend(warnings);
}
- let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
- all_warnings.extend(warnings);
+ let matcher = IncludeMatcher::new(all_patterns)?;
Ok((
Box::new(move |path: &HgPath| matcher.matches(path)),
all_warnings,
@@ -597,34 +588,26 @@
}
impl<'a> IncludeMatcher<'a> {
- pub fn new(
- ignore_patterns: Vec<IgnorePattern>,
- root_dir: &Path,
- ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
- let (patterns, match_fn, warnings) =
- build_match(&ignore_patterns, root_dir)?;
+ pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
let RootsDirsAndParents {
roots,
dirs,
parents,
} = roots_dirs_and_parents(&ignore_patterns)?;
-
let prefix = ignore_patterns.iter().any(|k| match k.syntax {
PatternSyntax::Path | PatternSyntax::RelPath => true,
_ => false,
});
+ let (patterns, match_fn) = build_match(ignore_patterns)?;
- Ok((
- Self {
- patterns,
- match_fn,
- prefix,
- roots,
- dirs,
- parents,
- },
- warnings,
- ))
+ Ok(Self {
+ patterns,
+ match_fn,
+ prefix,
+ roots,
+ dirs,
+ parents,
+ })
}
fn get_all_parents_children(&self) -> DirsChildrenMultiset {
@@ -810,14 +793,11 @@
#[test]
fn test_includematcher() {
// VisitchildrensetPrefix
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::RelPath,
- b"dir/subdir",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::RelPath,
+ b"dir/subdir",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
@@ -848,14 +828,11 @@
);
// VisitchildrensetRootfilesin
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::RootFiles,
- b"dir/subdir",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::RootFiles,
+ b"dir/subdir",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
@@ -886,14 +863,11 @@
);
// VisitchildrensetGlob
- let (matcher, _) = IncludeMatcher::new(
- vec![IgnorePattern::new(
- PatternSyntax::Glob,
- b"dir/z*",
- Path::new(""),
- )],
- "".as_ref(),
- )
+ let matcher = IncludeMatcher::new(vec![IgnorePattern::new(
+ PatternSyntax::Glob,
+ b"dir/z*",
+ Path::new(""),
+ )])
.unwrap();
let mut set = HashSet::new();
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -41,7 +41,7 @@
/// Appended to the regexp of globs
const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternSyntax {
/// A regular expression
Regexp,
@@ -65,6 +65,14 @@
Include,
/// A file of patterns to match against files under the same directory
SubInclude,
+ /// SubInclude with the result of parsing the included file
+ ///
+ /// Note: there is no ExpandedInclude because that expansion can be done
+ /// in place by replacing the Include pattern by the included patterns.
+ /// SubInclude requires more handling.
+ ///
+ /// Note: `Box` is used to minimize size impact on other enum variants
+ ExpandedSubInclude(Box<SubInclude>),
}
/// Transforms a glob pattern into a regex
@@ -218,7 +226,9 @@
PatternSyntax::Glob | PatternSyntax::RootGlob => {
[glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
}
- PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
+ PatternSyntax::Include
+ | PatternSyntax::SubInclude
+ | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
}
}
@@ -441,10 +451,10 @@
pub type PatternResult<T> = Result<T, PatternError>;
/// Wrapper for `read_pattern_file` that also recursively expands `include:`
-/// patterns.
+/// and `subinclude:` patterns.
///
-/// `subinclude:` is not treated as a special pattern here: unraveling them
-/// needs to occur in the "ignore" phase.
+/// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
+/// is used for the latter to form a tree of patterns.
pub fn get_patterns_from_file(
pattern_file: &Path,
root_dir: &Path,
@@ -453,18 +463,35 @@
let patterns = patterns
.into_iter()
.flat_map(|entry| -> PatternResult<_> {
- let IgnorePattern {
- syntax, pattern, ..
- } = &entry;
- Ok(match syntax {
+ Ok(match &entry.syntax {
PatternSyntax::Include => {
let inner_include =
- root_dir.join(get_path_from_bytes(&pattern));
+ root_dir.join(get_path_from_bytes(&entry.pattern));
let (inner_pats, inner_warnings) =
get_patterns_from_file(&inner_include, root_dir)?;
warnings.extend(inner_warnings);
inner_pats
}
+ PatternSyntax::SubInclude => {
+ let mut sub_include = SubInclude::new(
+ &root_dir,
+ &entry.pattern,
+ &entry.source,
+ )?;
+ let (inner_patterns, inner_warnings) =
+ get_patterns_from_file(
+ &sub_include.path,
+ &sub_include.root,
+ )?;
+ sub_include.included_patterns = inner_patterns;
+ warnings.extend(inner_warnings);
+ vec![IgnorePattern {
+ syntax: PatternSyntax::ExpandedSubInclude(Box::new(
+ sub_include,
+ )),
+ ..entry
+ }]
+ }
_ => vec![entry],
})
})
@@ -475,6 +502,7 @@
}
/// Holds all the information needed to handle a `subinclude:` pattern.
+#[derive(Debug, PartialEq, Eq, Clone)]
pub struct SubInclude {
/// Will be used for repository (hg) paths that start with this prefix.
/// It is relative to the current working directory, so comparing against
@@ -484,6 +512,8 @@
pub path: PathBuf,
/// Folder in the filesystem where this it applies
pub root: PathBuf,
+
+ pub included_patterns: Vec<IgnorePattern>,
}
impl SubInclude {
@@ -513,29 +543,25 @@
})?,
path: path.to_owned(),
root: new_root.to_owned(),
+ included_patterns: Vec::new(),
})
}
}
/// Separate and pre-process subincludes from other patterns for the "ignore"
/// phase.
-pub fn filter_subincludes<'a>(
- ignore_patterns: &'a [IgnorePattern],
- root_dir: &Path,
-) -> Result<(Vec<SubInclude>, Vec<&'a IgnorePattern>), HgPathError> {
+pub fn filter_subincludes(
+ ignore_patterns: Vec<IgnorePattern>,
+) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
let mut subincludes = vec![];
let mut others = vec![];
- for ignore_pattern in ignore_patterns.iter() {
- let IgnorePattern {
- syntax,
- pattern,
- source,
- } = ignore_pattern;
- if *syntax == PatternSyntax::SubInclude {
- subincludes.push(SubInclude::new(root_dir, pattern, &source)?);
+ for pattern in ignore_patterns {
+ if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
+ {
+ subincludes.push(sub_include);
} else {
- others.push(ignore_pattern)
+ others.push(pattern)
}
}
Ok((subincludes, others))
To: SimonSapin, #hg-reviewers, Alphare
Cc: Alphare, mercurial-patches
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurial-scm.org/pipermail/mercurial-patches/attachments/20210607/f6013372/attachment-0002.html>
More information about the Mercurial-patches
mailing list