[Request] [++---- ] D8661: rust-status: refactor status into a struct

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Thu Jun 25 09:03:29 UTC 2020


Alphare created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The code for `dirstate/status` has grown too large for comfort, this is the
  first of three patches that try to improve maintainability.
  In this patch, refactoring dirstate's status into a struct allows for slimming
  down function signatures drastically, keeping the mental (and maintenance)
  burden lower, since pretty much all of them shared a few common arguments.
  
  This had the pleasant side-effect of simplifying lifetimes a little. This has
  no observable impact on performance.
  
  The next patch will add/improve documentation and refactor some types. I tried
  to keep new code down to a minimum in this patch because it's already pretty
  big.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8661

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs
  rust/hg-cpython/src/dirstate/status.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -127,7 +127,7 @@
             let ((lookup, status_res), warnings) = status(
                 &dmap,
                 &matcher,
-                &root_dir,
+                root_dir.to_path_buf(),
                 ignore_files,
                 StatusOptions {
                     check_exec,
@@ -164,7 +164,7 @@
             let ((lookup, status_res), warnings) = status(
                 &dmap,
                 &matcher,
-                &root_dir,
+                root_dir.to_path_buf(),
                 ignore_files,
                 StatusOptions {
                     check_exec,
@@ -219,7 +219,7 @@
             let ((lookup, status_res), warnings) = status(
                 &dmap,
                 &matcher,
-                &root_dir,
+                root_dir.to_path_buf(),
                 ignore_files,
                 StatusOptions {
                     check_exec,
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -73,7 +73,7 @@
 /// Is similar to `crate::EntryState`, but represents the transient state of
 /// entries during the lifetime of a command.
 #[derive(Debug, Copy, Clone)]
-enum Dispatch {
+pub enum Dispatch {
     Unsure,
     Modified,
     Added,
@@ -214,88 +214,6 @@
     };
 }
 
-/// Get stat data about the files explicitly specified by match.
-/// TODO subrepos
-#[timed]
-fn walk_explicit<'a>(
-    files: Option<&'a HashSet<&HgPath>>,
-    dmap: &'a DirstateMap,
-    root_dir: impl AsRef<Path> + Sync + Send + 'a,
-    options: StatusOptions,
-    traversed_sender: crossbeam::Sender<HgPathBuf>,
-) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
-    files
-        .unwrap_or(&DEFAULT_WORK)
-        .par_iter()
-        .map(move |&filename| {
-            // TODO normalization
-            let normalized = filename;
-
-            let buf = match hg_path_to_path_buf(normalized) {
-                Ok(x) => x,
-                Err(e) => return Some(Err(e.into())),
-            };
-            let target = root_dir.as_ref().join(buf);
-            let st = target.symlink_metadata();
-            let in_dmap = dmap.get(normalized);
-            match st {
-                Ok(meta) => {
-                    let file_type = meta.file_type();
-                    return if file_type.is_file() || file_type.is_symlink() {
-                        if let Some(entry) = in_dmap {
-                            return Some(Ok((
-                                normalized,
-                                dispatch_found(
-                                    &normalized,
-                                    *entry,
-                                    HgMetadata::from_metadata(meta),
-                                    &dmap.copy_map,
-                                    options,
-                                ),
-                            )));
-                        }
-                        Some(Ok((normalized, Dispatch::Unknown)))
-                    } else if file_type.is_dir() {
-                        if options.collect_traversed_dirs {
-                            traversed_sender
-                                .send(normalized.to_owned())
-                                .expect("receiver should outlive sender");
-                        }
-                        Some(Ok((
-                            normalized,
-                            Dispatch::Directory {
-                                was_file: in_dmap.is_some(),
-                            },
-                        )))
-                    } else {
-                        Some(Ok((
-                            normalized,
-                            Dispatch::Bad(BadMatch::BadType(
-                                // TODO do more than unknown
-                                // Support for all `BadType` variant
-                                // varies greatly between platforms.
-                                // So far, no tests check the type and
-                                // this should be good enough for most
-                                // users.
-                                BadType::Unknown,
-                            )),
-                        )))
-                    };
-                }
-                Err(_) => {
-                    if let Some(entry) = in_dmap {
-                        return Some(Ok((
-                            normalized,
-                            dispatch_missing(entry.state),
-                        )));
-                    }
-                }
-            };
-            None
-        })
-        .flatten()
-}
-
 #[derive(Debug, Copy, Clone)]
 pub struct StatusOptions {
     /// Remember the most recent modification timeslot for status, to make
@@ -312,344 +230,6 @@
     pub collect_traversed_dirs: bool,
 }
 
-/// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
-/// If the entry is a folder that needs to be traversed, it will be handled
-/// in a separate thread.
-fn handle_traversed_entry<'a>(
-    scope: &rayon::Scope<'a>,
-    files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
-    matcher: &'a (impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
-    dmap: &'a DirstateMap,
-    old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
-    ignore_fn: &'a IgnoreFnType,
-    dir_ignore_fn: &'a IgnoreFnType,
-    options: StatusOptions,
-    filename: HgPathBuf,
-    dir_entry: DirEntry,
-    traversed_sender: crossbeam::Sender<HgPathBuf>,
-) -> IoResult<()> {
-    let file_type = dir_entry.file_type()?;
-    let entry_option = dmap.get(&filename);
-
-    if filename.as_bytes() == b".hg" {
-        // Could be a directory or a symlink
-        return Ok(());
-    }
-
-    if file_type.is_dir() {
-        handle_traversed_dir(
-            scope,
-            files_sender,
-            matcher,
-            root_dir,
-            dmap,
-            old_results,
-            ignore_fn,
-            dir_ignore_fn,
-            options,
-            entry_option,
-            filename,
-            traversed_sender,
-        );
-    } else if file_type.is_file() || file_type.is_symlink() {
-        if let Some(entry) = entry_option {
-            if matcher.matches_everything() || matcher.matches(&filename) {
-                let metadata = dir_entry.metadata()?;
-                files_sender
-                    .send(Ok((
-                        filename.to_owned(),
-                        dispatch_found(
-                            &filename,
-                            *entry,
-                            HgMetadata::from_metadata(metadata),
-                            &dmap.copy_map,
-                            options,
-                        ),
-                    )))
-                    .unwrap();
-            }
-        } else if (matcher.matches_everything() || matcher.matches(&filename))
-            && !ignore_fn(&filename)
-        {
-            if (options.list_ignored || matcher.exact_match(&filename))
-                && dir_ignore_fn(&filename)
-            {
-                if options.list_ignored {
-                    files_sender
-                        .send(Ok((filename.to_owned(), Dispatch::Ignored)))
-                        .unwrap();
-                }
-            } else if options.list_unknown {
-                files_sender
-                    .send(Ok((filename.to_owned(), Dispatch::Unknown)))
-                    .unwrap();
-            }
-        } else if ignore_fn(&filename) && options.list_ignored {
-            files_sender
-                .send(Ok((filename.to_owned(), Dispatch::Ignored)))
-                .unwrap();
-        }
-    } else if let Some(entry) = entry_option {
-        // Used to be a file or a folder, now something else.
-        if matcher.matches_everything() || matcher.matches(&filename) {
-            files_sender
-                .send(Ok((filename.to_owned(), dispatch_missing(entry.state))))
-                .unwrap();
-        }
-    }
-
-    Ok(())
-}
-
-/// A directory was found in the filesystem and needs to be traversed
-fn handle_traversed_dir<'a>(
-    scope: &rayon::Scope<'a>,
-    files_sender: &'a crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
-    matcher: &'a (impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy + 'a,
-    dmap: &'a DirstateMap,
-    old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
-    ignore_fn: &'a IgnoreFnType,
-    dir_ignore_fn: &'a IgnoreFnType,
-    options: StatusOptions,
-    entry_option: Option<&'a DirstateEntry>,
-    directory: HgPathBuf,
-    traversed_sender: crossbeam::Sender<HgPathBuf>,
-) {
-    scope.spawn(move |_| {
-        // Nested `if` until `rust-lang/rust#53668` is stable
-        if let Some(entry) = entry_option {
-            // Used to be a file, is now a folder
-            if matcher.matches_everything() || matcher.matches(&directory) {
-                files_sender
-                    .send(Ok((
-                        directory.to_owned(),
-                        dispatch_missing(entry.state),
-                    )))
-                    .unwrap();
-            }
-        }
-        // Do we need to traverse it?
-        if !ignore_fn(&directory) || options.list_ignored {
-            traverse_dir(
-                files_sender,
-                matcher,
-                root_dir,
-                dmap,
-                directory,
-                &old_results,
-                ignore_fn,
-                dir_ignore_fn,
-                options,
-                traversed_sender,
-            )
-            .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
-        }
-    });
-}
-
-/// Decides whether the directory needs to be listed, and if so handles the
-/// entries in a separate thread.
-fn traverse_dir<'a>(
-    files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
-    matcher: &'a (impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy,
-    dmap: &'a DirstateMap,
-    directory: impl AsRef<HgPath>,
-    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
-    ignore_fn: &IgnoreFnType,
-    dir_ignore_fn: &IgnoreFnType,
-    options: StatusOptions,
-    traversed_sender: crossbeam::Sender<HgPathBuf>,
-) -> IoResult<()> {
-    let directory = directory.as_ref();
-
-    if options.collect_traversed_dirs {
-        traversed_sender
-            .send(directory.to_owned())
-            .expect("receiver should outlive sender");
-    }
-
-    let visit_entries = match matcher.visit_children_set(directory) {
-        VisitChildrenSet::Empty => return Ok(()),
-        VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
-        VisitChildrenSet::Set(set) => Some(set),
-    };
-    let buf = hg_path_to_path_buf(directory)?;
-    let dir_path = root_dir.as_ref().join(buf);
-
-    let skip_dot_hg = !directory.as_bytes().is_empty();
-    let entries = match list_directory(dir_path, skip_dot_hg) {
-        Err(e) => match e.kind() {
-            ErrorKind::NotFound | ErrorKind::PermissionDenied => {
-                files_sender
-                    .send(Ok((
-                        directory.to_owned(),
-                        Dispatch::Bad(BadMatch::OsError(
-                            // Unwrapping here is OK because the error always
-                            // is a real os error
-                            e.raw_os_error().unwrap(),
-                        )),
-                    )))
-                    .unwrap();
-                return Ok(());
-            }
-            _ => return Err(e),
-        },
-        Ok(entries) => entries,
-    };
-
-    rayon::scope(|scope| -> IoResult<()> {
-        for (filename, dir_entry) in entries {
-            if let Some(ref set) = visit_entries {
-                if !set.contains(filename.deref()) {
-                    continue;
-                }
-            }
-            // TODO normalize
-            let filename = if directory.is_empty() {
-                filename.to_owned()
-            } else {
-                directory.join(&filename)
-            };
-
-            if !old_results.contains_key(filename.deref()) {
-                handle_traversed_entry(
-                    scope,
-                    files_sender,
-                    matcher,
-                    root_dir,
-                    dmap,
-                    old_results,
-                    ignore_fn,
-                    dir_ignore_fn,
-                    options,
-                    filename,
-                    dir_entry,
-                    traversed_sender.clone(),
-                )?;
-            }
-        }
-        Ok(())
-    })
-}
-
-/// Walk the working directory recursively to look for changes compared to the
-/// current `DirstateMap`.
-///
-/// This takes a mutable reference to the results to account for the `extend`
-/// in timings
-#[timed]
-fn traverse<'a>(
-    matcher: &'a (impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy,
-    dmap: &'a DirstateMap,
-    path: impl AsRef<HgPath>,
-    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
-    ignore_fn: &IgnoreFnType,
-    dir_ignore_fn: &IgnoreFnType,
-    options: StatusOptions,
-    results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
-    traversed_sender: crossbeam::Sender<HgPathBuf>,
-) -> IoResult<()> {
-    let root_dir = root_dir.as_ref();
-
-    // The traversal is done in parallel, so use a channel to gather entries.
-    // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
-    let (files_transmitter, files_receiver) = crossbeam::channel::unbounded();
-
-    traverse_dir(
-        &files_transmitter,
-        matcher,
-        root_dir,
-        &dmap,
-        path,
-        &old_results,
-        &ignore_fn,
-        &dir_ignore_fn,
-        options,
-        traversed_sender,
-    )?;
-
-    // Disconnect the channel so the receiver stops waiting
-    drop(files_transmitter);
-
-    // TODO don't collect. Find a way of replicating the behavior of
-    // `itertools::process_results`, but for `rayon::ParallelIterator`
-    let new_results: IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> =
-        files_receiver
-            .into_iter()
-            .map(|item| {
-                let (f, d) = item?;
-                Ok((Cow::Owned(f), d))
-            })
-            .collect();
-
-    results.par_extend(new_results?);
-
-    Ok(())
-}
-
-/// Stat all entries in the `DirstateMap` and mark them for dispatch.
-fn stat_dmap_entries(
-    dmap: &DirstateMap,
-    root_dir: impl AsRef<Path> + Sync + Send,
-    options: StatusOptions,
-) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
-    dmap.par_iter().map(move |(filename, entry)| {
-        let filename: &HgPath = filename;
-        let filename_as_path = hg_path_to_path_buf(filename)?;
-        let meta = root_dir.as_ref().join(filename_as_path).symlink_metadata();
-
-        match meta {
-            Ok(ref m)
-                if !(m.file_type().is_file()
-                    || m.file_type().is_symlink()) =>
-            {
-                Ok((filename, dispatch_missing(entry.state)))
-            }
-            Ok(m) => Ok((
-                filename,
-                dispatch_found(
-                    filename,
-                    *entry,
-                    HgMetadata::from_metadata(m),
-                    &dmap.copy_map,
-                    options,
-                ),
-            )),
-            Err(ref e)
-                if e.kind() == ErrorKind::NotFound
-                    || e.raw_os_error() == Some(20) =>
-            {
-                // Rust does not yet have an `ErrorKind` for
-                // `NotADirectory` (errno 20)
-                // It happens if the dirstate contains `foo/bar` and
-                // foo is not a directory
-                Ok((filename, dispatch_missing(entry.state)))
-            }
-            Err(e) => Err(e),
-        }
-    })
-}
-
-/// This takes a mutable reference to the results to account for the `extend`
-/// in timings
-#[timed]
-fn extend_from_dmap<'a>(
-    dmap: &'a DirstateMap,
-    root_dir: impl AsRef<Path> + Sync + Send,
-    options: StatusOptions,
-    results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
-) {
-    results.par_extend(
-        stat_dmap_entries(dmap, root_dir, options)
-            .flatten()
-            .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch)),
-    );
-}
-
 #[derive(Debug)]
 pub struct DirstateStatus<'a> {
     pub modified: Vec<Cow<'a, HgPath>>,
@@ -664,6 +244,583 @@
     pub traversed: Vec<HgPathBuf>,
 }
 
+#[derive(Debug)]
+pub enum StatusError {
+    IO(std::io::Error),
+    Path(HgPathError),
+    Pattern(PatternError),
+}
+
+pub type StatusResult<T> = Result<T, StatusError>;
+
+impl From<PatternError> for StatusError {
+    fn from(e: PatternError) -> Self {
+        StatusError::Pattern(e)
+    }
+}
+impl From<HgPathError> for StatusError {
+    fn from(e: HgPathError) -> Self {
+        StatusError::Path(e)
+    }
+}
+impl From<std::io::Error> for StatusError {
+    fn from(e: std::io::Error) -> Self {
+        StatusError::IO(e)
+    }
+}
+
+impl ToString for StatusError {
+    fn to_string(&self) -> String {
+        match self {
+            StatusError::IO(e) => e.to_string(),
+            StatusError::Path(e) => e.to_string(),
+            StatusError::Pattern(e) => e.to_string(),
+        }
+    }
+}
+
+pub struct Status<'a, M: Matcher + Sync> {
+    dmap: &'a DirstateMap,
+    matcher: &'a M,
+    root_dir: PathBuf,
+    options: StatusOptions,
+    ignore_fn: IgnoreFnType<'a>,
+}
+
+impl<'a, M> Status<'a, M>
+where
+    M: Matcher + Sync,
+{
+    pub fn new(
+        dmap: &'a DirstateMap,
+        matcher: &'a M,
+        root_dir: PathBuf,
+        ignore_files: Vec<PathBuf>,
+        options: StatusOptions,
+    ) -> StatusResult<(Self, Vec<PatternFileWarning>)> {
+        // Needs to outlive `dir_ignore_fn` since it's captured.
+
+        let (ignore_fn, warnings): (IgnoreFnType, _) =
+            if options.list_ignored || options.list_unknown {
+                get_ignore_function(ignore_files, &root_dir)?
+            } else {
+                (Box::new(|&_| true), vec![])
+            };
+
+        Ok((
+            Self {
+                dmap,
+                matcher,
+                root_dir,
+                options,
+                ignore_fn,
+            },
+            warnings,
+        ))
+    }
+
+    pub fn is_ignored(&self, path: impl AsRef<HgPath>) -> bool {
+        (self.ignore_fn)(path.as_ref())
+    }
+
+    /// Is the path or one of its ancestors ignored?
+    pub fn dir_ignore(&self, dir: impl AsRef<HgPath>) -> bool {
+        // Only involve ignore mechanism if we're listing unknowns or ignored.
+        if self.options.list_ignored || self.options.list_unknown {
+            if self.is_ignored(&dir) {
+                true
+            } else {
+                for p in find_dirs(dir.as_ref()) {
+                    if self.is_ignored(p) {
+                        return true;
+                    }
+                }
+                false
+            }
+        } else {
+            true
+        }
+    }
+
+    /// Get stat data about the files explicitly specified by match.
+    /// TODO subrepos
+    #[timed]
+    pub fn walk_explicit(
+        &self,
+        traversed_sender: crossbeam::Sender<HgPathBuf>,
+    ) -> (
+        Vec<(Cow<'a, HgPath>, Dispatch)>,
+        Vec<(Cow<'a, HgPath>, Dispatch)>,
+    ) {
+        self.matcher
+            .file_set()
+            .unwrap_or(&DEFAULT_WORK)
+            .par_iter()
+            .map(|&filename| -> Option<IoResult<_>> {
+                // TODO normalization
+                let normalized = filename;
+
+                let buf = match hg_path_to_path_buf(normalized) {
+                    Ok(x) => x,
+                    Err(e) => return Some(Err(e.into())),
+                };
+                let target = self.root_dir.join(buf);
+                let st = target.symlink_metadata();
+                let in_dmap = self.dmap.get(normalized);
+                match st {
+                    Ok(meta) => {
+                        let file_type = meta.file_type();
+                        return if file_type.is_file() || file_type.is_symlink()
+                        {
+                            if let Some(entry) = in_dmap {
+                                return Some(Ok((
+                                    Cow::Borrowed(normalized),
+                                    dispatch_found(
+                                        &normalized,
+                                        *entry,
+                                        HgMetadata::from_metadata(meta),
+                                        &self.dmap.copy_map,
+                                        self.options,
+                                    ),
+                                )));
+                            }
+                            Some(Ok((
+                                Cow::Borrowed(normalized),
+                                Dispatch::Unknown,
+                            )))
+                        } else if file_type.is_dir() {
+                            if self.options.collect_traversed_dirs {
+                                traversed_sender
+                                    .send(normalized.to_owned())
+                                    .expect("receiver should outlive sender");
+                            }
+                            Some(Ok((
+                                Cow::Borrowed(normalized),
+                                Dispatch::Directory {
+                                    was_file: in_dmap.is_some(),
+                                },
+                            )))
+                        } else {
+                            Some(Ok((
+                                Cow::Borrowed(normalized),
+                                Dispatch::Bad(BadMatch::BadType(
+                                    // TODO do more than unknown
+                                    // Support for all `BadType` variant
+                                    // varies greatly between platforms.
+                                    // So far, no tests check the type and
+                                    // this should be good enough for most
+                                    // users.
+                                    BadType::Unknown,
+                                )),
+                            )))
+                        };
+                    }
+                    Err(_) => {
+                        if let Some(entry) = in_dmap {
+                            return Some(Ok((
+                                Cow::Borrowed(normalized),
+                                dispatch_missing(entry.state),
+                            )));
+                        }
+                    }
+                };
+                None
+            })
+            .flatten()
+            .filter_map(Result::ok)
+            .partition(|(_, dispatch)| match dispatch {
+                Dispatch::Directory { .. } => true,
+                _ => false,
+            })
+    }
+
+    /// Walk the working directory recursively to look for changes compared to
+    /// the current `DirstateMap`.
+    ///
+    /// This takes a mutable reference to the results to account for the
+    /// `extend` in timings
+    #[timed]
+    pub fn traverse(
+        &self,
+        path: impl AsRef<HgPath>,
+        old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
+        results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
+        traversed_sender: crossbeam::Sender<HgPathBuf>,
+    ) -> IoResult<()> {
+        // The traversal is done in parallel, so use a channel to gather
+        // entries. `crossbeam::Sender` is `Sync`, while `mpsc::Sender`
+        // is not.
+        let (files_transmitter, files_receiver) =
+            crossbeam::channel::unbounded();
+
+        self.traverse_dir(
+            &files_transmitter,
+            path,
+            &old_results,
+            traversed_sender,
+        )?;
+
+        // Disconnect the channel so the receiver stops waiting
+        drop(files_transmitter);
+
+        // TODO don't collect. Find a way of replicating the behavior of
+        // `itertools::process_results`, but for `rayon::ParallelIterator`
+        let new_results: IoResult<Vec<(Cow<HgPath>, Dispatch)>> =
+            files_receiver
+                .into_iter()
+                .map(|item| {
+                    let (f, d) = item?;
+                    Ok((Cow::Owned(f), d))
+                })
+                .collect();
+
+        results.par_extend(new_results?);
+
+        Ok(())
+    }
+
+    /// Dispatch a single entry (file, folder, symlink...) found during
+    /// `traverse`. If the entry is a folder that needs to be traversed, it
+    /// will be handled in a separate thread.
+    fn handle_traversed_entry<'b>(
+        &'a self,
+        scope: &rayon::Scope<'b>,
+        files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
+        old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
+        filename: HgPathBuf,
+        dir_entry: DirEntry,
+        traversed_sender: crossbeam::Sender<HgPathBuf>,
+    ) -> IoResult<()>
+    where
+        'a: 'b,
+    {
+        let file_type = dir_entry.file_type()?;
+        let entry_option = self.dmap.get(&filename);
+
+        if filename.as_bytes() == b".hg" {
+            // Could be a directory or a symlink
+            return Ok(());
+        }
+
+        if file_type.is_dir() {
+            self.handle_traversed_dir(
+                scope,
+                files_sender,
+                old_results,
+                entry_option,
+                filename,
+                traversed_sender,
+            );
+        } else if file_type.is_file() || file_type.is_symlink() {
+            if let Some(entry) = entry_option {
+                if self.matcher.matches_everything()
+                    || self.matcher.matches(&filename)
+                {
+                    let metadata = dir_entry.metadata()?;
+                    files_sender
+                        .send(Ok((
+                            filename.to_owned(),
+                            dispatch_found(
+                                &filename,
+                                *entry,
+                                HgMetadata::from_metadata(metadata),
+                                &self.dmap.copy_map,
+                                self.options,
+                            ),
+                        )))
+                        .unwrap();
+                }
+            } else if (self.matcher.matches_everything()
+                || self.matcher.matches(&filename))
+                && !self.is_ignored(&filename)
+            {
+                if (self.options.list_ignored
+                    || self.matcher.exact_match(&filename))
+                    && self.dir_ignore(&filename)
+                {
+                    if self.options.list_ignored {
+                        files_sender
+                            .send(Ok((filename.to_owned(), Dispatch::Ignored)))
+                            .unwrap();
+                    }
+                } else if self.options.list_unknown {
+                    files_sender
+                        .send(Ok((filename.to_owned(), Dispatch::Unknown)))
+                        .unwrap();
+                }
+            } else if self.is_ignored(&filename) && self.options.list_ignored {
+                files_sender
+                    .send(Ok((filename.to_owned(), Dispatch::Ignored)))
+                    .unwrap();
+            }
+        } else if let Some(entry) = entry_option {
+            // Used to be a file or a folder, now something else.
+            if self.matcher.matches_everything()
+                || self.matcher.matches(&filename)
+            {
+                files_sender
+                    .send(Ok((
+                        filename.to_owned(),
+                        dispatch_missing(entry.state),
+                    )))
+                    .unwrap();
+            }
+        }
+
+        Ok(())
+    }
+
+    /// A directory was found in the filesystem and needs to be traversed
+    fn handle_traversed_dir<'b>(
+        &'a self,
+        scope: &rayon::Scope<'b>,
+        files_sender: &'b crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
+        old_results: &'a FastHashMap<Cow<HgPath>, Dispatch>,
+        entry_option: Option<&'a DirstateEntry>,
+        directory: HgPathBuf,
+        traversed_sender: crossbeam::Sender<HgPathBuf>,
+    ) where
+        'a: 'b,
+    {
+        scope.spawn(move |_| {
+            // Nested `if` until `rust-lang/rust#53668` is stable
+            if let Some(entry) = entry_option {
+                // Used to be a file, is now a folder
+                if self.matcher.matches_everything()
+                    || self.matcher.matches(&directory)
+                {
+                    files_sender
+                        .send(Ok((
+                            directory.to_owned(),
+                            dispatch_missing(entry.state),
+                        )))
+                        .unwrap();
+                }
+            }
+            // Do we need to traverse it?
+            if !self.is_ignored(&directory) || self.options.list_ignored {
+                self.traverse_dir(
+                    files_sender,
+                    directory,
+                    &old_results,
+                    traversed_sender,
+                )
+                .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
+            }
+        });
+    }
+
+    /// Decides whether the directory needs to be listed, and if so handles the
+    /// entries in a separate thread.
+    fn traverse_dir(
+        &self,
+        files_sender: &crossbeam::Sender<IoResult<(HgPathBuf, Dispatch)>>,
+        directory: impl AsRef<HgPath>,
+        old_results: &FastHashMap<Cow<HgPath>, Dispatch>,
+        traversed_sender: crossbeam::Sender<HgPathBuf>,
+    ) -> IoResult<()> {
+        let directory = directory.as_ref();
+
+        if self.options.collect_traversed_dirs {
+            traversed_sender
+                .send(directory.to_owned())
+                .expect("receiver should outlive sender");
+        }
+
+        let visit_entries = match self.matcher.visit_children_set(directory) {
+            VisitChildrenSet::Empty => return Ok(()),
+            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+            VisitChildrenSet::Set(set) => Some(set),
+        };
+        let buf = hg_path_to_path_buf(directory)?;
+        let dir_path = self.root_dir.join(buf);
+
+        let skip_dot_hg = !directory.as_bytes().is_empty();
+        let entries = match list_directory(dir_path, skip_dot_hg) {
+            Err(e) => match e.kind() {
+                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                    files_sender
+                        .send(Ok((
+                            directory.to_owned(),
+                            Dispatch::Bad(BadMatch::OsError(
+                                // Unwrapping here is OK because the error
+                                // always is a
+                                // real os error
+                                e.raw_os_error().unwrap(),
+                            )),
+                        )))
+                        .unwrap();
+                    return Ok(());
+                }
+                _ => return Err(e),
+            },
+            Ok(entries) => entries,
+        };
+
+        rayon::scope(|scope| -> IoResult<()> {
+            for (filename, dir_entry) in entries {
+                if let Some(ref set) = visit_entries {
+                    if !set.contains(filename.deref()) {
+                        continue;
+                    }
+                }
+                // TODO normalize
+                let filename = if directory.is_empty() {
+                    filename.to_owned()
+                } else {
+                    directory.join(&filename)
+                };
+
+                if !old_results.contains_key(filename.deref()) {
+                    self.handle_traversed_entry(
+                        scope,
+                        files_sender,
+                        old_results,
+                        filename,
+                        dir_entry,
+                        traversed_sender.clone(),
+                    )?;
+                }
+            }
+            Ok(())
+        })
+    }
+
+    /// This takes a mutable reference to the results to account for the
+    /// `extend` in timings
+    #[timed]
+    fn handle_unknowns(
+        &self,
+        results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
+    ) -> IoResult<()> {
+        let to_visit: Vec<(&HgPath, &DirstateEntry)> =
+            if results.is_empty() && self.matcher.matches_everything() {
+                self.dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
+            } else {
+                // Only convert to a hashmap if needed.
+                let old_results: FastHashMap<_, _> =
+                    results.iter().cloned().collect();
+                self.dmap
+                    .iter()
+                    .filter_map(move |(f, e)| {
+                        if !old_results.contains_key(f.deref())
+                            && self.matcher.matches(f)
+                        {
+                            Some((f.deref(), e))
+                        } else {
+                            None
+                        }
+                    })
+                    .collect()
+            };
+
+        // We walked all dirs under the roots that weren't ignored, and
+        // everything that matched was stat'ed and is already in results.
+        // The rest must thus be ignored or under a symlink.
+        let path_auditor = PathAuditor::new(&self.root_dir);
+
+        // TODO don't collect. Find a way of replicating the behavior of
+        // `itertools::process_results`, but for `rayon::ParallelIterator`
+        let new_results: IoResult<Vec<_>> = to_visit
+            .into_par_iter()
+            .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
+                // Report ignored items in the dmap as long as they are not
+                // under a symlink directory.
+                if path_auditor.check(filename) {
+                    // TODO normalize for case-insensitive filesystems
+                    let buf = match hg_path_to_path_buf(filename) {
+                        Ok(x) => x,
+                        Err(e) => return Some(Err(e.into())),
+                    };
+                    Some(Ok((
+                        Cow::Borrowed(filename),
+                        match self.root_dir.join(&buf).symlink_metadata() {
+                            // File was just ignored, no links, and exists
+                            Ok(meta) => {
+                                let metadata = HgMetadata::from_metadata(meta);
+                                dispatch_found(
+                                    filename,
+                                    *entry,
+                                    metadata,
+                                    &self.dmap.copy_map,
+                                    self.options,
+                                )
+                            }
+                            // File doesn't exist
+                            Err(_) => dispatch_missing(entry.state),
+                        },
+                    )))
+                } else {
+                    // It's either missing or under a symlink directory which
+                    // we, in this case, report as missing.
+                    Some(Ok((
+                        Cow::Borrowed(filename),
+                        dispatch_missing(entry.state),
+                    )))
+                }
+            })
+            .collect();
+
+        results.par_extend(new_results?);
+
+        Ok(())
+    }
+
+    /// This takes a mutable reference to the results to account for the
+    /// `extend` in timings
+    #[timed]
+    fn extend_from_dmap(
+        &self,
+        results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
+    ) {
+        results.par_extend(self.dmap.par_iter().flat_map(
+            move |(filename, entry)| {
+                let filename: &HgPath = filename;
+                let filename_as_path = hg_path_to_path_buf(filename)?;
+                let meta =
+                    self.root_dir.join(filename_as_path).symlink_metadata();
+
+                match meta {
+                    Ok(ref m)
+                        if !(m.file_type().is_file()
+                            || m.file_type().is_symlink()) =>
+                    {
+                        Ok((
+                            Cow::Borrowed(filename),
+                            dispatch_missing(entry.state),
+                        ))
+                    }
+                    Ok(m) => Ok((
+                        Cow::Borrowed(filename),
+                        dispatch_found(
+                            filename,
+                            *entry,
+                            HgMetadata::from_metadata(m),
+                            &self.dmap.copy_map,
+                            self.options,
+                        ),
+                    )),
+                    Err(ref e)
+                        if e.kind() == ErrorKind::NotFound
+                            || e.raw_os_error() == Some(20) =>
+                    {
+                        // Rust does not yet have an `ErrorKind` for
+                        // `NotADirectory` (errno 20)
+                        // It happens if the dirstate contains `foo/bar`
+                        // and foo is not a
+                        // directory
+                        Ok((
+                            Cow::Borrowed(filename),
+                            dispatch_missing(entry.state),
+                        ))
+                    }
+                    Err(e) => Err(e),
+                }
+            },
+        ));
+    }
+}
+
 #[timed]
 fn build_response<'a>(
     results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
@@ -711,189 +868,29 @@
     )
 }
 
-#[derive(Debug)]
-pub enum StatusError {
-    IO(std::io::Error),
-    Path(HgPathError),
-    Pattern(PatternError),
-}
-
-pub type StatusResult<T> = Result<T, StatusError>;
-
-impl From<PatternError> for StatusError {
-    fn from(e: PatternError) -> Self {
-        StatusError::Pattern(e)
-    }
-}
-impl From<HgPathError> for StatusError {
-    fn from(e: HgPathError) -> Self {
-        StatusError::Path(e)
-    }
-}
-impl From<std::io::Error> for StatusError {
-    fn from(e: std::io::Error) -> Self {
-        StatusError::IO(e)
-    }
-}
-
-impl ToString for StatusError {
-    fn to_string(&self) -> String {
-        match self {
-            StatusError::IO(e) => e.to_string(),
-            StatusError::Path(e) => e.to_string(),
-            StatusError::Pattern(e) => e.to_string(),
-        }
-    }
-}
-
-/// This takes a mutable reference to the results to account for the `extend`
-/// in timings
-#[timed]
-fn handle_unknowns<'a>(
-    dmap: &'a DirstateMap,
-    matcher: &(impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy,
-    options: StatusOptions,
-    results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
-) -> IoResult<()> {
-    let to_visit: Vec<(&HgPath, &DirstateEntry)> = if results.is_empty()
-        && matcher.matches_everything()
-    {
-        dmap.iter().map(|(f, e)| (f.deref(), e)).collect()
-    } else {
-        // Only convert to a hashmap if needed.
-        let old_results: FastHashMap<_, _> = results.iter().cloned().collect();
-        dmap.iter()
-            .filter_map(move |(f, e)| {
-                if !old_results.contains_key(f.deref()) && matcher.matches(f) {
-                    Some((f.deref(), e))
-                } else {
-                    None
-                }
-            })
-            .collect()
-    };
-
-    // We walked all dirs under the roots that weren't ignored, and
-    // everything that matched was stat'ed and is already in results.
-    // The rest must thus be ignored or under a symlink.
-    let path_auditor = PathAuditor::new(root_dir);
-
-    // TODO don't collect. Find a way of replicating the behavior of
-    // `itertools::process_results`, but for `rayon::ParallelIterator`
-    let new_results: IoResult<Vec<_>> = to_visit
-        .into_par_iter()
-        .filter_map(|(filename, entry)| -> Option<IoResult<_>> {
-            // Report ignored items in the dmap as long as they are not
-            // under a symlink directory.
-            if path_auditor.check(filename) {
-                // TODO normalize for case-insensitive filesystems
-                let buf = match hg_path_to_path_buf(filename) {
-                    Ok(x) => x,
-                    Err(e) => return Some(Err(e.into())),
-                };
-                Some(Ok((
-                    Cow::Borrowed(filename),
-                    match root_dir.as_ref().join(&buf).symlink_metadata() {
-                        // File was just ignored, no links, and exists
-                        Ok(meta) => {
-                            let metadata = HgMetadata::from_metadata(meta);
-                            dispatch_found(
-                                filename,
-                                *entry,
-                                metadata,
-                                &dmap.copy_map,
-                                options,
-                            )
-                        }
-                        // File doesn't exist
-                        Err(_) => dispatch_missing(entry.state),
-                    },
-                )))
-            } else {
-                // It's either missing or under a symlink directory which
-                // we, in this case, report as missing.
-                Some(Ok((
-                    Cow::Borrowed(filename),
-                    dispatch_missing(entry.state),
-                )))
-            }
-        })
-        .collect();
-
-    results.par_extend(new_results?);
-
-    Ok(())
-}
-
 /// Get the status of files in the working directory.
 ///
 /// This is the current entry-point for `hg-core` and is realistically unusable
 /// outside of a Python context because its arguments need to provide a lot of
 /// information that will not be necessary in the future.
 #[timed]
-pub fn status<'a: 'c, 'b: 'c, 'c>(
+pub fn status<'a>(
     dmap: &'a DirstateMap,
-    matcher: &'b (impl Matcher + Sync),
-    root_dir: impl AsRef<Path> + Sync + Send + Copy + 'c,
+    matcher: &'a (impl Matcher + Sync),
+    root_dir: PathBuf,
     ignore_files: Vec<PathBuf>,
     options: StatusOptions,
 ) -> StatusResult<(
-    (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
+    (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>),
     Vec<PatternFileWarning>,
 )> {
-    // Needs to outlive `dir_ignore_fn` since it's captured.
-    let ignore_fn: IgnoreFnType;
-
-    // Only involve real ignore mechanism if we're listing unknowns or ignored.
-    let (dir_ignore_fn, warnings): (IgnoreFnType, _) = if options.list_ignored
-        || options.list_unknown
-    {
-        let (ignore, warnings) = get_ignore_function(ignore_files, root_dir)?;
-
-        ignore_fn = ignore;
-        let dir_ignore_fn = Box::new(|dir: &_| {
-            // Is the path or one of its ancestors ignored?
-            if ignore_fn(dir) {
-                true
-            } else {
-                for p in find_dirs(dir) {
-                    if ignore_fn(p) {
-                        return true;
-                    }
-                }
-                false
-            }
-        });
-        (dir_ignore_fn, warnings)
-    } else {
-        ignore_fn = Box::new(|&_| true);
-        (Box::new(|&_| true), vec![])
-    };
-
-    let files = matcher.file_set();
-
-    // `crossbeam::Sender` is `Sync`, while `mpsc::Sender` is not.
-    let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
+    let (traversed_sender, traversed_receiver) =
+        crossbeam::channel::unbounded();
+    let (st, warnings) =
+        Status::new(dmap, matcher, root_dir, ignore_files, options)?;
 
     // Step 1: check the files explicitly mentioned by the user
-    let explicit = walk_explicit(
-        files,
-        &dmap,
-        root_dir,
-        options,
-        traversed_sender.clone(),
-    );
-
-    // Collect results into a `Vec` because we do very few lookups in most
-    // cases.
-    let (work, mut results): (Vec<_>, Vec<_>) = explicit
-        .filter_map(Result::ok)
-        .map(|(filename, dispatch)| (Cow::Borrowed(filename), dispatch))
-        .partition(|(_, dispatch)| match dispatch {
-            Dispatch::Directory { .. } => true,
-            _ => false,
-        });
+    let (work, mut results) = st.walk_explicit(traversed_sender.clone());
 
     if !work.is_empty() {
         // Hashmaps are quite a bit slower to build than vecs, so only build it
@@ -909,17 +906,11 @@
                         results.push((dir.to_owned(), Dispatch::Removed));
                     }
                     if options.list_ignored
-                        || options.list_unknown && !dir_ignore_fn(&dir)
+                        || options.list_unknown && !st.dir_ignore(&dir)
                     {
-                        traverse(
-                            matcher,
-                            root_dir,
-                            &dmap,
+                        st.traverse(
                             &dir,
                             &old_results,
-                            &ignore_fn,
-                            &dir_ignore_fn,
-                            options,
                             &mut results,
                             traversed_sender.clone(),
                         )?;
@@ -937,17 +928,16 @@
         // symlink directory.
 
         if options.list_unknown {
-            handle_unknowns(dmap, matcher, root_dir, options, &mut results)?;
+            st.handle_unknowns(&mut results)?;
         } else {
             // We may not have walked the full directory tree above, so stat
             // and check everything we missed.
-            extend_from_dmap(&dmap, root_dir, options, &mut results);
+            st.extend_from_dmap(&mut results);
         }
     }
 
-    // Close the channel
     drop(traversed_sender);
-    let traversed_dirs = traversed_recv.into_iter().collect();
+    let traversed = traversed_receiver.into_iter().collect();
 
-    Ok((build_response(results, traversed_dirs), warnings))
+    Ok((build_response(results, traversed), warnings))
 }



To: Alphare, #hg-reviewers
Cc: mercurial-patches, mercurial-devel


More information about the Mercurial-patches mailing list