feat: ignore node_modules and .git folders when collecting files everywhere (#16862)

We currently only do this for fmt. This makes it so they're excluded by
default, but you can still opt into these directories by explicitly
specifying them.
This commit is contained in:
David Sherret 2022-12-07 13:10:10 -05:00 committed by GitHub
parent f4385866f8
commit 9c1ab39e19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 215 additions and 111 deletions

View File

@ -6,7 +6,7 @@ use crate::colors;
use crate::emit::get_source_hash;
use crate::proc_state::ProcState;
use crate::tools::fmt::format_json;
use crate::util::fs::collect_files;
use crate::util::fs::FileCollector;
use crate::util::text_encoding::source_map_from_code;
use deno_ast::MediaType;
@ -558,9 +558,13 @@ fn collect_coverages(
ignore: Vec<PathBuf>,
) -> Result<Vec<ScriptCoverage>, AnyError> {
let mut coverages: Vec<ScriptCoverage> = Vec::new();
let file_paths = collect_files(&files, &ignore, |file_path| {
let file_paths = FileCollector::new(|file_path| {
file_path.extension().map_or(false, |ext| ext == "json")
})?;
})
.ignore_git_folder()
.ignore_node_modules()
.add_ignore_paths(&ignore)
.collect_files(&files)?;
for file_path in file_paths {
let json = fs::read_to_string(file_path.as_path())?;

View File

@ -15,7 +15,7 @@ use crate::colors;
use crate::util::diff::diff;
use crate::util::file_watcher;
use crate::util::file_watcher::ResolutionResult;
use crate::util::fs::collect_files;
use crate::util::fs::FileCollector;
use crate::util::path::get_extension;
use crate::util::path::specifier_to_file_path;
use crate::util::text_encoding;
@ -92,17 +92,11 @@ pub async fn format(
maybe_fmt_config.map(|c| c.options).unwrap_or_default(),
);
let fmt_predicate = |path: &Path| {
is_supported_ext_fmt(path)
&& !contains_git(path)
&& !contains_node_modules(path)
};
let resolver = |changed: Option<Vec<PathBuf>>| {
let files_changed = changed.is_some();
let result = collect_files(&include_files, &exclude_files, fmt_predicate)
.map(|files| {
let result =
collect_fmt_files(&include_files, &exclude_files).map(|files| {
let refmt_files = if let Some(paths) = changed {
if check {
files
@ -164,8 +158,8 @@ pub async fn format(
)
.await?;
} else {
let files = collect_files(&include_files, &exclude_files, fmt_predicate)
.and_then(|files| {
let files =
collect_fmt_files(&include_files, &exclude_files).and_then(|files| {
if files.is_empty() {
Err(generic_error("No target files found."))
} else {
@ -178,6 +172,17 @@ pub async fn format(
Ok(())
}
fn collect_fmt_files(
include_files: &[PathBuf],
exclude_files: &[PathBuf],
) -> Result<Vec<PathBuf>, AnyError> {
FileCollector::new(is_supported_ext_fmt)
.ignore_git_folder()
.ignore_node_modules()
.add_ignore_paths(exclude_files)
.collect_files(include_files)
}
/// Formats markdown (using <https://github.com/dprint/dprint-plugin-markdown>) and its code blocks
/// (ts/tsx, js/jsx).
fn format_markdown(
@ -734,14 +739,6 @@ fn is_supported_ext_fmt(path: &Path) -> bool {
}
}
fn contains_git(path: &Path) -> bool {
path.components().any(|c| c.as_os_str() == ".git")
}
fn contains_node_modules(path: &Path) -> bool {
path.components().any(|c| c.as_os_str() == "node_modules")
}
#[cfg(test)]
mod test {
use super::*;
@ -773,26 +770,6 @@ mod test {
assert!(is_supported_ext_fmt(Path::new("foo.JsON")));
}
#[test]
fn test_is_located_in_git() {
assert!(contains_git(Path::new("test/.git")));
assert!(contains_git(Path::new(".git/bad.json")));
assert!(contains_git(Path::new("test/.git/bad.json")));
assert!(!contains_git(Path::new("test/bad.git/bad.json")));
}
#[test]
fn test_is_located_in_node_modules() {
assert!(contains_node_modules(Path::new("test/node_modules")));
assert!(contains_node_modules(Path::new("node_modules/bad.json")));
assert!(contains_node_modules(Path::new(
"test/node_modules/bad.json"
)));
assert!(!contains_node_modules(Path::new(
"test/bad.node_modules/bad.json"
)));
}
#[test]
#[should_panic(expected = "Formatting not stable. Bailed after 5 tries.")]
fn test_format_ensure_stable_unstable_format() {

View File

@ -14,7 +14,7 @@ use crate::proc_state::ProcState;
use crate::tools::fmt::run_parallelized;
use crate::util::file_watcher;
use crate::util::file_watcher::ResolutionResult;
use crate::util::fs::collect_files;
use crate::util::fs::FileCollector;
use crate::util::path::is_supported_ext;
use crate::util::path::specifier_to_file_path;
use deno_ast::MediaType;
@ -143,19 +143,17 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> {
let resolver = |changed: Option<Vec<PathBuf>>| {
let files_changed = changed.is_some();
let result =
collect_files(&include_files, &exclude_files, is_supported_ext).map(
|files| {
if let Some(paths) = changed {
files
.iter()
.any(|path| paths.contains(path))
.then_some(files)
.unwrap_or_else(|| [].to_vec())
} else {
files
}
},
);
collect_lint_files(&include_files, &exclude_files).map(|files| {
if let Some(paths) = changed {
files
.iter()
.any(|path| paths.contains(path))
.then_some(files)
.unwrap_or_else(|| [].to_vec())
} else {
files
}
});
let paths_to_watch = include_files.clone();
@ -251,15 +249,14 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> {
);
reporter_lock.lock().unwrap().close(1);
} else {
let target_files =
collect_files(&include_files, &exclude_files, is_supported_ext)
.and_then(|files| {
if files.is_empty() {
Err(generic_error("No target files found."))
} else {
Ok(files)
}
})?;
let target_files = collect_lint_files(&include_files, &exclude_files)
.and_then(|files| {
if files.is_empty() {
Err(generic_error("No target files found."))
} else {
Ok(files)
}
})?;
debug!("Found {} files", target_files.len());
operation(target_files).await?;
};
@ -272,6 +269,17 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> {
Ok(())
}
fn collect_lint_files(
include_files: &[PathBuf],
exclude_files: &[PathBuf],
) -> Result<Vec<PathBuf>, AnyError> {
FileCollector::new(is_supported_ext)
.ignore_git_folder()
.ignore_node_modules()
.add_ignore_paths(exclude_files)
.collect_files(include_files)
}
pub fn print_rules_list(json: bool) {
let lint_rules = rules::get_recommended_rules();

View File

@ -165,53 +165,104 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> {
/// Collects file paths that satisfy the given predicate, by recursively walking `files`.
/// If the walker visits a path that is listed in `ignore`, it skips descending into the directory.
pub fn collect_files<P>(
files: &[PathBuf],
ignore: &[PathBuf],
predicate: P,
) -> Result<Vec<PathBuf>, AnyError>
where
P: Fn(&Path) -> bool,
{
let mut target_files = Vec::new();
pub struct FileCollector<TFilter: Fn(&Path) -> bool> {
canonicalized_ignore: Vec<PathBuf>,
file_filter: TFilter,
ignore_git_folder: bool,
ignore_node_modules: bool,
}
// retain only the paths which exist and ignore the rest
let canonicalized_ignore: Vec<PathBuf> = ignore
.iter()
.filter_map(|i| canonicalize_path(i).ok())
.collect();
for file in files {
for entry in WalkDir::new(file)
.into_iter()
.filter_entry(|e| {
canonicalize_path(e.path()).map_or(false, |c| {
!canonicalized_ignore.iter().any(|i| c.starts_with(i))
})
})
.filter_map(|e| match e {
Ok(e) if !e.file_type().is_dir() && predicate(e.path()) => Some(e),
_ => None,
})
{
target_files.push(canonicalize_path(entry.path())?)
impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
pub fn new(file_filter: TFilter) -> Self {
Self {
canonicalized_ignore: Default::default(),
file_filter,
ignore_git_folder: false,
ignore_node_modules: false,
}
}
pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self {
// retain only the paths which exist and ignore the rest
self
.canonicalized_ignore
.extend(paths.iter().filter_map(|i| canonicalize_path(i).ok()));
self
}
Ok(target_files)
pub fn ignore_node_modules(mut self) -> Self {
self.ignore_node_modules = true;
self
}
pub fn ignore_git_folder(mut self) -> Self {
self.ignore_git_folder = true;
self
}
pub fn collect_files(
&self,
files: &[PathBuf],
) -> Result<Vec<PathBuf>, AnyError> {
let mut target_files = Vec::new();
for file in files {
if let Ok(file) = canonicalize_path(file) {
// use an iterator like this in order to minimize the number of file system operations
let mut iterator = WalkDir::new(&file).into_iter();
loop {
let e = match iterator.next() {
None => break,
Some(Err(_)) => continue,
Some(Ok(entry)) => entry,
};
let file_type = e.file_type();
let is_dir = file_type.is_dir();
if let Ok(c) = canonicalize_path(e.path()) {
if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) {
if is_dir {
iterator.skip_current_dir();
}
} else if is_dir {
let should_ignore_dir = c
.file_name()
.map(|dir_name| {
let dir_name = dir_name.to_string_lossy().to_lowercase();
let is_ignored_file = self.ignore_node_modules
&& dir_name == "node_modules"
|| self.ignore_git_folder && dir_name == ".git";
// allow the user to opt out of ignoring by explicitly specifying the dir
file != c && is_ignored_file
})
.unwrap_or(false);
if should_ignore_dir {
iterator.skip_current_dir();
}
} else if (self.file_filter)(e.path()) {
target_files.push(c);
}
} else if is_dir {
// failed canonicalizing, so skip it
iterator.skip_current_dir();
}
}
}
}
Ok(target_files)
}
}
/// Collects module specifiers that satisfy the given predicate as a file path, by recursively walking `include`.
/// Specifiers that start with http and https are left intact.
pub fn collect_specifiers<P>(
/// Note: This ignores all .git and node_modules folders.
pub fn collect_specifiers(
include: Vec<String>,
ignore: &[PathBuf],
predicate: P,
) -> Result<Vec<ModuleSpecifier>, AnyError>
where
P: Fn(&Path) -> bool,
{
predicate: impl Fn(&Path) -> bool,
) -> Result<Vec<ModuleSpecifier>, AnyError> {
let mut prepared = vec![];
let file_collector = FileCollector::new(predicate)
.add_ignore_paths(ignore)
.ignore_git_folder()
.ignore_node_modules();
let root_path = current_dir()?;
for path in include {
@ -231,7 +282,7 @@ where
};
let p = normalize_path(&p);
if p.is_dir() {
let test_files = collect_files(&[p], ignore, &predicate).unwrap();
let test_files = file_collector.collect_files(&[p])?;
let mut test_files_as_urls = test_files
.iter()
.map(|f| ModuleSpecifier::from_file_path(f).unwrap())
@ -407,6 +458,7 @@ pub fn dir_size(path: &Path) -> std::io::Result<u64> {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use test_util::TempDir;
#[test]
@ -466,6 +518,10 @@ mod tests {
// ├── a.ts
// ├── b.js
// ├── child
// | ├── node_modules
// | | └── node_modules.js
// | ├── git
// | | └── git.js
// │ ├── e.mjs
// │ ├── f.mjsx
// │ ├── .foo.TS
@ -486,31 +542,90 @@ mod tests {
let child_dir_files = ["e.mjs", "f.mjsx", ".foo.TS", "README.md"];
create_files(&child_dir_path, &child_dir_files);
t.create_dir_all("dir.ts/child/node_modules");
t.write("dir.ts/child/node_modules/node_modules.js", "");
t.create_dir_all("dir.ts/child/.git");
t.write("dir.ts/child/.git/git.js", "");
let ignore_dir_path = root_dir_path.join("ignore");
let ignore_dir_files = ["g.d.ts", ".gitignore"];
create_files(&ignore_dir_path, &ignore_dir_files);
let result = collect_files(&[root_dir_path], &[ignore_dir_path], |path| {
let file_collector = FileCollector::new(|path| {
// exclude dotfiles
path
.file_name()
.and_then(|f| f.to_str())
.map_or(false, |f| !f.starts_with('.'))
})
.unwrap();
.add_ignore_paths(&[ignore_dir_path]);
let result = file_collector
.collect_files(&[root_dir_path.clone()])
.unwrap();
let expected = [
"README.md",
"a.ts",
"b.js",
"e.mjs",
"f.mjsx",
"README.md",
"c.tsx",
"d.jsx",
"e.mjs",
"f.mjsx",
"git.js",
"node_modules.js",
];
for e in expected.iter() {
assert!(result.iter().any(|r| r.ends_with(e)));
}
assert_eq!(result.len(), expected.len());
let mut file_names = result
.into_iter()
.map(|r| r.file_name().unwrap().to_string_lossy().to_string())
.collect::<Vec<_>>();
file_names.sort();
assert_eq!(file_names, expected);
// test ignoring the .git and node_modules folder
let file_collector =
file_collector.ignore_git_folder().ignore_node_modules();
let result = file_collector
.collect_files(&[root_dir_path.clone()])
.unwrap();
let expected = [
"README.md",
"a.ts",
"b.js",
"c.tsx",
"d.jsx",
"e.mjs",
"f.mjsx",
];
let mut file_names = result
.into_iter()
.map(|r| r.file_name().unwrap().to_string_lossy().to_string())
.collect::<Vec<_>>();
file_names.sort();
assert_eq!(file_names, expected);
// test opting out of ignoring by specifying the dir
let result = file_collector
.collect_files(&[
root_dir_path.clone(),
root_dir_path.join("child/node_modules/"),
])
.unwrap();
let expected = [
"README.md",
"a.ts",
"b.js",
"c.tsx",
"d.jsx",
"e.mjs",
"f.mjsx",
"node_modules.js",
];
let mut file_names = result
.into_iter()
.map(|r| r.file_name().unwrap().to_string_lossy().to_string())
.collect::<Vec<_>>();
file_names.sort();
assert_eq!(file_names, expected);
}
#[test]