deno/resolvers/node/analyze.rs
David Sherret 617350e79c
refactor(resolver): move more resolution code into deno_resolver (#26873)
Follow-up to cjs refactor.

This moves most of the resolution code into the deno_resolver crate.
Still pending is the npm resolution code.
2024-11-14 15:24:25 -05:00

665 lines
18 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use deno_path_util::url_from_file_path;
use deno_path_util::url_to_file_path;
use futures::future::LocalBoxFuture;
use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt;
use once_cell::sync::Lazy;
use anyhow::Context;
use anyhow::Error as AnyError;
use url::Url;
use crate::env::NodeResolverEnv;
use crate::npm::InNpmPackageCheckerRc;
use crate::resolution::NodeResolverRc;
use crate::NodeModuleKind;
use crate::NodeResolutionMode;
use crate::NpmPackageFolderResolverRc;
use crate::PackageJsonResolverRc;
use crate::PathClean;
/// Outcome of running a [`CjsCodeAnalyzer`] over a module.
#[derive(Debug, Clone)]
pub enum CjsAnalysis<'a> {
/// File was found to be an ES module and the translator should
/// load the code as ESM.
Esm(Cow<'a, str>),
/// File was analyzed as CommonJS; carries the exports and re-exports
/// that the analyzer found.
Cjs(CjsAnalysisExports),
}
/// Exports discovered while analyzing a CommonJS module.
#[derive(Debug, Clone)]
pub struct CjsAnalysisExports {
/// Names exported directly by the module.
pub exports: Vec<String>,
/// Specifiers of modules whose exports are re-exported by this module.
/// The translator resolves each one relative to the module that
/// referenced it and analyzes it in turn.
pub reexports: Vec<String>,
}
/// Code analyzer for CJS and ESM files.
///
/// Declared with `?Send`, so the futures returned by implementations are
/// not required to be `Send`.
#[async_trait::async_trait(?Send)]
pub trait CjsCodeAnalyzer {
/// Analyzes CommonJs code for exports and reexports, which is
/// then used to determine the wrapper ESM module exports.
///
/// Note that the source is provided by the caller when the caller
/// already has it. If the source is needed by the implementation,
/// then it can use the provided source, or otherwise load it if
/// necessary.
///
/// `specifier` identifies the module being analyzed and `maybe_source`
/// is its text when the caller has it at hand. The result is either
/// [`CjsAnalysis::Esm`] or [`CjsAnalysis::Cjs`].
async fn analyze_cjs<'a>(
&self,
specifier: &Url,
maybe_source: Option<Cow<'a, str>>,
) -> Result<CjsAnalysis<'a>, AnyError>;
}
/// Translates CommonJS modules into equivalent ES module wrapper source,
/// following re-exports to collect the full set of export names.
pub struct NodeCodeTranslator<
TCjsCodeAnalyzer: CjsCodeAnalyzer,
TNodeResolverEnv: NodeResolverEnv,
> {
// analyzes the entry module and every re-exported module
cjs_code_analyzer: TCjsCodeAnalyzer,
// file system queries (exists / is file / is dir) used while probing paths
env: TNodeResolverEnv,
// bounds the ancestor `node_modules` fallback search to npm package dirs
in_npm_pkg_checker: InNpmPackageCheckerRc,
// resolves the `exports` field of a package.json
node_resolver: NodeResolverRc<TNodeResolverEnv>,
// locates the folder of a package required from a referrer
npm_resolver: NpmPackageFolderResolverRc,
// loads package.json files
pkg_json_resolver: PackageJsonResolverRc<TNodeResolverEnv>,
}
impl<TCjsCodeAnalyzer: CjsCodeAnalyzer, TNodeResolverEnv: NodeResolverEnv>
NodeCodeTranslator<TCjsCodeAnalyzer, TNodeResolverEnv>
{
/// Creates a translator that stores the given analyzer, environment and
/// resolvers as-is; no work is performed at construction time.
pub fn new(
cjs_code_analyzer: TCjsCodeAnalyzer,
env: TNodeResolverEnv,
in_npm_pkg_checker: InNpmPackageCheckerRc,
node_resolver: NodeResolverRc<TNodeResolverEnv>,
npm_resolver: NpmPackageFolderResolverRc,
pkg_json_resolver: PackageJsonResolverRc<TNodeResolverEnv>,
) -> Self {
Self {
cjs_code_analyzer,
env,
in_npm_pkg_checker,
node_resolver,
npm_resolver,
pkg_json_resolver,
}
}
/// Translates given CJS module into ESM. This function will perform static
/// analysis on the file to find defined exports and reexports.
///
/// For all discovered reexports the analysis will be performed recursively.
///
/// If successful a source code for equivalent ES module is returned.
pub async fn translate_cjs_to_esm<'a>(
&self,
entry_specifier: &Url,
source: Option<Cow<'a, str>>,
) -> Result<Cow<'a, str>, AnyError> {
let mut temp_var_count = 0;
let analysis = self
.cjs_code_analyzer
.analyze_cjs(entry_specifier, source)
.await?;
let analysis = match analysis {
CjsAnalysis::Esm(source) => return Ok(source),
CjsAnalysis::Cjs(analysis) => analysis,
};
let mut source = vec![
r#"import {createRequire as __internalCreateRequire, Module as __internalModule } from "node:module";
const require = __internalCreateRequire(import.meta.url);"#
.to_string(),
];
// use a BTreeSet to make the output deterministic for v8's code cache
let mut all_exports = analysis.exports.into_iter().collect::<BTreeSet<_>>();
if !analysis.reexports.is_empty() {
let mut errors = Vec::new();
self
.analyze_reexports(
entry_specifier,
analysis.reexports,
&mut all_exports,
&mut errors,
)
.await;
// surface errors afterwards in a deterministic way
if !errors.is_empty() {
errors.sort_by_cached_key(|e| e.to_string());
return Err(errors.remove(0));
}
}
source.push(format!(
r#"let mod;
if (import.meta.main) {{
mod = __internalModule._load("{0}", null, true)
}} else {{
mod = require("{0}");
}}"#,
url_to_file_path(entry_specifier)
.unwrap()
.to_str()
.unwrap()
.replace('\\', "\\\\")
.replace('\'', "\\\'")
.replace('\"', "\\\"")
));
for export in &all_exports {
if export.as_str() != "default" {
add_export(
&mut source,
export,
&format!("mod[\"{}\"]", escape_for_double_quote_string(export)),
&mut temp_var_count,
);
}
}
source.push("export default mod;".to_string());
let translated_source = source.join("\n");
Ok(Cow::Owned(translated_source))
}
/// Follows the re-export graph that starts at `reexports` (the re-exports
/// of `entry_specifier`) and adds every export name found along the way,
/// except `default`, to `all_exports`.
///
/// Each re-export is resolved relative to the module that referenced it and
/// analyzed with the `cjs_code_analyzer`; the analyses are driven through a
/// `FuturesUnordered`. A resolved specifier is only analyzed once. Failures
/// to resolve or analyze, as well as re-exported ES modules, are appended to
/// `errors` instead of stopping the walk.
async fn analyze_reexports<'a>(
&'a self,
entry_specifier: &url::Url,
reexports: Vec<String>,
all_exports: &mut BTreeSet<String>,
// this goes through the modules concurrently, so collect
// the errors in order to be deterministic
errors: &mut Vec<anyhow::Error>,
) {
// result of analyzing one re-exported module, with enough context to
// report errors and to resolve its own re-exports
struct Analysis {
reexport_specifier: url::Url,
referrer: url::Url,
analysis: CjsAnalysis<'static>,
}
type AnalysisFuture<'a> = LocalBoxFuture<'a, Result<Analysis, AnyError>>;
// resolved specifiers that were already queued; seeded with the entry so a
// re-export pointing back at it is skipped
let mut handled_reexports: HashSet<Url> = HashSet::default();
handled_reexports.insert(entry_specifier.clone());
let mut analyze_futures: FuturesUnordered<AnalysisFuture<'a>> =
FuturesUnordered::new();
let cjs_code_analyzer = &self.cjs_code_analyzer;
let mut handle_reexports =
|referrer: url::Url,
reexports: Vec<String>,
analyze_futures: &mut FuturesUnordered<AnalysisFuture<'a>>,
errors: &mut Vec<anyhow::Error>| {
// 1. Resolve the re-exports and start a future to analyze each one
for reexport in reexports {
let result = self.resolve(
&reexport,
&referrer,
// FIXME(bartlomieju): check if these conditions are okay, probably
// should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
&["deno", "node", "require", "default"],
NodeResolutionMode::Execution,
);
let reexport_specifier = match result {
Ok(Some(specifier)) => specifier,
// nothing was resolved and no error was reported: skip this re-export
Ok(None) => continue,
Err(err) => {
errors.push(err);
continue;
}
};
// `insert` returns false when the specifier was already present
if !handled_reexports.insert(reexport_specifier.clone()) {
continue;
}
let referrer = referrer.clone();
let future = async move {
let analysis = cjs_code_analyzer
.analyze_cjs(&reexport_specifier, None)
.await
.with_context(|| {
format!(
"Could not load '{}' ({}) referenced from {}",
reexport, reexport_specifier, referrer
)
})?;
Ok(Analysis {
reexport_specifier,
referrer,
analysis,
})
}
.boxed_local();
analyze_futures.push(future);
}
};
// seed the queue with the entry module's own re-exports
handle_reexports(
entry_specifier.clone(),
reexports,
&mut analyze_futures,
errors,
);
// futures pushed while handling a result are picked up by later iterations
while let Some(analysis_result) = analyze_futures.next().await {
// 2. Look at the analysis result and resolve its exports and re-exports
let Analysis {
reexport_specifier,
referrer,
analysis,
} = match analysis_result {
Ok(analysis) => analysis,
Err(err) => {
errors.push(err);
continue;
}
};
match analysis {
CjsAnalysis::Esm(_) => {
// todo(dsherret): support this once supporting requiring ES modules
errors.push(anyhow::anyhow!(
"Cannot require ES module '{}' from '{}'",
reexport_specifier,
referrer,
));
}
CjsAnalysis::Cjs(analysis) => {
if !analysis.reexports.is_empty() {
handle_reexports(
reexport_specifier.clone(),
analysis.reexports,
&mut analyze_futures,
errors,
);
}
// a re-exported module's `default` is not propagated
all_exports.extend(
analysis
.exports
.into_iter()
.filter(|e| e.as_str() != "default"),
);
}
}
}
}
// todo(dsherret): what is going on here? Isn't this a bunch of duplicate code?
fn resolve(
&self,
specifier: &str,
referrer: &Url,
conditions: &[&str],
mode: NodeResolutionMode,
) -> Result<Option<Url>, AnyError> {
if specifier.starts_with('/') {
todo!();
}
let referrer_path = url_to_file_path(referrer).unwrap();
if specifier.starts_with("./") || specifier.starts_with("../") {
if let Some(parent) = referrer_path.parent() {
return self
.file_extension_probe(parent.join(specifier), &referrer_path)
.and_then(|p| url_from_file_path(&p).map_err(AnyError::from))
.map(Some);
} else {
todo!();
}
}
// We've got a bare specifier or maybe bare_specifier/blah.js"
let (package_specifier, package_subpath) =
parse_specifier(specifier).unwrap();
let module_dir = match self
.npm_resolver
.resolve_package_folder_from_package(package_specifier.as_str(), referrer)
{
Err(err)
if matches!(
err.as_kind(),
crate::errors::PackageFolderResolveErrorKind::PackageNotFound(..)
) =>
{
return Ok(None);
}
other => other,
}?;
let package_json_path = module_dir.join("package.json");
let maybe_package_json = self
.pkg_json_resolver
.load_package_json(&package_json_path)?;
if let Some(package_json) = maybe_package_json {
if let Some(exports) = &package_json.exports {
return Some(
self
.node_resolver
.package_exports_resolve(
&package_json_path,
&package_subpath,
exports,
Some(referrer),
NodeModuleKind::Esm,
conditions,
mode,
)
.map_err(AnyError::from),
)
.transpose();
}
// old school
if package_subpath != "." {
let d = module_dir.join(package_subpath);
if self.env.is_dir_sync(&d) {
// subdir might have a package.json that specifies the entrypoint
let package_json_path = d.join("package.json");
let maybe_package_json = self
.pkg_json_resolver
.load_package_json(&package_json_path)?;
if let Some(package_json) = maybe_package_json {
if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(Some(url_from_file_path(&d.join(main).clean())?));
}
}
return Ok(Some(url_from_file_path(&d.join("index.js").clean())?));
}
return self
.file_extension_probe(d, &referrer_path)
.and_then(|p| url_from_file_path(&p).map_err(AnyError::from))
.map(Some);
} else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
return Ok(Some(url_from_file_path(&module_dir.join(main).clean())?));
} else {
return Ok(Some(url_from_file_path(
&module_dir.join("index.js").clean(),
)?));
}
}
// as a fallback, attempt to resolve it via the ancestor directories
let mut last = referrer_path.as_path();
while let Some(parent) = last.parent() {
if !self.in_npm_pkg_checker.in_npm_package_at_dir_path(parent) {
break;
}
let path = if parent.ends_with("node_modules") {
parent.join(specifier)
} else {
parent.join("node_modules").join(specifier)
};
if let Ok(path) = self.file_extension_probe(path, &referrer_path) {
return Ok(Some(url_from_file_path(&path)?));
}
last = parent;
}
Err(not_found(specifier, &referrer_path))
}
/// Finds the file that the extension-less path `p` refers to.
///
/// When `p` exists, the result is (in order of preference) `p` with `.js`
/// appended if that is a file, `p/index.js` if `p` is a directory, or `p`
/// itself. When `p` does not exist, `p` with `.js` and then `.json` appended
/// is tried.
///
/// # Errors
///
/// Returns a "module not found" error mentioning `referrer` when no
/// candidate matches.
fn file_extension_probe(
  &self,
  p: PathBuf,
  referrer: &Path,
) -> Result<PathBuf, AnyError> {
  /// Appends `suffix` to the final component of `path`. Works on the OS
  /// string so non-UTF-8 file names are supported; yields `None` when the
  /// path has no final component (e.g. a root path).
  fn append_to_file_name(path: &Path, suffix: &str) -> Option<PathBuf> {
    let mut file_name = path.file_name()?.to_os_string();
    file_name.push(suffix);
    Some(path.with_file_name(file_name))
  }

  let p = p.clean();
  let maybe_p_js = append_to_file_name(&p, ".js");

  if self.env.exists_sync(&p) {
    if let Some(p_js) = maybe_p_js {
      if self.env.is_file_sync(&p_js) {
        return Ok(p_js);
      }
    }
    if self.env.is_dir_sync(&p) {
      return Ok(p.join("index.js"));
    }
    return Ok(p);
  }

  if let Some(p_js) = maybe_p_js {
    if self.env.is_file_sync(&p_js) {
      return Ok(p_js);
    }
  }
  if let Some(p_json) = append_to_file_name(&p, ".json") {
    if self.env.is_file_sync(&p_json) {
      return Ok(p_json);
    }
  }

  Err(not_found(&p.to_string_lossy(), referrer))
}
}
/// Export names that must not be emitted as `export const <name> = ...` in
/// the generated ESM wrapper.
///
/// Besides JavaScript reserved and otherwise problematic words, this
/// contains the module-scope bindings that the wrapper produced by
/// `translate_cjs_to_esm` declares itself (`mod`, `require`,
/// `__internalCreateRequire`, `__internalModule`): declaring an export with
/// one of those names would be a duplicate declaration.
static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
  HashSet::from([
    "__internalCreateRequire",
    "__internalModule",
    "abstract",
    "arguments",
    "async",
    "await",
    "boolean",
    "break",
    "byte",
    "case",
    "catch",
    "char",
    "class",
    "const",
    "continue",
    "debugger",
    "default",
    "delete",
    "do",
    "double",
    "else",
    "enum",
    "eval",
    "export",
    "extends",
    "false",
    "final",
    "finally",
    "float",
    "for",
    "function",
    "get",
    "goto",
    "if",
    "implements",
    "import",
    "in",
    "instanceof",
    "int",
    "interface",
    "let",
    "long",
    "mod",
    "native",
    "new",
    "null",
    "package",
    "private",
    "protected",
    "public",
    "require",
    "return",
    "set",
    "short",
    "static",
    "super",
    "switch",
    "synchronized",
    "this",
    "throw",
    "throws",
    "transient",
    "true",
    "try",
    "typeof",
    "var",
    "void",
    "volatile",
    "while",
    "with",
    "yield",
  ])
});
/// Appends to `source` the statement(s) that export `initializer` under
/// `name`.
///
/// A name that is a plain, non-reserved identifier becomes a single
/// `export const` declaration. Any other name is first bound to a fresh
/// `__deno_export_N__` constant (bumping `temp_var_count`) and then
/// re-exported under its string name.
fn add_export(
  source: &mut Vec<String>,
  name: &str,
  initializer: &str,
  temp_var_count: &mut usize,
) {
  /// Deliberately strict check: ASCII letters, digits, `_` and `$` only,
  /// and no leading digit.
  fn is_plain_identifier(candidate: &str) -> bool {
    let mut chars = candidate.chars();
    match chars.next() {
      Some(first)
        if first.is_ascii_alphabetic() || first == '_' || first == '$' =>
      {
        chars.all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
      }
      _ => false,
    }
  }

  // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
  // but it might not be necessary here since our analysis is more detailed?
  let declarable = !RESERVED_WORDS.contains(name) && is_plain_identifier(name);
  if declarable {
    source.push(format!("export const {name} = {initializer};"));
    return;
  }

  // a reserved word or invalid identifier can't be declared directly, so
  // bind the value to a conflict-free temporary and re-export it under the
  // string name
  *temp_var_count += 1;
  let temp_name = format!("__deno_export_{temp_var_count}__");
  source.push(format!("const {temp_name} = {initializer};"));
  source.push(format!(
    "export {{ {temp_name} as \"{}\" }};",
    escape_for_double_quote_string(name)
  ));
}
/// Splits a bare specifier into `(package_name, package_subpath)`.
///
/// The package name is the first path segment, or the first two segments
/// for a scoped (`@scope/name`) package. The subpath is the remainder
/// prefixed with `.`, or just `"."` when there is no remainder, e.g.
/// `"@scope/pkg/lib/a.js"` yields `("@scope/pkg", "./lib/a.js")`.
///
/// Returns `None` when the specifier is empty, is a scope without a package
/// name (`"@scope"`), or when the package name contains `%` or `\`.
fn parse_specifier(specifier: &str) -> Option<(String, String)> {
  if specifier.is_empty() {
    return None;
  }

  let first_separator = specifier.find('/');
  let separator_index = if specifier.starts_with('@') {
    // scoped package: the name extends up to the second separator
    let scope_end = first_separator?;
    specifier[scope_end + 1..]
      .find('/')
      .map(|offset| offset + scope_end + 1)
  } else {
    first_separator
  };

  // `separator_index` is a byte offset, so split by bytes; counting chars
  // here would produce a wrong subpath for non-ASCII package names
  let (package_name, rest) = match separator_index {
    Some(index) => specifier.split_at(index),
    None => (specifier, ""),
  };

  // Package name cannot have percent-encoding or `\` separators.
  if package_name.chars().any(|ch| matches!(ch, '%' | '\\')) {
    return None;
  }

  Some((package_name.to_string(), format!(".{rest}")))
}
fn not_found(path: &str, referrer: &Path) -> AnyError {
let msg = format!(
"[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
path,
referrer.to_string_lossy()
);
std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
}
/// Escapes `text` so it can be placed between double quotes in generated
/// JavaScript source.
///
/// Backslashes and double quotes are backslash-escaped, and line feeds and
/// carriage returns are emitted as `\n` / `\r` because a raw line break is
/// not allowed inside a JavaScript string literal. Text that needs no
/// escaping is returned borrowed.
fn escape_for_double_quote_string(text: &str) -> Cow<'_, str> {
  // this should be rare, so doing a scan first before allocating is ok
  let needs_escaping = text
    .chars()
    .any(|c| matches!(c, '"' | '\\' | '\n' | '\r'));
  if !needs_escaping {
    return Cow::Borrowed(text);
  }

  let mut escaped = String::with_capacity(text.len() + 2);
  for c in text.chars() {
    match c {
      '\\' => escaped.push_str("\\\\"),
      '"' => escaped.push_str("\\\""),
      '\n' => escaped.push_str("\\n"),
      '\r' => escaped.push_str("\\r"),
      other => escaped.push(other),
    }
  }
  Cow::Owned(escaped)
}
#[cfg(test)]
mod tests {
  use super::*;

  /// Reserved words and names that are not plain identifiers go through a
  /// numbered temporary; everything else is exported directly.
  #[test]
  fn test_add_export() {
    let mut source = Vec::new();
    let mut temp_var_count = 0;
    for name in ["static", "server", "app", "dashed-export", "3d"] {
      add_export(&mut source, name, "init", &mut temp_var_count);
    }

    let expected = [
      "const __deno_export_1__ = init;",
      "export { __deno_export_1__ as \"static\" };",
      "export const server = init;",
      "export const app = init;",
      "const __deno_export_2__ = init;",
      "export { __deno_export_2__ as \"dashed-export\" };",
      "const __deno_export_3__ = init;",
      "export { __deno_export_3__ as \"3d\" };",
    ];
    assert_eq!(source, expected);
  }

  /// A scoped package keeps its scope in the name; the rest is the subpath.
  #[test]
  fn test_parse_specifier() {
    let parsed = parse_specifier("@some-package/core/actions");
    let expected = (String::from("@some-package/core"), String::from("./actions"));
    assert_eq!(parsed, Some(expected));
  }
}