deno/cli/cache/emit.rs
David Sherret 826e42a5b5
fix: improved support for cjs and cts modules (#26558)
* cts support
* better cjs/cts type checking
* deno compile cjs/cts support
* More efficient detect cjs (going towards stabilization)
* Determination of whether .js, .ts, .jsx, or .tsx is cjs or esm is only
done after loading
* Support `import x = require(...);`

Co-authored-by: Bartek Iwańczuk <biwanczuk@gmail.com>
2024-11-01 12:27:00 -04:00

224 lines
6.9 KiB
Rust

// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
use std::path::PathBuf;
use deno_ast::ModuleSpecifier;
use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
use deno_core::unsync::sync::AtomicFlag;
use super::DiskCache;
/// The cache that stores previously emitted files.
///
/// Each entry is looked up by module specifier and is only handed back
/// when the stored source hash and emit hash both check out.
#[derive(Debug)]
pub struct EmitCache {
/// Backing store the serialized emit files are read from and written to.
disk_cache: DiskCache,
/// Raised once saving an emit fails; while raised, further saves are skipped.
emit_failed_flag: AtomicFlag,
/// Converts between emitted code and the bytes stored in the disk cache.
file_serializer: EmitFileSerializer,
}
impl EmitCache {
pub fn new(disk_cache: DiskCache) -> Self {
Self {
disk_cache,
emit_failed_flag: Default::default(),
file_serializer: EmitFileSerializer {
cli_version: crate::version::DENO_VERSION_INFO.deno,
},
}
}
/// Gets the emitted code with embedded sourcemap from the cache.
///
/// The expected source hash is used in order to verify
/// that you're getting a value from the cache that is
/// for the provided source.
///
/// Cached emits from previous CLI releases will not be returned
/// or emits that do not match the source.
pub fn get_emit_code(
&self,
specifier: &ModuleSpecifier,
expected_source_hash: u64,
) -> Option<String> {
let emit_filename = self.get_emit_filename(specifier)?;
let bytes = self.disk_cache.get(&emit_filename).ok()?;
self
.file_serializer
.deserialize(bytes, expected_source_hash)
}
/// Sets the emit code in the cache.
pub fn set_emit_code(
&self,
specifier: &ModuleSpecifier,
source_hash: u64,
code: &[u8],
) {
if let Err(err) = self.set_emit_code_result(specifier, source_hash, code) {
// might error in cases such as a readonly file system
log::debug!("Error saving emit data ({}): {}", specifier, err);
// assume the cache can't be written to and disable caching to it
self.emit_failed_flag.raise();
}
}
fn set_emit_code_result(
&self,
specifier: &ModuleSpecifier,
source_hash: u64,
code: &[u8],
) -> Result<(), AnyError> {
if self.emit_failed_flag.is_raised() {
log::debug!("Skipped emit cache save of {}", specifier);
return Ok(());
}
let emit_filename = self
.get_emit_filename(specifier)
.ok_or_else(|| anyhow!("Could not get emit filename."))?;
let cache_data = self.file_serializer.serialize(code, source_hash);
self.disk_cache.set(&emit_filename, &cache_data)?;
Ok(())
}
fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
self
.disk_cache
.get_cache_filename_with_extension(specifier, "js")
}
}
/// Marker that starts the metadata trailer of a cached emit file.
///
/// The trailer is appended after the emitted code as a final line in the
/// form `// denoCacheMetadata=<source_hash>,<emit_hash>`. The leading
/// newline is part of the prefix so the trailer always sits on its own
/// last line.
const LAST_LINE_PREFIX: &str = "\n// denoCacheMetadata=";
/// Serializes emitted code to, and deserializes it from, the byte format
/// stored in the emit cache (the code followed by a metadata trailer).
#[derive(Debug)]
struct EmitFileSerializer {
/// Version string mixed into the emit hash so that entries written by
/// one CLI version are not accepted by another.
cli_version: &'static str,
}
impl EmitFileSerializer {
  /// Extracts the emitted code from the bytes of a cached emit file.
  ///
  /// The final line of the file must be the metadata trailer written by
  /// [`EmitFileSerializer::serialize`]. Returns `None` when the trailer
  /// is missing or malformed, when the stored source hash differs from
  /// `expected_source_hash`, when the stored emit hash does not match the
  /// code (different CLI version or modified content), or when the code
  /// is not valid UTF-8.
  pub fn deserialize(
    &self,
    mut bytes: Vec<u8>,
    expected_source_hash: u64,
  ) -> Option<String> {
    // the trailer starts at the last newline, so that index is also the
    // length of the emitted code
    let code_len = bytes.iter().rposition(|&byte| byte == b'\n')?;
    let (code, trailer) = bytes.split_at(code_len);
    let metadata = trailer.strip_prefix(LAST_LINE_PREFIX.as_bytes())?;
    let metadata = std::str::from_utf8(metadata).ok()?;
    let (stored_source_hash, stored_emit_hash) = metadata.split_once(',')?;

    // the entry must have been produced from the source we were given
    stored_source_hash
      .parse::<u64>()
      .ok()
      .filter(|&hash| hash == expected_source_hash)?;

    // reject emits from a different cli version and emits whose code no
    // longer matches what was originally written
    stored_emit_hash
      .parse::<u64>()
      .ok()
      .filter(|&hash| hash == self.compute_emit_hash(code))?;

    // all checks passed: drop the trailer and hand back the code
    bytes.truncate(code_len);
    String::from_utf8(bytes).ok()
  }

  /// Produces the bytes to store in the cache: `code` followed by the
  /// trailer `LAST_LINE_PREFIX`, the source hash, a comma and the emit hash.
  pub fn serialize(&self, code: &[u8], source_hash: u64) -> Vec<u8> {
    let trailer = format!(
      "{}{},{}",
      LAST_LINE_PREFIX,
      source_hash,
      self.compute_emit_hash(code)
    );
    let mut cache_data = Vec::with_capacity(code.len() + trailer.len());
    cache_data.extend_from_slice(code);
    cache_data.extend_from_slice(trailer.as_bytes());
    cache_data
  }

  /// Hashes the emitted code together with the CLI version.
  fn compute_emit_hash(&self, bytes: &[u8]) -> u64 {
    // An insecure hash is fine here: anyone able to modify the emitted
    // code is equally able to modify the hash stored next to it.
    // The deno version is deliberately left out of the hasher itself
    // because the `cli_version` property is written instead.
    crate::cache::FastInsecureHasher::new_without_deno_version()
      .write(bytes)
      // makes sure an emit is never re-used between cli versions
      .write_str(self.cli_version)
      .finish()
  }
}
#[cfg(test)]
mod test {
  use test_util::TempDir;

  use super::*;

  /// Builds an emit cache over a clone of `disk_cache` that behaves as if
  /// it belonged to the CLI release `cli_version`.
  fn cache_for_version(
    disk_cache: &DiskCache,
    cli_version: &'static str,
  ) -> EmitCache {
    EmitCache {
      disk_cache: disk_cache.clone(),
      file_serializer: EmitFileSerializer { cli_version },
      emit_failed_flag: Default::default(),
    }
  }

  #[test]
  pub fn emit_cache_general_use() {
    let temp_dir = TempDir::new();
    let disk_cache = DiskCache::new(temp_dir.path().as_path());
    let first_specifier =
      ModuleSpecifier::from_file_path(temp_dir.path().join("file1.ts"))
        .unwrap();
    let second_specifier =
      ModuleSpecifier::from_file_path(temp_dir.path().join("file2.ts"))
        .unwrap();
    let first_code = "text1".to_string();
    let second_code = "text2".to_string();

    let cache = cache_for_version(&disk_cache, "1.0.0");

    // nothing has been stored yet
    assert_eq!(cache.get_emit_code(&first_specifier, 1), None);

    cache.set_emit_code(&first_specifier, 10, first_code.as_bytes());
    cache.set_emit_code(&second_specifier, 2, second_code.as_bytes());

    // an incorrect source hash is a miss
    assert_eq!(cache.get_emit_code(&first_specifier, 5), None);
    // the correct source hash is a hit
    assert_eq!(
      cache.get_emit_code(&first_specifier, 10),
      Some(first_code.clone()),
    );
    assert_eq!(
      cache.get_emit_code(&second_specifier, 2),
      Some(second_code),
    );

    // a different cli version must not load the previous emits
    let cache = cache_for_version(&disk_cache, "2.0.0");
    assert_eq!(cache.get_emit_code(&first_specifier, 10), None);
    cache.set_emit_code(&first_specifier, 5, first_code.as_bytes());

    // recreating the cache with the same cli version still loads the data
    let cache = cache_for_version(&disk_cache, "2.0.0");
    assert_eq!(cache.get_emit_code(&first_specifier, 5), Some(first_code));

    // overwriting an existing entry should not cause an issue
    let replacement_code = "asdf".to_string();
    cache.set_emit_code(&first_specifier, 20, replacement_code.as_bytes());
    assert_eq!(cache.get_emit_code(&first_specifier, 5), None);
    assert_eq!(
      cache.get_emit_code(&first_specifier, 20),
      Some(replacement_code),
    );
  }
}