url: handle "unsafe" characters properly in pathToFileURL

Co-authored-by: EarlyRiser42 <tkfydtls464@gmail.com>
PR-URL: https://github.com/nodejs/node/pull/54545
Fixes: https://github.com/nodejs/node/issues/54515
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
This commit is contained in:
Antoine du Hamel 2024-10-18 09:20:43 +02:00 committed by GitHub
parent 56e5bd8d2a
commit 7ae193d19f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 68 additions and 43 deletions

View File

@ -49,7 +49,7 @@ function tryGetCwd() {
let evalIndex = 0;
function getEvalModuleUrl() {
return pathToFileURL(`${process.cwd()}/[eval${++evalIndex}]`).href;
return `${pathToFileURL(process.cwd())}/[eval${++evalIndex}]`;
}
/**

View File

@ -1498,44 +1498,75 @@ function fileURLToPath(path, options = kEmptyObject) {
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}
// The following characters are percent-encoded when converting from file path
// to URL:
// - %: The percent character is the only character not encoded by the
// `pathname` setter.
// - \: Backslash is encoded on non-windows platforms since it's a valid
// character but the `pathname` setters replaces it by a forward slash.
// - LF: The newline character is stripped out by the `pathname` setter.
// (See whatwg/url#419)
// - CR: The carriage return character is also stripped out by the `pathname`
// setter.
// - TAB: The tab character is also stripped out by the `pathname` setter.
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
const percentRegEx = /%/g;
const backslashRegEx = /\\/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const questionRegex = /\?/g;
const quoteRegEx = /"/g;
const hashRegex = /#/g;
const spaceRegEx = / /g;
const questionMarkRegex = /\?/g;
const openSquareBracketRegEx = /\[/g;
const backslashRegEx = /\\/g;
const closeSquareBracketRegEx = /]/g;
const caretRegEx = /\^/g;
const verticalBarRegEx = /\|/g;
const tildeRegEx = /~/g;
function encodePathChars(filepath, options = kEmptyObject) {
const windows = options?.windows;
if (StringPrototypeIndexOf(filepath, '%') !== -1)
if (StringPrototypeIncludes(filepath, '%')) {
filepath = RegExpPrototypeSymbolReplace(percentRegEx, filepath, '%25');
// In posix, backslash is a valid character in paths:
if (!(windows ?? isWindows) && StringPrototypeIndexOf(filepath, '\\') !== -1)
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C');
if (StringPrototypeIndexOf(filepath, '\n') !== -1)
filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A');
if (StringPrototypeIndexOf(filepath, '\r') !== -1)
filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D');
if (StringPrototypeIndexOf(filepath, '\t') !== -1)
}
if (StringPrototypeIncludes(filepath, '\t')) {
filepath = RegExpPrototypeSymbolReplace(tabRegEx, filepath, '%09');
}
if (StringPrototypeIncludes(filepath, '\n')) {
filepath = RegExpPrototypeSymbolReplace(newlineRegEx, filepath, '%0A');
}
if (StringPrototypeIncludes(filepath, '\r')) {
filepath = RegExpPrototypeSymbolReplace(carriageReturnRegEx, filepath, '%0D');
}
if (StringPrototypeIncludes(filepath, ' ')) {
filepath = RegExpPrototypeSymbolReplace(spaceRegEx, filepath, '%20');
}
if (StringPrototypeIncludes(filepath, '"')) {
filepath = RegExpPrototypeSymbolReplace(quoteRegEx, filepath, '%22');
}
if (StringPrototypeIncludes(filepath, '#')) {
filepath = RegExpPrototypeSymbolReplace(hashRegex, filepath, '%23');
}
if (StringPrototypeIncludes(filepath, '?')) {
filepath = RegExpPrototypeSymbolReplace(questionMarkRegex, filepath, '%3F');
}
if (StringPrototypeIncludes(filepath, '[')) {
filepath = RegExpPrototypeSymbolReplace(openSquareBracketRegEx, filepath, '%5B');
}
// Back-slashes must be special-cased on Windows, where they are treated as path separator.
if (!options.windows && StringPrototypeIncludes(filepath, '\\')) {
filepath = RegExpPrototypeSymbolReplace(backslashRegEx, filepath, '%5C');
}
if (StringPrototypeIncludes(filepath, ']')) {
filepath = RegExpPrototypeSymbolReplace(closeSquareBracketRegEx, filepath, '%5D');
}
if (StringPrototypeIncludes(filepath, '^')) {
filepath = RegExpPrototypeSymbolReplace(caretRegEx, filepath, '%5E');
}
if (StringPrototypeIncludes(filepath, '|')) {
filepath = RegExpPrototypeSymbolReplace(verticalBarRegEx, filepath, '%7C');
}
if (StringPrototypeIncludes(filepath, '~')) {
filepath = RegExpPrototypeSymbolReplace(tildeRegEx, filepath, '%7E');
}
return filepath;
}
function pathToFileURL(filepath, options = kEmptyObject) {
const windows = options?.windows;
if ((windows ?? isWindows) && StringPrototypeStartsWith(filepath, '\\\\')) {
const windows = options?.windows ?? isWindows;
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
@ -1566,20 +1597,9 @@ function pathToFileURL(filepath, options = kEmptyObject) {
);
return outURL;
}
let resolved = (windows ?? isWindows) ? path.win32.resolve(filepath) : path.posix.resolve(filepath);
const resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);
// Call encodePathChars first to avoid encoding % again for ? and #.
resolved = encodePathChars(resolved, { windows });
// Question and hash character should be included in pathname.
// Therefore, encoding is required to eliminate parsing them in different states.
// This is done as an optimization to not creating a URL instance and
// later triggering pathname setter, which impacts performance
if (StringPrototypeIndexOf(resolved, '?') !== -1)
resolved = RegExpPrototypeSymbolReplace(questionRegex, resolved, '%3F');
if (StringPrototypeIndexOf(resolved, '#') !== -1)
resolved = RegExpPrototypeSymbolReplace(hashRegex, resolved, '%23');
return new URL(`file://${resolved}`);
return new URL(`file://${encodePathChars(resolved, { windows })}`);
}
function toPathIfFileURL(fileURLOrPath) {

View File

@ -13,10 +13,7 @@ const url = require('url');
{
const fileURL = url.pathToFileURL('test\\').href;
assert.ok(fileURL.startsWith('file:///'));
if (isWindows)
assert.ok(fileURL.endsWith('/'));
else
assert.ok(fileURL.endsWith('%5C'));
assert.match(fileURL, isWindows ? /\/$/ : /%5C$/);
}
{
@ -104,6 +101,12 @@ const windowsTestCases = [
{ path: 'C:\\€', expected: 'file:///C:/%E2%82%AC' },
// Rocket emoji (non-BMP code point)
{ path: 'C:\\🚀', expected: 'file:///C:/%F0%9F%9A%80' },
// caret
{ path: 'C:\\foo^bar', expected: 'file:///C:/foo%5Ebar' },
// left bracket
{ path: 'C:\\foo[bar', expected: 'file:///C:/foo%5Bbar' },
// right bracket
{ path: 'C:\\foo]bar', expected: 'file:///C:/foo%5Dbar' },
// Local extended path
{ path: '\\\\?\\C:\\path\\to\\file.txt', expected: 'file:///C:/path/to/file.txt' },
// UNC path (see https://docs.microsoft.com/en-us/archive/blogs/ie/file-uris-in-windows)
@ -154,6 +157,8 @@ const posixTestCases = [
{ path: '/€', expected: 'file:///%E2%82%AC' },
// Rocket emoji (non-BMP code point)
{ path: '/🚀', expected: 'file:///%F0%9F%9A%80' },
// "unsafe" chars
{ path: '/foo\r\n\t<>"#%{}|^[\\~]`?bar', expected: 'file:///foo%0D%0A%09%3C%3E%22%23%25%7B%7D%7C%5E%5B%5C%7E%5D%60%3Fbar' },
];
for (const { path, expected } of windowsTestCases) {