2024-01-01 21:11:32 +00:00
|
|
|
|
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
|
2023-05-30 23:49:16 +00:00
|
|
|
|
// This module is browser compatible.
|
|
|
|
|
|
2024-01-04 12:01:37 +00:00
|
|
|
|
/**
 * Object structure for a list of HTML entities.
 *
 * Each key is the entity text to look for in the input and each value is the
 * text that replaces it when unescaping.
 */
export type EntityList = Record<string, string>;
|
|
|
|
|
|
|
|
|
|
/**
 * Pairs of `[raw character, HTML entity]` for every character that must be
 * escaped in HTML text content and quoted attribute values.
 *
 * The ampersand comes first so the table reads in the conventional order;
 * escaping is done in a single regex pass, so order does not affect output.
 */
const rawToEntityEntries = [
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
] as const;
|
|
|
|
|
|
|
|
|
|
/**
 * Entity list used by `unescape` when the caller does not supply one.
 *
 * It is the inverse of `rawToEntityEntries` (entity -> raw character) plus
 * two extra named entities that are commonly encountered in the wild.
 */
const defaultEntityList: EntityList = Object.fromEntries([
  ...rawToEntityEntries.map(([raw, entity]) => [entity, raw]),
  ["&apos;", "'"],
  ["&nbsp;", "\xa0"],
]);
|
|
|
|
|
|
|
|
|
|
// Lookup table built from `rawToEntityEntries`, keyed by the first element of
// each pair; `escape` uses it to find the replacement for a matched character.
const rawToEntity = new Map<string, string>(rawToEntityEntries);
|
|
|
|
|
|
|
|
|
|
// Character class matching any single key of `rawToEntity`, applied globally.
const rawRe = new RegExp(
  "[" + Array.from(rawToEntity.keys()).join("") + "]",
  "g",
);
|
|
|
|
|
|
|
|
|
|
/**
 * Escapes text for safe interpolation into HTML text content and quoted attributes.
 *
 * @example Usage
 * ```ts
 * import { escape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(escape("<>'&AA"), "&lt;&gt;&#39;&amp;AA");
 *
 * // Characters that don't need to be escaped will be left alone,
 * // even if named HTML entities exist for them.
 * assertEquals(escape("þð"), "þð");
 * ```
 *
 * @param str The string to escape.
 * @returns The escaped string.
 */
export function escape(str: string): string {
  // `rawRe` only matches characters that are keys of `rawToEntity`, so the
  // lookup always succeeds and the non-null assertion is safe.
  return str.replaceAll(rawRe, (m) => rawToEntity.get(m)!);
}
|
|
|
|
|
|
2024-01-04 12:01:37 +00:00
|
|
|
|
/** Options for {@linkcode unescape}. */
export type UnescapeOptions = {
  /**
   * The named entities to recognize: each key is matched literally against
   * the input and replaced with its value.
   */
  entityList: EntityList;
};
|
|
|
|
|
|
|
|
|
|
/** Options that `unescape` falls back to for any field the caller omits. */
const defaultUnescapeOptions: UnescapeOptions = {
  entityList: defaultEntityList,
};
|
|
|
|
|
|
|
|
|
|
// Highest Unicode code point; `codePointStrToChar` checks against it before
// calling `String.fromCodePoint`.
const MAX_CODE_POINT = 0x10ffff;
|
|
|
|
|
|
|
|
|
|
// Matches a decimal numeric character reference (`&#` digits `;`) and captures
// the digits.
const RX_DEC_ENTITY = /&#([0-9]+);/g;
|
|
|
|
|
// Matches a hexadecimal numeric character reference (`&#x` hex digits `;`) and
// captures the digits. `\p{AHex}` (ASCII_Hex_Digit) needs the `u` flag.
const RX_HEX_ENTITY = /&#x(\p{AHex}+);/gu;
|
|
|
|
|
|
|
|
|
|
// Compiled entity regex per entity-list object, held weakly by object identity
// so `unescape` does not rebuild the pattern on every call.
const entityListRegexCache = new WeakMap<EntityList, RegExp>();
|
|
|
|
|
|
|
|
|
|
/**
 * Unescapes HTML entities in text.
 *
 * Default options only handle `&<>'"` and numeric entities.
 *
 * @example Basic usage
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA"), "<>'&AA");
 * assertEquals(unescape("&thorn;&eth;"), "&thorn;&eth;");
 * ```
 *
 * @example Using a custom entity list
 *
 * This uses the full named entity list from the HTML spec (~47K un-minified)
 *
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import entityList from "@std/html/named-entity-list.json" with { type: "json" };
 * import { assertEquals } from "@std/assert";
 *
 * assertEquals(unescape("&lt;&gt;&#39;&amp;AA", { entityList }), "<>'&AA");
 * ```
 *
 * @param str The string to unescape.
 * @param options Options for unescaping.
 * @returns The unescaped string.
 */
export function unescape(
  str: string,
  options: Partial<UnescapeOptions> = {},
): string {
  const { entityList } = { ...defaultUnescapeOptions, ...options };

  // The alternation is compiled once per entity-list object and then reused.
  let entityRe = entityListRegexCache.get(entityList);

  if (!entityRe) {
    entityRe = new RegExp(
      `(${
        Object.keys(entityList)
          // Longest keys first, so a key that is a prefix of a longer key
          // cannot shadow it in the alternation.
          .sort((a, b) => b.length - a.length)
          .join("|")
      })`,
      "g",
    );

    entityListRegexCache.set(entityList, entityRe);
  }

  // NOTE(review): the three passes run one after another, so text produced by
  // the named-entity pass is visible to the numeric passes (e.g. `&amp;#39;`
  // becomes `&#39;` and is then decoded again). Confirm whether that is
  // intended before relying on round-tripping with `escape`.
  return str
    // Every match is one of the keys the regex was built from, so the lookup
    // is always defined.
    .replaceAll(entityRe, (m) => entityList[m]!)
    .replaceAll(RX_DEC_ENTITY, (_, dec) => codePointStrToChar(dec, 10))
    .replaceAll(RX_HEX_ENTITY, (_, hex) => codePointStrToChar(hex, 16));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Converts the digits of a numeric character reference into the character
 * they denote.
 *
 * @param codePointStr The digits captured from the reference, without the
 * leading `&#` / `&#x` or the trailing `;`.
 * @param radix The base used to parse `codePointStr` (callers pass 10 or 16).
 * @returns The decoded character, or U+FFFD REPLACEMENT CHARACTER when the
 * parsed value is greater than `MAX_CODE_POINT`.
 */
function codePointStrToChar(codePointStr: string, radix: number): string {
  const codePoint = parseInt(codePointStr, radix);

  // `String.fromCodePoint` throws a RangeError for values beyond the Unicode
  // range, so out-of-range references are mapped to the replacement character
  // instead. The escape sequence keeps the literal immune to file re-encoding.
  return codePoint > MAX_CODE_POINT
    ? "\uFFFD"
    : String.fromCodePoint(codePoint);
}
|