mirror of
https://github.com/denoland/std.git
synced 2024-11-22 04:59:05 +00:00
b0803619af
* refactor: add some missing return types and mark some non-exported types as `@internal` * Update expect/fn.ts Co-authored-by: Asher Gomez <ashersaupingomez@gmail.com> * Remove @internals * nits --------- Co-authored-by: Asher Gomez <ashersaupingomez@gmail.com>
105 lines
3.1 KiB
TypeScript
105 lines
3.1 KiB
TypeScript
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
|
||
// This module is browser compatible.
|
||
|
||
/**
 * Lookup table used when unescaping: each key is an entity as it appears in
 * text (e.g. `&amp;`) and each value is the string it is replaced with.
 */
export type EntityList = Record<string, string>;

// Characters replaced by `escape`, paired with the entity emitted for each.
// The entity side must be the escaped spelling; if both sides of a pair are
// the same character, escaping silently becomes a no-op.
const rawToEntityEntries = [
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
] as const;

// Default entity -> raw table: the inverse of the escape pairs above, plus
// two extra named entities that are recognised but never produced here.
const defaultEntityList: EntityList = Object.fromEntries([
  ...rawToEntityEntries.map(([raw, entity]) => [entity, raw]),
  ["&apos;", "'"],
  ["&nbsp;", "\xa0"],
]);

// raw character -> entity lookup.
const rawToEntity = new Map<string, string>(rawToEntityEntries);

// Character class that matches any single character present in `rawToEntity`.
const rawRe = new RegExp(`[${[...rawToEntity.keys()].join("")}]`, "g");
|
||
|
||
/**
 * Escapes text so it can be interpolated into HTML text content and quoted
 * attribute values.
 *
 * Only the characters listed in the module's raw-to-entity table are
 * replaced; every other character is returned unchanged.
 *
 * @example
 * ```ts
 * import { escape } from "https://deno.land/std@$STD_VERSION/html/entities.ts";
 * import { assertEquals } from "https://deno.land/std@$STD_VERSION/assert/assert_equals.ts";
 *
 * assertEquals(escape("<>'&AA"), "&lt;&gt;&#39;&amp;AA");
 *
 * // characters that don't need to be escaped will be left alone,
 * // even if named HTML entities exist for them
 * assertEquals(escape("þð"), "þð");
 * ```
 *
 * @param str The text to escape.
 * @returns The text with each matched character replaced by its entity.
 */
export function escape(str: string): string {
  return str.replaceAll(rawRe, (char) => {
    // `rawRe` is built from the keys of `rawToEntity`, so the lookup always hits.
    const entity = rawToEntity.get(char);
    return entity!;
  });
}
|
||
|
||
/** Options accepted by `unescape`. */
export type UnescapeOptions = {
  /** Table of entity text to replacement text consulted while unescaping. */
  entityList: EntityList;
};

// Applied for any option the caller does not supply.
const defaultUnescapeOptions: UnescapeOptions = { entityList: defaultEntityList };
|
||
|
||
// Largest valid Unicode code point.
const MAX_CODE_POINT = 0x10ffff;

// Decimal numeric character reference, e.g. `&#65;`. Group 1 holds the digits.
const RX_DEC_ENTITY = /&#([0-9]+);/g;
// Hexadecimal numeric character reference, e.g. `&#x41;`. Group 1 holds the
// hex digits. HTML allows the marker to be either `x` or `X`, so both are
// accepted.
const RX_HEX_ENTITY = /&#[xX](\p{AHex}+);/gu;
|
||
|
||
// Compiled matcher per entity-list object, so the pattern is built once for
// each list instead of on every call. Keyed weakly so lists can be collected.
const entityListRegexCache = new WeakMap<EntityList, RegExp>();

/**
 * Unescapes HTML entities in text
 *
 * Named entities from the entity list and decimal/hexadecimal numeric
 * character references are decoded in a single left-to-right pass, so the
 * output of one replacement is never decoded a second time (`&amp;#65;`
 * becomes `&#65;`, not `A`).
 *
 * @example
 * ```ts
 * import { unescape } from "https://deno.land/std@$STD_VERSION/html/entities.ts";
 * import { assertEquals } from "https://deno.land/std@$STD_VERSION/assert/assert_equals.ts";
 *
 * // default options (only handles &<>'" and numeric entities)
 * assertEquals(unescape("&lt;&gt;&apos;&amp;&#65;&#x41;"), "<>'&AA");
 * assertEquals(unescape("&thorn;&eth;"), "&thorn;&eth;");
 *
 * // using the full named entity list from the HTML spec (~47K unminified)
 * import entityList from "https://deno.land/std@$STD_VERSION/html/named_entity_list.json" assert { type: "json" };
 * assertEquals(unescape("&thorn;&eth;", { entityList }), "þð");
 * ```
 *
 * @param str The text to unescape.
 * @param options Optional overrides; `entityList` replaces the default table
 * of named entities.
 * @returns The text with recognised entities replaced.
 */
export function unescape(
  str: string,
  options: Partial<UnescapeOptions> = {},
): string {
  // `??` rather than object spread: an explicit `entityList: undefined` must
  // fall back to the default instead of overriding it.
  const entityList = options.entityList ?? defaultUnescapeOptions.entityList;

  let entityRe = entityListRegexCache.get(entityList);

  if (!entityRe) {
    const namedAlternatives = Object.keys(entityList)
      // An empty key would produce an alternative that matches everywhere.
      .filter((entity) => entity !== "")
      // Longest first so a shorter key cannot shadow a longer one.
      .sort((a, b) => b.length - a.length)
      // Keys are matched literally, so escape regex syntax characters.
      .map((entity) => entity.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));

    // The named part is non-capturing, so group 1 is always the decimal
    // digits and group 2 always the hex digits, whether or not any named
    // entities exist. The `u` flag is required by the hex pattern.
    entityRe = new RegExp(
      [
        ...(namedAlternatives.length > 0
          ? [`(?:${namedAlternatives.join("|")})`]
          : []),
        RX_DEC_ENTITY.source,
        RX_HEX_ENTITY.source,
      ].join("|"),
      "gu",
    );

    entityListRegexCache.set(entityList, entityRe);
  }

  return str.replaceAll(
    entityRe,
    (match: string, dec?: string, hex?: string) => {
      if (dec !== undefined) return codePointStrToChar(dec, 10);
      if (hex !== undefined) return codePointStrToChar(hex, 16);
      // Leave the text untouched if the key has since been removed from the list.
      return entityList[match] ?? match;
    },
  );
}
|
||
|
||
/**
 * Converts the digits of a numeric character reference into the character
 * they denote.
 *
 * @param codePointStr The digits of the reference, without `&#`, `x` or `;`.
 * @param radix The base the digits are written in (10 or 16 for the callers
 * in this module).
 * @returns The character for the parsed code point, or U+FFFD REPLACEMENT
 * CHARACTER when the value is not in the range 0..MAX_CODE_POINT.
 */
function codePointStrToChar(codePointStr: string, radix: number): string {
  const codePoint = parseInt(codePointStr, radix);

  // `String.fromCodePoint` throws a RangeError for anything outside
  // 0..0x10FFFF. Both comparisons are false for NaN, so unparsable input also
  // falls through to the replacement character instead of throwing.
  return codePoint >= 0 && codePoint <= MAX_CODE_POINT
    ? String.fromCodePoint(codePoint)
    : "\uFFFD";
}
|