diff --git a/.gitignore b/.gitignore index b53b70725..5f6d7080f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ **/cov/ /crypto/_wasm/target deno.lock +/console/testdata/unicode_width_crate/target diff --git a/_tools/check_doc_imports.ts b/_tools/check_doc_imports.ts index 84a86a4d2..045b4d753 100644 --- a/_tools/check_doc_imports.ts +++ b/_tools/check_doc_imports.ts @@ -20,7 +20,7 @@ const EXCLUDED_PATHS = [ const ROOT = new URL("../", import.meta.url); const ROOT_LENGTH = ROOT.pathname.slice(0, -1).length; -const RX_JSDOC_COMMENT = /\*\*[^*]*\*+(?:[^/*][^*]*\*+)*/mg; +const RX_JSDOC_COMMENT = /\*\*[^*]*\*+(?:[^/*][^*]*\*+)*/gm; const RX_JSDOC_REMOVE_LEADING_ASTERISK = /^\s*\* ?/gm; const RX_CODE_BLOCK = /`{3}([\w]*)\n([\S\s]+?)\n`{3}/gm; diff --git a/console/_data.json b/console/_data.json new file mode 100644 index 000000000..856994732 --- /dev/null +++ b/console/_data.json @@ -0,0 +1,17 @@ +{ + "UNICODE_VERSION": "15.0.0", + "tables": [ + { + "d": "AAECAwQFBgcICQoLDA0OAw8DDwkQCRESERIA", + "r": "AQEBAgEBAQEBAQEBAQEBBwEHAVABBwcBBwF4" + }, + { + "d": "AAECAwQFBgcGCAYJCgsMDQ4PEAYREhMUBhUWFxgZGhscHR4fICEiIyIkJSYnKCkqJSssLS4vMDEyMzQ1Njc4OToGOzwKBj0GPj9AQUIGQwZEBkVGR0hJSktMTQZOBgoGT1BRUlNUVVZXWFkGWgZbBlxdXl1fYGFiY2RlZmdoBmlqBmsGAQZsBm1uO29wcXI7czt0dXZ3OwY7eHkGent8Bn0Gfn+AgYKDhIWGBoc7iAZdO4kGiosGAXGMBo0GjgaPBpAGkQaSBpMGlJUGlpcGmJmam5ydnp+gLgahLKIGo6SlpganqKmqqwasBq0Grq8GsLGyswa0BrUGtre4Brm6uwZHvAa9vga/wME7wjvDxAbFO8bHO8gGyQbKywbMzQbOBs/Q0QbSBr8GvgbT1AbUBtUG1gbXBtjZ2tsG3N0G3t/g4eLjO+Tl5ufoO+k76gbrBuztOwbu7/AGO+XxCgYKCwZd8g==", + "r": 
"AQEBAQEBAQEBAQEBAQEBAQEBAQMBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQECBQEOAQEBAQEBAQEBAwEBAQEBAQEBAQIBAwEIAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQEBAQEBDQEBBQEBAQEBAgEBAwEBAQEBAQEBAQEBbQHaAQEFAQEBBAECAQEBAQEBAQEBAwGuASFkCAELAQEBAQEBAQEHAQMBAQEaAQIBCAEFAQEBAQEBAQEBAQEBAQEBAQEBAQECAQEBAQIBAQEBAQEBAwEDAQEBAQEBAQUBAQEBAQEBBAEBAVIBAdkBARABAQFfARMBAYoBBAEBBQEmAUkBAQcBAQIBHgEBARUBAQEBAQUBAQcBDwEBARoBAgEBAQEBAQECAQEBAQEBAQEBAQEBAQEBAQMBBAEBAgEBAQEUfwEBAQIDAXj/AQ==" + }, + { + "d": "AFUVAF3Xd3X/93//VXVVV9VX9V91f1/31X93XVXdVdVV9dVV/VVX1X9X/131VfXVVXV3V1VdVV1V1/1dV1X/3VUAVf3/3/9fVf3/3/9fVV1V/11VFQBQVQEAEEEQVQBQVQBAVFUVAFVUVQUAEAAUBFBVFVFVAEBVBQBUVRUAVVFVBRAAAVBVAVVQVQBVBQBAVUVUAQBUUQEAVQVVUVVUAVRVUVUFVUVBVVRBFRRQUVVQUVUBEFRRVQVVBQBRVRQBVFVRVUFVBVVFVVRVUVVUVQRUBQRQVUFVBVVFVVBVBVVQVRVUAVRVUVUFVVFVRVUFRFVRAEBVFQBAVVEAVFUAQFVQVRFRVQEAQAAEVQEAAQBUVUVVAQQAQVVQBVRVAVRVRUFVUVVRVaoAVQFVBVRVBVUFVQVVEABQVUUBAFVRVRUAVUFVUVVAFVRVRVUBVRUUVUUAQEQBAFQVABRVAEBVAFUEQFRFVRUAVVBVBVAQUFVFUBFQVQAFVUAABABUUVVUUFUVANd/X3//BUD3XdV1VQAEAFVXVdX9V1VXVQBUVdVdVdV1VX111VXVV9V//1X/X1VdVf9fVV9VdVdV1VX31dfVXXX9193/d1X/VV9VV3VVX//1VfVVXVVdVdVVdVWlVWlVqVaWVf/f/1X/Vf/1X1Xf/19V9VVf9df1X1X1X1XVVWlVfV31VVpVd1V3VapV33/fVZVVlVX1WVWlVelV+v/v//7/31Xv/6/77/tVWaVVVlVdVWaVmlX1/1WpVVZVlVWVVlVW+V9VFVBVAKqaqlWqWlWqVaoKoKpqqapqgapVqaqpqmqqVapqqv+qVqpqVRVAAFBVBVVQVUUVVUFVVFVQVQBQVRVVBQBQVRUAUFWqVkBVFQVQVVFVAUBBVRVVVFVUVQQUVAVRVVBVRVVRVFFVqlVFVQCqWlUAqmqqaqpVqlZVqmpVAV1VUVVUVQVAVQFBVQBVQBVVQVUAVRVUVQFVBQBUVQVQVVFVAEBVFFRVFVBVFUBBUUVVUVVAVRUAAQBUVRVVUFUFAEBVARRVFVAEVUVVFQBAVVRVBQBUAFRVAAVEVUVVFQBEFQRVBVBVEFRVUFUVAEARVFUVUQAQVQEFEABVFQBBVRVEFVUABVVUVQEAQFUVABRAVRVVAUABVQUAQFBVAEAAEFUFAAUABEFVAUBFEAAQVVARVRVUVVBVBUBVRFVUFQBQVQBUVQBAVRVVFUBVqlRVWlWqVapaVapWVaqpqmmqalVlVWpZVapVqlVBAFUAUABAVRVQVRUAQAEAVQVQVQVUVQBAFQBUVVFVVFUVAAEAVQBAABQAEARAVUVVAFUAQFUAQFVWVZVV/39V/1//X1X/76uq6v9XVWpVqlWqVlVaVapaVapWVamqmqqmqlWqapWqVapWqmqmqpaqWlWVaqpVZVVpVVZVlapVqlpVVmqpVapVlVZVqlZVqlVWVapqqpqqVapWqlZVqpqqWlWlqlWqVlWqVlVRVQD/Xw==", 
+ "r": "CBcBCAEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAQMBAQECAQEBAQEBAQEBAQEBBAEBGAEDAQwBAwEIAQEBAQEBAQgcCAEDAQEBAQEDAQEBDQEDEAELAQEBEQEKAQEBDgEBAgIBAQoBBQQBCAEBAQEBAQEHAQEHBgEWAQIBDQECAgEFAQECAgEKAQ0BAQIKAQ0BDQEBAQEBAQEBAgEHAQ4BAQEBAQQBBgEBDgEBAQEBAQcBAQIBAQEBBAEFAQEBDgEBAQEBAQECAQcBDwECAQwCDQEBAQEBAQECAQgBAQEEAQcBDQEBAQEBAQQBBwERAQEBARYBAQECAQEBGAECAQIBARIBBgEBDQECAQEBAQECAQgBAQEZAQEBAgYBAQEDAQECAQEBAQMBCBgIBwEMAQEGAQcBBwEQAQEBAQEBAgIBCgEBDQEIAQ0BAQEBAQEBBgEBDgEBAQEBAQEBAgEMBwEMAQwBAQEBCQECAwEHAQEBAQ0BAQEBDgIBBgEDAQEBAQEBAQMBAQEBAgEBAQEBAQEBCAEBAgEBAQEBAQkBCAgBAwECAQEBAgEBAQkBAQEBAwECAQMBAQIBBwEFAQEDAQYBAQEBAgEBAQEBAQEBAQECAgEDAQECBAIDAgIBBQEEAQEBAwEPAQEBCyIBCAEJAwQBAQIBAQEBAgECAQEBAQMBAQEBAwEBAQEBAQEBAQgBAQMDAgEBAwEEAQIBAQEBBAEBAQEBAQECAQEBAQEBAQEBAQEHAQQBAwEBAQcBAgUBBgECAQYBAQwBAQEUAQELCAYBFgMFAQYDAQoBAQMBARQBAQkBAQoBBgEVAwsBCgIPAQ0BGQEBAgEHARQBAwIBBgEBAQUBBgQBAgEJAQEBBQECAQMHAQELAQECCQEQAQECAgECAQsBDAEBAQEBCgEBAQsBAQEECQ4BCAQCAQEECAEEAQEFCAEPAQEEAQEPAQgBFAEBAQEBAQEKAQEJAQ8BEAEBEwEBAQIBCwEBDgENAwEKAQEBAQELAQEBAQECAQwBCAEBAQEBDgEDAQwBAQECAQEXAQEBAQEHAgEBBQEIAQEBAQEQAgEBBQEUAQEBAQEbAQEBAQEGARQBAQEBARkBAQEBCQEBAQEQAQIBDwEBARQBAQEBBwEBAQkBAQEBAQECAQEBCwECAQEVAQEBAQQBBQEBAQEOAQEBAQEBEgEBFgEBAgEMAQEBAQ8BAQMBFgEBDgEBBQEPAQETAQECAQMOAgUBCgIBGQEBAQEIAQMBBwEBAwECEwgBAQcLAQUBFwEBAQEDAQEBBwEBBAEBDg0BAQwBAQEDAQQBAQEDBAEBBAEBAQEBEAEPAQgBAQsBAQ4BEQEMAgEBBwEOAQEHAQEBAQQBBAEDCwECAQEBAwEBBggBAgEBAREBBQMKAQEBAwQCEQEBHgEPAQIBAQYEAQYBAwEUAQUMAQEBAQEBAQECAQEBAgEIAwEBBgsBAgEODAMBAgEBCwEBAQEBAwECAQECAQEBBwgPAQ==" + } + ] +} diff --git a/console/_rle.ts b/console/_rle.ts new file mode 100644 index 000000000..415ba2ea3 --- /dev/null +++ b/console/_rle.ts @@ -0,0 +1,39 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
/** Largest run length that fits in the single byte used to store each run. */
const MAX_RUN_LENGTH = 0xff;

/**
 * Convert byte values into a "binary string" (one UTF-16 code unit per byte),
 * which is the input format `btoa` expects.
 *
 * The string is built in fixed-size chunks so that an arbitrarily long input
 * is never spread into a single call's argument list.
 */
function bytesToBinaryString(bytes: number[]): string {
  const CHUNK_SIZE = 0x2000;
  let out = "";
  for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
    out += String.fromCharCode(...bytes.slice(i, i + CHUNK_SIZE));
  }
  return out;
}

/**
 * Run-length encode an array of byte values.
 *
 * @param arr Values to encode; each must be an integer in the range 0..255.
 * @returns An object with two base64 strings of equal decoded length: `d`
 * holds one byte per run (the repeated value) and `r` holds the matching run
 * lengths, one byte each.
 *
 * Runs longer than 255 are split into several consecutive runs of the same
 * value, so every run length fits in one byte. `runLengthDecode` needs no
 * special handling for this.
 */
export function runLengthEncode(arr: number[]) {
  const data: number[] = [];
  const runLengths: number[] = [];

  for (const x of arr) {
    // Anything else would be truncated by `String.fromCharCode` or rejected
    // by `btoa`; fail early with a clear message instead.
    assert(
      Number.isInteger(x) && x >= 0 && x <= 0xff,
      `runLengthEncode: value out of byte range: ${x}`,
    );

    const last = data.length - 1;
    if (last >= 0 && data[last] === x && runLengths[last] < MAX_RUN_LENGTH) {
      ++runLengths[last];
    } else {
      // New value, or the current run is full: start a new run.
      data.push(x);
      runLengths.push(1);
    }
  }

  return {
    d: btoa(bytesToBinaryString(data)),
    r: btoa(bytesToBinaryString(runLengths)),
  };
}

/**
 * Inverse of `runLengthEncode`.
 *
 * @param encoded Object with base64 strings `d` (run values) and `r` (run
 * lengths), as produced by `runLengthEncode`.
 * @returns The decoded bytes.
 * @throws If `d` and `r` decode to different lengths (corrupt input).
 */
export function runLengthDecode({ d, r }: { d: string; r: string }) {
  const data = atob(d);
  const runLengths = atob(r);

  assert(
    data.length === runLengths.length,
    "runLengthDecode: data and run-length streams differ in length",
  );

  // First pass: compute the output size so the buffer is allocated once.
  let total = 0;
  for (let i = 0; i < runLengths.length; ++i) {
    total += runLengths.charCodeAt(i);
  }

  // Second pass: write each run directly into the output buffer.
  const out = new Uint8Array(total);
  let offset = 0;
  for (let i = 0; i < runLengths.length; ++i) {
    const length = runLengths.charCodeAt(i);
    out.fill(data.charCodeAt(i), offset, offset + length);
    offset += length;
  }

  return out;
}
// Note: This test is optional. It requires the Rust code to be compiled locally
Deno.test("fast-check equality with unicode_width Rust crate", async (t) => {
  // File name Cargo gives the compiled `cdylib` on each platform. Note that
  // Windows DLLs do not get the `lib` prefix.
  const libNames: Partial<Record<string, string>> = {
    darwin: "libunicode_width_crate.dylib",
    linux: "libunicode_width_crate.so",
    windows: "unicode_width_crate.dll",
  };
  const libName = libNames[Deno.build.os];
  if (libName === undefined) {
    // Fail with a clear message rather than trying to open ".../undefined".
    throw new Error(
      `No known unicode_width_crate library name for OS "${Deno.build.os}"`,
    );
  }

  const libPath = fromFileUrl(
    import.meta.resolve(
      `../testdata/unicode_width_crate/target/debug/${libName}`,
    ),
  );

  // Encode as a NUL-terminated UTF-8 buffer for the FFI call.
  const encoder = new TextEncoder();
  const toCString = (str: string) => encoder.encode(str + "\0");

  // Opened before the `try` so that `finally` only runs (and only closes the
  // library) when opening succeeded. Requires the `--unstable` flag.
  const dylib = Deno.dlopen(libPath, {
    unicode_width: { parameters: ["buffer"], result: "usize" },
  });

  try {
    for (
      const arbitrary of [
        "string",
        "unicodeString",
        "fullUnicodeString",
      ] as const
    ) {
      await t.step({
        name: `fc.${arbitrary}()`,
        fn() {
          // NOTE(review): the original comment here read "To avoid esm.sh
          // statically analyzed"; presumably it refers to the dynamic
          // `fc[arbitrary]` lookup below - confirm with the author.
          fc.assert(
            fc.property(
              fc[arbitrary](),
              // JSON stringify to allow "\0" chars to cross FFI boundary in a null-terminated string
              // deno-lint-ignore no-explicit-any
              (str: any) =>
                unicodeWidth(str) ===
                  dylib.symbols.unicode_width(toCString(JSON.stringify(str))),
            ),
          );
        },
      });
    }
  } finally {
    dylib.close();
  }
});
// change this line and re-run the script to update for new Unicode versions
const UNICODE_VERSION = "15.0.0";

/** Number of Unicode code points (U+0000 through U+10FFFF). */
const NUM_CODEPOINTS = 0x110000;
/** Number of bits needed to represent the largest code point. */
const MAX_CODEPOINT_BITS = Math.ceil(Math.log2(NUM_CODEPOINTS - 1));

/** Size in bits of a single packed entry in a table (see `Table.toBytes`). */
enum OffsetType {
  U2 = 2,
  U4 = 4,
  U8 = 8,
}

type CodePoint = number;
type BitPos = number;

/**
 * One `[lowBit, capBit, offsetType]` tuple per lookup table: the table is
 * indexed by code point bits `lowBit` (inclusive) to `capBit` (exclusive) and
 * its entries are `offsetType` bits wide.
 */
const TABLE_CFGS: [BitPos, BitPos, OffsetType][] = [
  [13, MAX_CODEPOINT_BITS, OffsetType.U8],
  [6, 13, OffsetType.U8],
  [0, 6, OffsetType.U2],
];

/**
 * Download one file of the Unicode Character Database.
 *
 * @param filename File name inside the `ucd` directory, e.g. `UnicodeData.txt`.
 * @param version Unicode version, e.g. `15.0.0`.
 * @returns The file contents as text.
 * @throws If the server responds with a non-2xx status.
 */
async function fetchUnicodeData(filename: string, version: string) {
  const res = await fetch(
    `https://www.unicode.org/Public/${version}/ucd/${filename}`,
  );

  if (!res.ok) {
    // Discard the unread body so the response does not stay open.
    await res.body?.cancel();
    throw new Error(
      `Failed to fetch ${filename}: ${res.status} ${res.statusText}`,
    );
  }

  return await res.text();
}

/** Width classes stored in the generated tables (2 bits each). */
enum EffectiveWidth {
  Zero = 0,
  Narrow = 1,
  Wide = 2,
  Ambiguous = 3,
}

/** Maps the width codes used in `EastAsianWidth.txt` to `EffectiveWidth`. */
const widthCodes = {
  N: EffectiveWidth.Narrow,
  Na: EffectiveWidth.Narrow,
  H: EffectiveWidth.Narrow,
  W: EffectiveWidth.Wide,
  F: EffectiveWidth.Wide,
  A: EffectiveWidth.Ambiguous,
};

/**
 * Build a per-code-point width map from `EastAsianWidth.txt`.
 *
 * Code points not listed in the file are treated as `Narrow`.
 *
 * @returns An array of length `NUM_CODEPOINTS`, indexed by code point.
 */
async function loadEastAsianWidths(version: string) {
  const eaw = await fetchUnicodeData("EastAsianWidth.txt", version);

  // Matches `XXXX;W` and `XXXX..YYYY;W`. Whitespace around `;` is tolerated so
  // that files which pad the separator still parse.
  const entry = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\w+)/;

  const widthMap: EffectiveWidth[] = [];
  let current = 0;

  for (const line of eaw.split("\n")) {
    const match = line.match(entry);
    if (!match) {
      // Comment, blank line, or anything else that is not a data row.
      continue;
    }

    // A single code point is a range whose start and end coincide.
    const [, lowHex, highHex = lowHex, code] = match;

    const low = parseInt(lowHex, 16);
    const high = parseInt(highHex, 16);
    const width: EffectiveWidth | undefined =
      widthCodes[code as keyof typeof widthCodes];
    assert(width !== undefined, `Unknown East Asian Width code: ${code}`);

    // Rows must be in ascending code point order.
    assert(current <= high);

    while (current <= high) {
      // Gaps before `low` are code points the file does not mention.
      widthMap.push(current < low ? EffectiveWidth.Narrow : width);
      ++current;
    }
  }

  while (widthMap.length < NUM_CODEPOINTS) {
    widthMap.push(EffectiveWidth.Narrow);
  }

  return widthMap;
}

/**
 * Build a per-code-point map of zero-width characters from `UnicodeData.txt`.
 *
 * A code point is zero width when its general category is one of `Cc`, `Cf`,
 * `Mn` or `Me`.
 *
 * @returns An array of length `NUM_CODEPOINTS`, indexed by code point.
 */
async function loadZeroWidths(version: string) {
  const categories = await fetchUnicodeData("UnicodeData.txt", version);

  const zwMap: boolean[] = [];
  let current = 0;

  for (const line of categories.split("\n")) {
    const rawData = line.split(";");

    // Data rows have exactly 15 `;`-separated fields.
    if (rawData.length !== 15) {
      continue;
    }
    const [codepoint, name, catCode] = [
      parseInt(rawData[0], 16),
      rawData[1],
      rawData[2],
    ];

    const zeroWidth = ["Cc", "Cf", "Mn", "Me"].includes(catCode);

    // Rows must be in ascending code point order.
    assert(current <= codepoint);

    while (current <= codepoint) {
      // A `<..., Last>` row closes a range, so the category applies to every
      // code point since the previous row. Otherwise it applies only to the
      // row's own code point and skipped ones are not zero width.
      if (name.endsWith(", Last>") || (current === codepoint)) {
        zwMap.push(zeroWidth);
      } else {
        zwMap.push(false);
      }
      ++current;
    }
  }
  while (zwMap.length < NUM_CODEPOINTS) {
    zwMap.push(false);
  }

  return zwMap;
}

/**
 * A group of `(codepoint, width)` entries that share one table slot, together
 * with the sequence of widths in insertion order.
 */
class Bucket {
  /** JSON-encoded `[codepoint, width]` pairs (strings, so the set dedupes). */
  entrySet: Set<string>;
  widths: EffectiveWidth[];

  constructor() {
    this.entrySet = new Set();
    this.widths = [];
  }

  /** Add one entry to the bucket. */
  append(codepoint: CodePoint, width: EffectiveWidth) {
    this.entrySet.add(JSON.stringify([codepoint, width]));
    this.widths.push(width);
  }

  /**
   * Merge `attempt` into this bucket if their width sequences are compatible,
   * i.e. the shorter sequence is a prefix of the longer one.
   *
   * @returns `true` if the merge happened, `false` (and no change) otherwise.
   */
  tryExtend(attempt: Bucket) {
    // On equal length, `this.widths` counts as the shorter one.
    const [less, more] = this.widths.length <= attempt.widths.length
      ? [this.widths, attempt.widths]
      : [attempt.widths, this.widths];

    if (!more.slice(0, less.length).every((v, i) => v === less[i])) {
      return false;
    }

    for (const x of attempt.entrySet.values()) {
      this.entrySet.add(x);
    }

    this.widths = more;

    return true;
  }

  /** All entries of the bucket, sorted by code point. */
  entries() {
    const result = [...this.entrySet]
      .map((x) => JSON.parse(x) as [CodePoint, EffectiveWidth]);

    return result.sort((a, b) => a[0] - b[0]);
  }

  /** The bucket's width if all its entries share one width, else `null`. */
  width() {
    return new Set(this.widths).size === 1 ? this.widths[0] : null;
  }
}

/**
 * Distribute entries into `2 ** (capBit - lowBit)` buckets, keyed by code
 * point bits `lowBit` (inclusive) to `capBit` (exclusive).
 */
function makeBuckets(
  entries: [CodePoint, EffectiveWidth][],
  lowBit: BitPos,
  capBit: BitPos,
) {
  const numBits = capBit - lowBit;
  assert(numBits > 0);
  const buckets = Array.from({ length: 2 ** numBits }, () => new Bucket());

  const mask = (1 << numBits) - 1;

  for (const [codepoint, width] of entries) {
    buckets[(codepoint >> lowBit) & mask].append(codepoint, width);
  }

  return buckets;
}

/**
 * One level of the lookup structure. `entries` holds, for every bucket built
 * from the input groups, the index of the deduplicated bucket in `indexed`
 * (or, after `indicesToWidths`, the width itself).
 */
class Table {
  lowBit: BitPos;
  capBit: BitPos;
  offsetType: OffsetType;
  entries: number[];
  /** Deduplicated buckets; `null` once `indicesToWidths` has been called. */
  indexed: Bucket[] | null;

  constructor(
    entryGroups: [CodePoint, EffectiveWidth][][],
    lowBit: BitPos,
    capBit: BitPos,
    offsetType: OffsetType,
  ) {
    this.lowBit = lowBit;
    this.capBit = capBit;
    this.offsetType = offsetType;
    this.entries = [];

    const indexed: Bucket[] = [];
    this.indexed = indexed;

    const buckets = entryGroups.flatMap((entries) =>
      makeBuckets(entries, this.lowBit, this.capBit)
    );

    for (const bucket of buckets) {
      // Reuse the first compatible bucket. `tryExtend` only mutates on
      // success, so failed probes leave earlier buckets untouched.
      let index = indexed.findIndex((existing) => existing.tryExtend(bucket));
      if (index === -1) {
        index = indexed.length;
        indexed.push(bucket);
      }
      this.entries.push(index);
    }

    // Every entry must fit in `offsetType` bits.
    for (const index of this.entries) {
      assert(index < (1 << this.offsetType));
    }
  }

  /**
   * Replace each entry (a bucket index) with that bucket's width. Intended
   * for the last table, whose buckets must each have a single width.
   *
   * @throws If called twice, or if a bucket has mixed widths.
   */
  indicesToWidths() {
    const indexed = this.indexed;
    if (!indexed) {
      throw new Error(`Can't call indicesToWidths twice on the same Table`);
    }

    this.entries = this.entries.map((i) => {
      const width = indexed[i].width();
      if (width == null) throw new TypeError("width cannot be null");
      return width;
    });

    this.indexed = null;
  }

  /** The deduplicated buckets; unavailable after `indicesToWidths`. */
  get buckets() {
    if (!this.indexed) {
      throw new Error(`Can't access buckets after calling indicesToWidths`);
    }

    return this.indexed;
  }

  /**
   * Pack `entries` into bytes, `8 / offsetType` entries per byte, with the
   * first entry in the least significant bits.
   */
  toBytes() {
    const entriesPerByte = Math.trunc(8 / this.offsetType);
    const byteArray: number[] = [];
    for (let i = 0; i < this.entries.length; i += entriesPerByte) {
      let byte = 0;
      for (let j = 0; j < entriesPerByte; ++j) {
        // A partially filled final byte is padded with zero bits.
        byte |= (this.entries[i + j] ?? 0) << (j * this.offsetType);
      }
      byteArray.push(byte);
    }

    return byteArray;
  }
}

/**
 * Build one `Table` per config. Each table's deduplicated buckets become the
 * entry groups that the next table subdivides.
 */
function makeTables(
  tableCfgs: [BitPos, BitPos, OffsetType][],
  entries: [CodePoint, EffectiveWidth][],
) {
  const tables: Table[] = [];
  let entryGroups = [entries];

  for (const [lowBit, capBit, offsetType] of tableCfgs) {
    const table = new Table(entryGroups, lowBit, capBit, offsetType);
    entryGroups = table.buckets.map((bucket) => bucket.entries());

    tables.push(table);
  }

  return tables;
}

/**
 * Generate the width lookup tables for the given Unicode version.
 *
 * @param version Unicode version, e.g. `15.0.0`.
 * @returns One `Table` per entry of `TABLE_CFGS`; the last one holds widths
 * rather than bucket indices.
 */
export async function tables(version: string) {
  console.info(`Generating tables for Unicode ${version}`);

  const eawMap = await loadEastAsianWidths(version);
  const zwMap = await loadZeroWidths(version);

  // Zero width takes precedence over the East Asian Width class.
  const widthMap = eawMap.map((x, i) => zwMap[i] ? EffectiveWidth.Zero : x);

  // Overrides: U+00AD (soft hyphen) is forced to narrow, and U+1160..U+11FF
  // (Hangul Jamo medial vowels and final consonants) are forced to zero
  // width. NOTE(review): presumably mirrors the upstream unicode_width crate
  // this script is ported from - confirm.
  widthMap[0x00AD] = EffectiveWidth.Narrow;

  for (let i = 0x1160; i < 0x11FF + 1; ++i) {
    widthMap[i] = EffectiveWidth.Zero;
  }

  const widthTables = makeTables(TABLE_CFGS, [...widthMap.entries()]);

  widthTables[widthTables.length - 1].indicesToWidths();

  return widthTables;
}

const data = {
  UNICODE_VERSION,
  tables: (await tables(UNICODE_VERSION)).map((table) =>
    runLengthEncode(table.toBytes())
  ),
};

assert(data.UNICODE_VERSION.split(".").length === 3);
assert(data.tables.length === 3);

// Resolve the output next to this script's parent directory, not relative to
// whatever directory the script happens to be run from.
await Deno.writeTextFile(
  new URL("../_data.json", import.meta.url),
  JSON.stringify(data, null, 2) + "\n",
);
+ +/** + * Functions for console-related tasks such as TTY text layout + * + * @module + */ + +export * from "./unicode_width.ts"; diff --git a/console/testdata/unicode_width_crate/Cargo.lock b/console/testdata/unicode_width_crate/Cargo.lock new file mode 100644 index 000000000..b1653f8cb --- /dev/null +++ b/console/testdata/unicode_width_crate/Cargo.lock @@ -0,0 +1,53 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "libc" +version = "0.2.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "serde" +version = "1.0.159" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" + +[[package]] +name = "serde_json" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unicode_width_crate" +version = "0.1.0" +dependencies = [ + "libc", + "serde_json", + "unicode-width", +] diff --git a/console/testdata/unicode_width_crate/Cargo.toml b/console/testdata/unicode_width_crate/Cargo.toml new file mode 100644 index 
/** Lazily decoded lookup tables; populated on the first `lookupWidth` call. */
let tables: Uint8Array[] | null = null;

/**
 * Look up the width of a code point in the three-level table.
 *
 * Bits 13 and up of `cp` index the first table, bits 6-12 the second and
 * bits 2-5 the third. The byte found there packs four 2-bit widths, selected
 * by the two lowest bits of `cp`.
 *
 * @returns 0, 1 or 2. The stored value 3 is reported as 1.
 */
function lookupWidth(cp: number) {
  if (!tables) tables = data.tables.map(runLengthDecode);

  const t1Offset = tables[0][(cp >> 13) & 0xff];
  const t2Offset = tables[1][128 * t1Offset + ((cp >> 6) & 0x7f)];
  const packedWidths = tables[2][16 * t2Offset + ((cp >> 2) & 0xf)];

  const width = (packedWidths >> (2 * (cp & 0b11))) & 0b11;

  return width === 3 ? 1 : width;
}

/** Memoized `charWidth` results, keyed by character. */
const cache = new Map<string, number | null>();

/**
 * Width of a single character (one code point).
 *
 * @returns The width in columns, or `null` for control characters
 * (U+0001..U+001F and U+007F..U+009F), which have no defined width.
 */
function charWidth(ch: string): number | null {
  // `null` is a legitimate cached value, so test against `undefined` only.
  const cached = cache.get(ch);
  if (cached !== undefined) return cached;

  const cp = ch.codePointAt(0)!;
  let v: number | null;

  if (cp < 0x7f) {
    // ASCII: printable characters are width 1, NUL is width 0.
    v = cp >= 0x20 ? 1 : cp === 0 ? 0 : null;
  } else if (cp >= 0xa0) {
    v = lookupWidth(cp);
  } else {
    v = null;
  }

  cache.set(ch, v);
  return v;
}

/**
 * Get the width of a string's constituent characters in columns in TTY-like
 * environments.
 *
 * Characters without a defined width (control characters) count as 0.
 *
 * Combine with `stripColor` from `fmt/colors.ts` to get the expected physical
 * width of a string in the console.
 *
 * @example
 * ```ts
 * import { unicodeWidth } from "https://deno.land/std@$STD_VERSION/console/unicode_width.ts";
 * import { assertEquals } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 * import { stripColor } from "https://deno.land/std@$STD_VERSION/fmt/colors.ts";
 *
 * assertEquals(unicodeWidth("hello world"), 11);
 * assertEquals(unicodeWidth("天地玄黃宇宙洪荒"), 16);
 * assertEquals(unicodeWidth("ｆｕｌｌｗｉｄｔｈ"), 18);
 * assertEquals(unicodeWidth(stripColor("\x1b[36mголубой\x1b[39m")), 7);
 * assertEquals(unicodeWidth(stripColor("\x1b[31m紅色\x1b[39m")), 4);
 * assertEquals(unicodeWidth(stripColor("\x1B]8;;https://deno.land\x07🦕\x1B]8;;\x07")), 2);
 * ```
 *
 * @param str The string to measure.
 * @returns The total width in columns.
 */
export function unicodeWidth(str: string): number {
  let total = 0;
  // Iterating a string yields whole code points, not UTF-16 code units.
  for (const ch of str) {
    total += charWidth(ch) ?? 0;
  }
  return total;
}
// Checks `unicodeWidth` against hand-picked strings from several scripts.
Deno.test("unicodeWidth", async (t) => {
  // Printable ASCII: one column per character.
  await t.step("ASCII", () => {
    const lorem =
      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

    assertEquals(unicodeWidth(lorem), lorem.length);
  });

  // Wide characters: two columns each. `.length` counts UTF-16 code units, so
  // this presumably relies on every character in the sample being a single
  // code unit - keep that in mind when changing the sample.
  await t.step("CJK", () => {
    const qianZiWen =
      "天地玄黃宇宙洪荒日月盈昃辰宿列張寒來暑往秋收冬藏閏餘成歲律呂調陽雲騰致雨露結爲霜金生麗水玉出崑岡劍號巨闕珠稱夜光果珍李柰菜重芥薑海鹹河淡鱗潛羽翔龍師火帝鳥官人皇始制文字乃服衣裳推位讓國有虞陶唐弔民伐罪周發殷湯坐朝問道垂拱平章愛育黎首臣伏戎羌遐邇壹體率賓歸王鳴鳳在樹白駒食場化被草木賴及萬方蓋此身髮四大五常恭惟鞠養豈敢毀傷女慕貞絜男效才良知過必改得能莫忘罔談彼短靡恃己長信使可覆器欲難量墨悲絲淬詩讚羔羊";

    assertEquals(unicodeWidth(qianZiWen), qianZiWen.length * 2);
  });

  // The same visible character must measure the same in both forms.
  await t.step("Unicode normalization", async (t) => {
    const str = "á";

    // Composed form: a single code unit of width 1.
    await t.step("NFC", () => {
      const nfc = str.normalize("NFC");

      assertEquals(nfc.length, 1);
      assertEquals(unicodeWidth(nfc), 1);
    });

    // Decomposed form: two code units, but still width 1 in total.
    await t.step("NFD", () => {
      const nfd = str.normalize("NFD");

      assertEquals(nfd.length, 2);
      assertEquals(unicodeWidth(nfd), 1);
    });
  });

  await t.step("Emoji", () => {
    assertEquals(unicodeWidth("👩"), 2); // Woman
    assertEquals(unicodeWidth("🔬"), 2); // Microscope
    // Note: Returns 4 for the below case, following the upstream crate
    // `unicode_width`. Another possibility might be returning 2, which is what
    // `npm:string-width` returns.
    // See discussion at https://github.com/denoland/deno_std/pull/3297#discussion_r1166289430
    assertEquals(unicodeWidth("👩‍🔬"), 4); // Woman Scientist
  });
});