feat(console): add unicodeWidth for TTY text layout (#3297)

Co-authored-by: Yoshiya Hinosawa <stibium121@gmail.com>
Co-authored-by: Jesse Jackson <jsejcksn@users.noreply.github.com>
This commit is contained in:
lionel-rowe 2023-04-18 08:13:58 +01:00 committed by GitHub
parent 88b52c5576
commit 71e05aaa99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 653 additions and 1 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@
**/cov/
/crypto/_wasm/target
deno.lock
/console/testdata/unicode_width_crate/target

View File

@ -20,7 +20,7 @@ const EXCLUDED_PATHS = [
const ROOT = new URL("../", import.meta.url);
const ROOT_LENGTH = ROOT.pathname.slice(0, -1).length;
const RX_JSDOC_COMMENT = /\*\*[^*]*\*+(?:[^/*][^*]*\*+)*/mg;
const RX_JSDOC_COMMENT = /\*\*[^*]*\*+(?:[^/*][^*]*\*+)*/gm;
const RX_JSDOC_REMOVE_LEADING_ASTERISK = /^\s*\* ?/gm;
const RX_CODE_BLOCK = /`{3}([\w]*)\n([\S\s]+?)\n`{3}/gm;

17
console/_data.json Normal file
View File

@ -0,0 +1,17 @@
{
"UNICODE_VERSION": "15.0.0",
"tables": [
{
"d": "AAECAwQFBgcICQoLDA0OAw8DDwkQCRESERIA",
"r": "AQEBAgEBAQEBAQEBAQEBBwEHAVABBwcBBwF4"
},
{
"d": "AAECAwQFBgcGCAYJCgsMDQ4PEAYREhMUBhUWFxgZGhscHR4fICEiIyIkJSYnKCkqJSssLS4vMDEyMzQ1Njc4OToGOzwKBj0GPj9AQUIGQwZEBkVGR0hJSktMTQZOBgoGT1BRUlNUVVZXWFkGWgZbBlxdXl1fYGFiY2RlZmdoBmlqBmsGAQZsBm1uO29wcXI7czt0dXZ3OwY7eHkGent8Bn0Gfn+AgYKDhIWGBoc7iAZdO4kGiosGAXGMBo0GjgaPBpAGkQaSBpMGlJUGlpcGmJmam5ydnp+gLgahLKIGo6SlpganqKmqqwasBq0Grq8GsLGyswa0BrUGtre4Brm6uwZHvAa9vga/wME7wjvDxAbFO8bHO8gGyQbKywbMzQbOBs/Q0QbSBr8GvgbT1AbUBtUG1gbXBtjZ2tsG3N0G3t/g4eLjO+Tl5ufoO+k76gbrBuztOwbu7/AGO+XxCgYKCwZd8g==",
"r": "AQEBAQEBAQEBAQEBAQEBAQEBAQMBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQECBQEOAQEBAQEBAQEBAwEBAQEBAQEBAQIBAwEIAQEBAQEBAQEBAQEBAQIBAQEBAQEBAQEBAQEBAQEBDQEBBQEBAQEBAgEBAwEBAQEBAQEBAQEBbQHaAQEFAQEBBAECAQEBAQEBAQEBAwGuASFkCAELAQEBAQEBAQEHAQMBAQEaAQIBCAEFAQEBAQEBAQEBAQEBAQEBAQEBAQECAQEBAQIBAQEBAQEBAwEDAQEBAQEBAQUBAQEBAQEBBAEBAVIBAdkBARABAQFfARMBAYoBBAEBBQEmAUkBAQcBAQIBHgEBARUBAQEBAQUBAQcBDwEBARoBAgEBAQEBAQECAQEBAQEBAQEBAQEBAQEBAQMBBAEBAgEBAQEUfwEBAQIDAXj/AQ=="
},
{
"d": "AFUVAF3Xd3X/93//VXVVV9VX9V91f1/31X93XVXdVdVV9dVV/VVX1X9X/131VfXVVXV3V1VdVV1V1/1dV1X/3VUAVf3/3/9fVf3/3/9fVV1V/11VFQBQVQEAEEEQVQBQVQBAVFUVAFVUVQUAEAAUBFBVFVFVAEBVBQBUVRUAVVFVBRAAAVBVAVVQVQBVBQBAVUVUAQBUUQEAVQVVUVVUAVRVUVUFVUVBVVRBFRRQUVVQUVUBEFRRVQVVBQBRVRQBVFVRVUFVBVVFVVRVUVVUVQRUBQRQVUFVBVVFVVBVBVVQVRVUAVRVUVUFVVFVRVUFRFVRAEBVFQBAVVEAVFUAQFVQVRFRVQEAQAAEVQEAAQBUVUVVAQQAQVVQBVRVAVRVRUFVUVVRVaoAVQFVBVRVBVUFVQVVEABQVUUBAFVRVRUAVUFVUVVAFVRVRVUBVRUUVUUAQEQBAFQVABRVAEBVAFUEQFRFVRUAVVBVBVAQUFVFUBFQVQAFVUAABABUUVVUUFUVANd/X3//BUD3XdV1VQAEAFVXVdX9V1VXVQBUVdVdVdV1VX111VXVV9V//1X/X1VdVf9fVV9VdVdV1VX31dfVXXX9193/d1X/VV9VV3VVX//1VfVVXVVdVdVVdVWlVWlVqVaWVf/f/1X/Vf/1X1Xf/19V9VVf9df1X1X1X1XVVWlVfV31VVpVd1V3VapV33/fVZVVlVX1WVWlVelV+v/v//7/31Xv/6/77/tVWaVVVlVdVWaVmlX1/1WpVVZVlVWVVlVW+V9VFVBVAKqaqlWqWlWqVaoKoKpqqapqgapVqaqpqmqqVapqqv+qVqpqVRVAAFBVBVVQVUUVVUFVVFVQVQBQVRVVBQBQVRUAUFWqVkBVFQVQVVFVAUBBVRVVVFVUVQQUVAVRVVBVRVVRVFFVqlVFVQCqWlUAqmqqaqpVqlZVqmpVAV1VUVVUVQVAVQFBVQBVQBVVQVUAVRVUVQFVBQBUVQVQVVFVAEBVFFRVFVBVFUBBUUVVUVVAVRUAAQBUVRVVUFUFAEBVARRVFVAEVUVVFQBAVVRVBQBUAFRVAAVEVUVVFQBEFQRVBVBVEFRVUFUVAEARVFUVUQAQVQEFEABVFQBBVRVEFVUABVVUVQEAQFUVABRAVRVVAUABVQUAQFBVAEAAEFUFAAUABEFVAUBFEAAQVVARVRVUVVBVBUBVRFVUFQBQVQBUVQBAVRVVFUBVqlRVWlWqVapaVapWVaqpqmmqalVlVWpZVapVqlVBAFUAUABAVRVQVRUAQAEAVQVQVQVUVQBAFQBUVVFVVFUVAAEAVQBAABQAEARAVUVVAFUAQFUAQFVWVZVV/39V/1//X1X/76uq6v9XVWpVqlWqVlVaVapaVapWVamqmqqmqlWqapWqVapWqmqmqpaqWlWVaqpVZVVpVVZVlapVqlpVVmqpVapVlVZVqlZVqlVWVapqqpqqVapWqlZVqpqqWlWlqlWqVlWqVlVRVQD/Xw==",
"r": "CBcBCAEBAQEBAQEBAQECAQEBAQEBAQEBAQEBAQMBAQECAQEBAQEBAQEBAQEBBAEBGAEDAQwBAwEIAQEBAQEBAQgcCAEDAQEBAQEDAQEBDQEDEAELAQEBEQEKAQEBDgEBAgIBAQoBBQQBCAEBAQEBAQEHAQEHBgEWAQIBDQECAgEFAQECAgEKAQ0BAQIKAQ0BDQEBAQEBAQEBAgEHAQ4BAQEBAQQBBgEBDgEBAQEBAQcBAQIBAQEBBAEFAQEBDgEBAQEBAQECAQcBDwECAQwCDQEBAQEBAQECAQgBAQEEAQcBDQEBAQEBAQQBBwERAQEBARYBAQECAQEBGAECAQIBARIBBgEBDQECAQEBAQECAQgBAQEZAQEBAgYBAQEDAQECAQEBAQMBCBgIBwEMAQEGAQcBBwEQAQEBAQEBAgIBCgEBDQEIAQ0BAQEBAQEBBgEBDgEBAQEBAQEBAgEMBwEMAQwBAQEBCQECAwEHAQEBAQ0BAQEBDgIBBgEDAQEBAQEBAQMBAQEBAgEBAQEBAQEBCAEBAgEBAQEBAQkBCAgBAwECAQEBAgEBAQkBAQEBAwECAQMBAQIBBwEFAQEDAQYBAQEBAgEBAQEBAQEBAQECAgEDAQECBAIDAgIBBQEEAQEBAwEPAQEBCyIBCAEJAwQBAQIBAQEBAgECAQEBAQMBAQEBAwEBAQEBAQEBAQgBAQMDAgEBAwEEAQIBAQEBBAEBAQEBAQECAQEBAQEBAQEBAQEHAQQBAwEBAQcBAgUBBgECAQYBAQwBAQEUAQELCAYBFgMFAQYDAQoBAQMBARQBAQkBAQoBBgEVAwsBCgIPAQ0BGQEBAgEHARQBAwIBBgEBAQUBBgQBAgEJAQEBBQECAQMHAQELAQECCQEQAQECAgECAQsBDAEBAQEBCgEBAQsBAQEECQ4BCAQCAQEECAEEAQEFCAEPAQEEAQEPAQgBFAEBAQEBAQEKAQEJAQ8BEAEBEwEBAQIBCwEBDgENAwEKAQEBAQELAQEBAQECAQwBCAEBAQEBDgEDAQwBAQECAQEXAQEBAQEHAgEBBQEIAQEBAQEQAgEBBQEUAQEBAQEbAQEBAQEGARQBAQEBARkBAQEBCQEBAQEQAQIBDwEBARQBAQEBBwEBAQkBAQEBAQECAQEBCwECAQEVAQEBAQQBBQEBAQEOAQEBAQEBEgEBFgEBAgEMAQEBAQ8BAQMBFgEBDgEBBQEPAQETAQECAQMOAgUBCgIBGQEBAQEIAQMBBwEBAwECEwgBAQcLAQUBFwEBAQEDAQEBBwEBBAEBDg0BAQwBAQEDAQQBAQEDBAEBBAEBAQEBEAEPAQgBAQsBAQ4BEQEMAgEBBwEOAQEHAQEBAQQBBAEDCwECAQEBAwEBBggBAgEBAREBBQMKAQEBAwQCEQEBHgEPAQIBAQYEAQYBAwEUAQUMAQEBAQEBAQECAQEBAgEIAwEBBgsBAgEODAMBAgEBCwEBAQEBAwECAQECAQEBBwgPAQ=="
}
]
}

39
console/_rle.ts Normal file
View File

@ -0,0 +1,39 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { assert } from "../_util/asserts.ts";
/**
 * Run-length encode a sequence of numbers.
 *
 * Each maximal run of strictly equal neighbours is collapsed into one value
 * plus a repeat count. Values and counts are then serialized separately, one
 * character per item, and base64-encoded.
 *
 * @param arr The values to encode.
 * @returns An object whose `d` is the base64 of the run values and whose `r`
 * is the base64 of the matching run lengths (the shape `runLengthDecode`
 * takes).
 * @throws If any run is 256 items or longer, since a run length must fit in a
 * single byte.
 */
export function runLengthEncode(arr: number[]) {
  const values: number[] = [];
  const counts: number[] = [];
  for (let i = 0; i < arr.length; ++i) {
    const current = arr[i];
    const continuesRun = i > 0 && current === arr[i - 1];
    if (continuesRun) {
      counts[counts.length - 1] += 1;
    } else {
      values.push(current);
      counts.push(1);
    }
  }
  // Every count is written as a single character code below.
  assert(counts.every((count) => count < 0x100));
  const d = btoa(String.fromCharCode(...values));
  const r = btoa(String.fromCharCode(...counts));
  return { d, r };
}
/**
 * Expand data produced by `runLengthEncode` back into bytes.
 *
 * @param d Base64 of the run values, one character per run.
 * @param r Base64 of the run lengths, one character per run.
 * @returns The expanded values as a `Uint8Array`.
 */
export function runLengthDecode({ d, r }: { d: string; r: string }) {
  const values = atob(d);
  const counts = atob(r);
  const pieces: string[] = [];
  for (let i = 0; i < counts.length; ++i) {
    // The i-th value is repeated as many times as the i-th count says.
    pieces.push(values[i].repeat(counts.charCodeAt(i)));
  }
  const expanded = pieces.join("");
  const bytes = new Uint8Array(expanded.length);
  for (let i = 0; i < expanded.length; ++i) {
    bytes[i] = expanded.charCodeAt(i);
  }
  return bytes;
}

View File

@ -0,0 +1,62 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// Run this test with `deno test --unstable -A compare_with_rust.ts`
import { unicodeWidth } from "../unicode_width.ts";
import { fromFileUrl } from "../../path/mod.ts";
import fc from "https://esm.sh/fast-check@3.8.0";
// Note: This test is optional. It requires the Rust code to be compiled locally
// Property-based check that `unicodeWidth` returns the same value as the
// `unicode_width` symbol exported by the locally built dynamic library, for
// strings generated by three fast-check string arbitraries.
Deno.test("fast-check equality with unicode_width Rust crate", async (t) => {
// File name of the compiled library for the current OS. For any OS other than
// the three listed here the lookup yields `undefined`.
const libName = ({
darwin: "libunicode_width_crate.dylib",
linux: "libunicode_width_crate.so",
windows: "libunicode_width_crate.dll",
// deno-lint-ignore no-explicit-any
} as any)[Deno.build.os];
// Path of the library inside the crate's debug build output directory.
const libPath = fromFileUrl(
import.meta.resolve(
`../testdata/unicode_width_crate/target/debug/${libName}`,
),
);
// Encode a string as bytes with a trailing NUL so it can be read as a C string.
const toCString = (str: string) => new TextEncoder().encode(str + "\0");
// @ts-ignore type-check errors if unavailable due to lack of --unstable flag
let dylib: Deno.DynamicLibrary<{
unicode_width: { parameters: ["buffer"]; result: "usize" };
}>;
try {
dylib = Deno.dlopen(libPath, {
unicode_width: { parameters: ["buffer"], result: "usize" },
});
// One sub-step per fast-check string arbitrary.
for (
const arbitrary of [
"string",
"unicodeString",
"fullUnicodeString",
] as const
) {
await t.step({
name: `fc.${arbitrary}()`,
fn() {
// To avoid esm.sh statically analyzed
// NOTE(review): the comment above does not obviously describe the code
// that follows — confirm whether it is still needed.
fc.assert(
fc.property(
fc[arbitrary](),
// JSON stringify to allow "\0" chars to cross FFI boundary in a null-terminated string
// deno-lint-ignore no-explicit-any
(str: any) =>
unicodeWidth(str) ===
dylib.symbols.unicode_width(toCString(JSON.stringify(str))),
),
);
},
});
}
} finally {
// `dylib` is still unassigned if `Deno.dlopen` threw, hence the optional call.
// deno-lint-ignore no-extra-non-null-assertion
dylib!?.close();
}
});

327
console/_tools/generate_data.ts Executable file
View File

@ -0,0 +1,327 @@
#!/usr/bin/env -S deno run --allow-net --allow-read --allow-write
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// Ported from unicode_width rust crate, Copyright (c) 2015 The Rust Project Developers. MIT license.
import { assert } from "../../_util/asserts.ts";
import { runLengthEncode } from "../_rle.ts";
// change this line and re-run the script to update for new Unicode versions
const UNICODE_VERSION = "15.0.0";
// Number of code points covered by the generated tables (0x0 through 0x10FFFF).
const NUM_CODEPOINTS = 0x110000;
// Number of bits needed to represent the highest code point (evaluates to 21).
const MAX_CODEPOINT_BITS = Math.ceil(Math.log2(NUM_CODEPOINTS - 1));
// Size in bits of one entry in a table: `Table` checks each entry against
// `1 << offsetType` and packs `8 / offsetType` entries into every output byte.
enum OffsetType {
U2 = 2,
U4 = 4,
U8 = 8,
}
type CodePoint = number;
type BitPos = number;
// One `[lowBit, capBit, offsetType]` triple per table, in the order the tables
// are built: each table is indexed by code point bits `lowBit` (inclusive) up
// to `capBit` (exclusive).
const TABLE_CFGS: [BitPos, BitPos, OffsetType][] = [
[13, MAX_CODEPOINT_BITS, OffsetType.U8],
[6, 13, OffsetType.U8],
[0, 6, OffsetType.U2],
];
/**
 * Download one Unicode Character Database file as text.
 *
 * @param filename Name of the file inside the version's `ucd/` directory.
 * @param version Unicode version used as a path segment of the URL.
 * @returns The response body as text.
 * @throws {Error} If the response is not OK.
 */
async function fetchUnicodeData(filename: string, version: string) {
  const url = `https://www.unicode.org/Public/${version}/ucd/${filename}`;
  const response = await fetch(url);
  if (response.ok) {
    return await response.text();
  }
  throw new Error(`Failed to fetch ${filename}`);
}
// Width class assigned to a code point. The numeric values are what ends up
// stored in the final table, whose entries are 2 bits wide (`OffsetType.U2`).
enum EffectiveWidth {
Zero = 0,
Narrow = 1,
Wide = 2,
Ambiguous = 3,
}
// Maps the width code found in `EastAsianWidth.txt` to an `EffectiveWidth`:
// N, Na and H are Narrow; W and F are Wide; A is Ambiguous.
const widthCodes = {
N: EffectiveWidth.Narrow,
Na: EffectiveWidth.Narrow,
H: EffectiveWidth.Narrow,
W: EffectiveWidth.Wide,
F: EffectiveWidth.Wide,
A: EffectiveWidth.Ambiguous,
};
/**
 * Build a per-code-point width map from `EastAsianWidth.txt`.
 *
 * Code points the file does not list are treated as Narrow.
 *
 * @param version Unicode version whose data file is downloaded.
 * @returns An array with one `EffectiveWidth` per code point, at least
 * `NUM_CODEPOINTS` long.
 * @throws If a data line carries a width code that `widthCodes` does not
 * know, or if the data lines are not in ascending code point order.
 */
async function loadEastAsianWidths(version: string) {
  const eaw = await fetchUnicodeData("EastAsianWidth.txt", version);
  // Matches both `XXXX;code` and `XXXX..YYYY;code`. Whitespace around the `;`
  // separator is tolerated because UCD field padding is not significant;
  // without this, a file that pads its fields would match nothing and every
  // code point would silently fall back to Narrow.
  const entry = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(\w+)/;
  const widthMap: EffectiveWidth[] = [];
  let current = 0;
  for (const line of eaw.split("\n")) {
    const match = line.match(entry);
    if (!match) {
      // Comments, blank lines and anything else that is not a data line.
      continue;
    }
    const low = parseInt(match[1], 16);
    // A single code point is a range whose end equals its start.
    const high = parseInt(match[2] ?? match[1], 16);
    const code = match[3];
    // Fail loudly on an unrecognised code instead of storing `undefined`.
    assert(
      Object.prototype.hasOwnProperty.call(widthCodes, code),
      `Unknown East Asian Width code "${code}"`,
    );
    const width = widthCodes[code as keyof typeof widthCodes];
    assert(current <= high);
    while (current <= high) {
      // Code points skipped between the previous line and this one are Narrow.
      widthMap.push(current < low ? EffectiveWidth.Narrow : width);
      ++current;
    }
  }
  // Pad the tail of the code space that the file does not mention.
  while (widthMap.length < NUM_CODEPOINTS) {
    widthMap.push(EffectiveWidth.Narrow);
  }
  return widthMap;
}
/**
 * Build a per-code-point flag map from `UnicodeData.txt` marking the code
 * points whose general category is one of Cc, Cf, Mn or Me.
 *
 * @param version Unicode version whose data file is downloaded.
 * @returns An array with one boolean per code point, at least
 * `NUM_CODEPOINTS` long; code points the file does not cover are `false`.
 */
async function loadZeroWidths(version: string) {
  const unicodeData = await fetchUnicodeData("UnicodeData.txt", version);
  const zeroWidthCategories = ["Cc", "Cf", "Mn", "Me"];
  const zwMap: boolean[] = [];
  let next = 0;
  for (const line of unicodeData.split("\n")) {
    const fields = line.split(";");
    // Only lines with exactly 15 `;`-separated fields are data lines.
    if (fields.length !== 15) continue;
    const codepoint = parseInt(fields[0], 16);
    // A name ending in ", Last>" closes a range, so the category applies to
    // every code point since the previous line, not just to this one.
    const closesRange = fields[1].endsWith(", Last>");
    const zeroWidth = zeroWidthCategories.includes(fields[2]);
    assert(next <= codepoint);
    for (; next <= codepoint; ++next) {
      zwMap.push((closesRange || next === codepoint) && zeroWidth);
    }
  }
  while (zwMap.length < NUM_CODEPOINTS) {
    zwMap.push(false);
  }
  return zwMap;
}
/**
 * A group of `[codepoint, width]` entries together with the sequence of
 * their widths in insertion order.
 */
class Bucket {
  /** The entries, each stored as the JSON text of `[codepoint, width]`. */
  entrySet: Set<string>;
  /** Widths in the order they were appended. */
  widths: EffectiveWidth[];

  constructor() {
    this.entrySet = new Set();
    this.widths = [];
  }

  /** Add one entry and record its width at the end of the width sequence. */
  append(codepoint: CodePoint, width: EffectiveWidth) {
    this.entrySet.add(JSON.stringify([codepoint, width]));
    this.widths.push(width);
  }

  /**
   * Merge `attempt` into this bucket if the two width sequences are
   * compatible, i.e. the shorter one is a prefix of the longer one.
   *
   * @returns `true` if the merge happened; `false` (with nothing changed) if
   * the sequences are incompatible.
   */
  tryExtend(attempt: Bucket) {
    // On equal lengths `this.widths` counts as the shorter one.
    const [less, more] = this.widths.length <= attempt.widths.length
      ? [this.widths, attempt.widths]
      : [attempt.widths, this.widths];
    if (!less.every((v, i) => v === more[i])) {
      return false;
    }
    for (const x of attempt.entrySet.values()) {
      this.entrySet.add(x);
    }
    // Copy rather than adopt `more`: it may be `attempt.widths`, and sharing
    // the array would let a later `append` on one bucket change the other.
    this.widths = [...more];
    return true;
  }

  /** The entries as `[codepoint, width]` pairs, sorted by code point. */
  entries() {
    const result = [...this.entrySet]
      .map((x) => JSON.parse(x) as [CodePoint, EffectiveWidth]);
    return result.sort((a, b) => a[0] - b[0]);
  }

  /** The single width shared by all entries, or `null` if there is not exactly one distinct width. */
  width() {
    return new Set(this.widths).size === 1 ? this.widths[0] : null;
  }
}
/**
 * Distribute entries into buckets keyed by a bit field of the code point.
 *
 * @param entries `[codepoint, width]` pairs to distribute.
 * @param lowBit Lowest bit (inclusive) of the field used as the bucket index.
 * @param capBit Bit position just above the field (exclusive).
 * @returns `2 ** (capBit - lowBit)` buckets; bucket `i` holds the entries
 * whose selected bits equal `i`.
 */
function makeBuckets(
  entries: [CodePoint, EffectiveWidth][],
  lowBit: BitPos,
  capBit: BitPos,
) {
  const numBits = capBit - lowBit;
  assert(numBits > 0);
  const mask = (1 << numBits) - 1;
  const buckets: Bucket[] = [];
  for (let remaining = 2 ** numBits; remaining > 0; --remaining) {
    buckets.push(new Bucket());
  }
  entries.forEach(([codepoint, width]) => {
    const slot = (codepoint >> lowBit) & mask;
    buckets[slot].append(codepoint, width);
  });
  return buckets;
}
/**
 * One level of the width lookup structure.
 *
 * Every entry group is split into buckets by the bit field
 * `lowBit..capBit` of the code point. Buckets with compatible width
 * sequences are merged, and `entries` records, for each bucket in order, the
 * index of the merged bucket it ended up in.
 */
class Table {
  lowBit: BitPos;
  capBit: BitPos;
  offsetType: OffsetType;
  /** Indices into `indexed`; replaced by widths after `indicesToWidths`. */
  entries: number[];
  /** The distinct (merged) buckets; cleared by `indicesToWidths`. */
  indexed: Bucket[];

  constructor(
    entryGroups: [CodePoint, EffectiveWidth][][],
    lowBit: BitPos,
    capBit: BitPos,
    offsetType: OffsetType,
  ) {
    this.lowBit = lowBit;
    this.capBit = capBit;
    this.offsetType = offsetType;
    this.entries = [];
    this.indexed = [];

    for (const group of entryGroups) {
      for (const bucket of makeBuckets(group, this.lowBit, this.capBit)) {
        // Reuse the first existing bucket that can absorb this one.
        const found = this.indexed.findIndex((existing) =>
          existing.tryExtend(bucket)
        );
        if (found !== -1) {
          this.entries.push(found);
        } else {
          this.entries.push(this.indexed.length);
          this.indexed.push(bucket);
        }
      }
    }

    // Every index has to fit in `offsetType` bits.
    const limit = 1 << this.offsetType;
    this.entries.forEach((index) => {
      assert(index < limit);
    });
  }

  /**
   * Replace each entry (a bucket index) with that bucket's single width.
   * May be called only once per table.
   *
   * @throws {TypeError} If a referenced bucket has no single width.
   */
  indicesToWidths() {
    if (!this.indexed) {
      throw new Error(`Can't call indicesToWidths twice on the same Table`);
    }
    const widths: number[] = [];
    for (const i of this.entries) {
      const width = this.indexed[i].width();
      if (width == null) throw new TypeError("width cannot be null");
      widths.push(width);
    }
    this.entries = widths;
    this.indexed = null as unknown as Bucket[];
  }

  /** The distinct buckets; unavailable once `indicesToWidths` has run. */
  get buckets() {
    if (this.indexed) {
      return this.indexed;
    }
    throw new Error(`Can't access buckets after calling indicesToWidths`);
  }

  /**
   * Pack the entries into bytes, `8 / offsetType` entries per byte, with
   * earlier entries in the lower bits.
   */
  toBytes() {
    const perByte = Math.trunc(8 / this.offsetType);
    const byteCount = Math.ceil(this.entries.length / perByte);
    return Array.from({ length: byteCount }, (_, byteIndex) => {
      const start = byteIndex * perByte;
      let packed = 0;
      for (let slot = 0; slot < perByte; ++slot) {
        packed |= this.entries[start + slot] << (slot * this.offsetType);
      }
      return packed;
    });
  }
}
/**
 * Build one `Table` per configuration, chaining them: the buckets of each
 * table become the entry groups of the next one.
 *
 * @param tableCfgs `[lowBit, capBit, offsetType]` triples, in build order.
 * @param entries All `[codepoint, width]` pairs; they form the single entry
 * group of the first table.
 * @returns The tables in the same order as `tableCfgs`.
 */
function makeTables(
  tableCfgs: [BitPos, BitPos, OffsetType][],
  entries: [CodePoint, EffectiveWidth][],
) {
  let entryGroups = [entries];
  return tableCfgs.map(([lowBit, capBit, offsetType]) => {
    const table = new Table(entryGroups, lowBit, capBit, offsetType);
    entryGroups = table.buckets.map((bucket) => bucket.entries());
    return table;
  });
}
/**
 * Generate the width lookup tables for a Unicode version.
 *
 * @param version Unicode version whose data files are downloaded.
 * @returns The tables built from `TABLE_CFGS`; the last one holds widths
 * rather than bucket indices.
 */
export async function tables(version: string) {
  console.info(`Generating tables for Unicode ${version}`);
  const eastAsian = await loadEastAsianWidths(version);
  const zeroWidth = await loadZeroWidths(version);

  // A zero-width flag takes precedence over the East Asian width.
  const widthMap = eastAsian.map((width, cp) =>
    zeroWidth[cp] ? EffectiveWidth.Zero : width
  );

  // Manual overrides: U+00AD is forced to Narrow and the whole range
  // U+1160..U+11FF is forced to Zero.
  widthMap[0x00AD] = EffectiveWidth.Narrow;
  for (let cp = 0x1160; cp <= 0x11FF; ++cp) {
    widthMap[cp] = EffectiveWidth.Zero;
  }

  const built = makeTables(TABLE_CFGS, [...widthMap.entries()]);
  const last = built[built.length - 1];
  last.indicesToWidths();
  return built;
}
// Generate the tables, run-length encode each one and write the result as
// pretty-printed JSON.
const data = {
  UNICODE_VERSION,
  tables: (await tables(UNICODE_VERSION)).map((table) =>
    runLengthEncode(table.toBytes())
  ),
};
// Sanity checks: a three-part version string and exactly three tables.
assert(data.UNICODE_VERSION.split(".").length === 3);
assert(data.tables.length === 3);
// Resolve the output path against this script's own location rather than the
// current working directory, so the file lands in the script's parent
// directory no matter where the script is invoked from.
await Deno.writeTextFile(
  new URL("../_data.json", import.meta.url),
  JSON.stringify(data, null, 2) + "\n",
);

9
console/mod.ts Normal file
View File

@ -0,0 +1,9 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
/**
* Functions for console-related tasks such as TTY text layout
*
* @module
*/
export * from "./unicode_width.ts";

53
console/testdata/unicode_width_crate/Cargo.lock generated vendored Normal file
View File

@ -0,0 +1,53 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "itoa"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "ryu"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
[[package]]
name = "serde"
version = "1.0.159"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065"
[[package]]
name = "serde_json"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "unicode_width_crate"
version = "0.1.0"
dependencies = [
"libc",
"serde_json",
"unicode-width",
]

View File

@ -0,0 +1,12 @@
[package]
name = "unicode_width_crate"
version = "0.1.0"
edition = "2021"
[dependencies]
libc = "0.2.140"
serde_json = "1.0.95"
unicode-width = "0.1.10"
[lib]
crate-type = ["cdylib"]

View File

@ -0,0 +1,22 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use libc::c_char;
use std::ffi::CStr;
use serde_json;
use unicode_width::UnicodeWidthStr;
/// Borrow a C string pointer as a `&str`.
///
/// Panics if `ptr` is null or if the bytes are not valid UTF-8.
// NOTE(review): the `'static` lifetime is asserted, not enforced — the result
// is only valid for as long as the caller keeps the buffer alive.
fn from_c_char(ptr: *const c_char) -> &'static str {
    assert!(!ptr.is_null());
    // SAFETY: relies on the caller passing a pointer to a NUL-terminated buffer.
    let c_str = unsafe { CStr::from_ptr(ptr) };
    c_str.to_str().unwrap()
}
/// Exported C entry point: parse the given C string as a JSON string literal
/// and return the width of the decoded text.
///
/// Panics if the input is not a JSON string.
#[no_mangle]
pub extern "C" fn unicode_width(json_str: *const c_char) -> usize {
    let encoded = from_c_char(json_str);
    let decoded: String = serde_json::from_str(encoded).unwrap();
    decoded.width()
}

62
console/unicode_width.ts Normal file
View File

@ -0,0 +1,62 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// Ported from unicode_width rust crate, Copyright (c) 2015 The Rust Project Developers. MIT license.
import data from "./_data.json" assert { type: "json" };
import { runLengthDecode } from "./_rle.ts";
// Decoded lookup tables, created on the first call to `lookupWidth`.
let tables: Uint8Array[] | null = null;

/**
 * Look up the width of a code point in the three-level table.
 *
 * @param cp The code point.
 * @returns 0, 1 or 2. A stored value of 3 is reported as 1.
 */
function lookupWidth(cp: number) {
  if (tables === null) {
    tables = data.tables.map(runLengthDecode);
  }
  const [root, middle, leaves] = tables;

  // Level 1 is indexed by the bits from 13 upwards.
  const middleBlock = root[(cp >> 13) & 0xff];
  // Level 2: 128 entries per block, indexed by bits 6..12.
  const leafBlock = middle[128 * middleBlock + ((cp >> 6) & 0x7f)];
  // Level 3: 16 bytes per block, each byte holding four 2-bit widths.
  const packed = leaves[16 * leafBlock + ((cp >> 2) & 0xf)];

  const shift = 2 * (cp & 0b11);
  const stored = (packed >> shift) & 0b11;
  if (stored === 3) {
    return 1;
  }
  return stored;
}
// Memoized results of `charWidth`, keyed by the character.
const cache = new Map<string, number | null>();

/**
 * Width of a single character, judged by its first code point.
 *
 * @param ch The character to measure.
 * @returns 0 for U+0000, 1 for U+0020..U+007E, the table lookup result for
 * U+00A0 and above, and `null` for everything else (U+0001..U+001F and
 * U+007F..U+009F).
 */
function charWidth(ch: string) {
  const known = cache.get(ch);
  // Stored values are never `undefined`, so this means "not cached yet".
  if (known !== undefined) return known;

  const cp = ch.codePointAt(0)!;
  let width: number | null;
  if (cp >= 0xa0) {
    width = lookupWidth(cp);
  } else if (cp >= 0x20 && cp < 0x7f) {
    width = 1;
  } else if (cp === 0) {
    width = 0;
  } else {
    width = null;
  }

  cache.set(ch, width);
  return width;
}
/**
 * Get the width of a string's constituent characters in columns in TTY-like
 * environments.
 *
 * Combine with `stripColor` from `fmt/colors.ts` to get the expected physical
 * width of a string in the console.
 *
 * @example
 * ```ts
 * import { unicodeWidth } from "https://deno.land/std@$STD_VERSION/console/unicode_width.ts";
 * import { assertEquals } from "https://deno.land/std@$STD_VERSION/testing/asserts.ts";
 * import { stripColor } from "https://deno.land/std@$STD_VERSION/fmt/colors.ts";
 *
 * assertEquals(unicodeWidth("hello world"), 11);
 * assertEquals(unicodeWidth("天地玄黃宇宙洪荒"), 16);
 * assertEquals(unicodeWidth("ｆｕｌｌｗｉｄｔｈ"), 18);
 * assertEquals(unicodeWidth(stripColor("\x1b[36mголубой\x1b[39m")), 7);
 * assertEquals(unicodeWidth(stripColor("\x1b[31m紅色\x1b[39m")), 4);
 * assertEquals(unicodeWidth(stripColor("\x1B]8;;https://deno.land\x07🦕\x1B]8;;\x07")), 2);
 * ```
 *
 * @param str The string to measure.
 * @returns The sum of the widths of the string's code points. Code points
 * for which no width is defined count as 0.
 */
export function unicodeWidth(str: string): number {
  let total = 0;
  // Iterating a string yields whole code points, not UTF-16 code units.
  for (const ch of str) {
    total += charWidth(ch) ?? 0;
  }
  return total;
}

View File

@ -0,0 +1,48 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
import { unicodeWidth } from "./unicode_width.ts";
import { assertEquals } from "../testing/asserts.ts";
// Unit tests for `unicodeWidth`, one step per class of input.
Deno.test("unicodeWidth", async (t) => {
// Each character of the ASCII sample counts as one column.
await t.step("ASCII", () => {
const lorem =
"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
assertEquals(unicodeWidth(lorem), lorem.length);
});
// Each character of the CJK sample counts as two columns.
await t.step("CJK", () => {
const qianZiWen =
"天地玄黃宇宙洪荒日月盈昃辰宿列張寒來暑往秋收冬藏閏餘成歲律呂調陽雲騰致雨露結爲霜金生麗水玉出崑岡劍號巨闕珠稱夜光果珍李柰菜重芥薑海鹹河淡鱗潛羽翔龍師火帝鳥官人皇始制文字乃服衣裳推位讓國有虞陶唐弔民伐罪周發殷湯坐朝問道垂拱平章愛育黎首臣伏戎羌遐邇壹體率賓歸王鳴鳳在樹白駒食場化被草木賴及萬方蓋此身髮四大五常恭惟鞠養豈敢毀傷女慕貞絜男效才良知過必改得能莫忘罔談彼短靡恃己長信使可覆器欲難量墨悲絲淬詩讚羔羊";
assertEquals(unicodeWidth(qianZiWen), qianZiWen.length * 2);
});
// The composed (NFC) and decomposed (NFD) forms of the same character must
// both measure one column, although their string lengths differ.
await t.step("Unicode normalization", async (t) => {
const str = "á";
await t.step("NFC", () => {
const nfc = str.normalize("NFC");
assertEquals(nfc.length, 1);
assertEquals(unicodeWidth(nfc), 1);
});
await t.step("NFD", () => {
const nfd = str.normalize("NFD");
assertEquals(nfd.length, 2);
assertEquals(unicodeWidth(nfd), 1);
});
});
await t.step("Emoji", () => {
assertEquals(unicodeWidth("👩"), 2); // Woman
assertEquals(unicodeWidth("🔬"), 2); // Microscope
// Note: Returns 4 for the below case, following the upstream crate
// `unicode_width`. Another possibility might be returning 2, which is what
// `npm:string-width` returns.
// See discussion at https://github.com/denoland/deno_std/pull/3297#discussion_r1166289430
assertEquals(unicodeWidth("👩‍🔬"), 4); // Woman Scientist
});
});