From 74900851387047415b447f73d26471ebc343de5f Mon Sep 17 00:00:00 2001 From: Efe Date: Thu, 14 Nov 2024 07:51:46 +0100 Subject: [PATCH] feat(csv/unstable): infer column names from object arrays for stringify() (#6122) --- _tools/check_docs.ts | 1 + csv/deno.json | 3 +- csv/stringify_test.ts | 13 -- csv/unstable_stringify.ts | 363 +++++++++++++++++++++++++++++++++ csv/unstable_stringify_test.ts | 31 +++ 5 files changed, 397 insertions(+), 14 deletions(-) create mode 100644 csv/unstable_stringify.ts create mode 100644 csv/unstable_stringify_test.ts diff --git a/_tools/check_docs.ts b/_tools/check_docs.ts index 10bff1b71..54c553f11 100644 --- a/_tools/check_docs.ts +++ b/_tools/check_docs.ts @@ -42,6 +42,7 @@ const ENTRY_POINTS = [ "../crypto/mod.ts", "../collections/mod.ts", "../csv/mod.ts", + "../csv/unstable_stringify.ts", "../data_structures/mod.ts", "../data_structures/unstable_bidirectional_map.ts", "../datetime/mod.ts", diff --git a/csv/deno.json b/csv/deno.json index 4f3cda35d..fe5f3b719 100644 --- a/csv/deno.json +++ b/csv/deno.json @@ -6,6 +6,7 @@ "./parse": "./parse.ts", "./parse-stream": "./parse_stream.ts", "./stringify": "./stringify.ts", - "./stringify-stream": "./stringify_stream.ts" + "./stringify-stream": "./stringify_stream.ts", + "./unstable-stringify": "./unstable_stringify.ts" } } diff --git a/csv/stringify_test.ts b/csv/stringify_test.ts index 08d525b1b..1a37f2fd1 100644 --- a/csv/stringify_test.ts +++ b/csv/stringify_test.ts @@ -86,19 +86,6 @@ Deno.test({ }, }, ); - await t.step( - { - name: "Invalid data, no columns", - fn() { - const data = [{ a: 1 }, { a: 2 }]; - assertThrows( - () => stringify(data), - TypeError, - "No property accessor function was provided for object", - ); - }, - }, - ); await t.step( { name: "No data, no columns", diff --git a/csv/unstable_stringify.ts b/csv/unstable_stringify.ts new file mode 100644 index 000000000..d77c950c1 --- /dev/null +++ b/csv/unstable_stringify.ts @@ -0,0 +1,363 @@ +// Copyright 2018-2024 the 
Deno authors. All rights reserved. MIT license.
+// This module is browser compatible.
+
+import { stringify as stableStringify } from "./stringify.ts";
+
+/** Array index or record key corresponding to a value for a data object. */
+export type PropertyAccessor = number | string;
+
+/**
+ * Column information.
+ *
+ * @param header Explicit column header name. If omitted,
+ * the (final) property accessor is used for this value.
+ *
+ * @param prop Property accessor(s) used to access the value on the object
+ */
+export type ColumnDetails = {
+  header?: string;
+  prop: PropertyAccessor | PropertyAccessor[];
+};
+
+/**
+ * The most essential aspect of a column is accessing the property holding the
+ * data for that column on each object in the data array. If that member is at
+ * the top level, `Column` can simply be a property accessor, which is either a
+ * `string` (if it's a plain object) or a `number` (if it's an array).
+ *
+ * ```ts
+ * const columns = [
+ *   "name",
+ * ];
+ * ```
+ *
+ * Each property accessor will be used as the header for the column:
+ *
+ * | name |
+ * | :--: |
+ * | Deno |
+ *
+ * - If the required data is not at the top level (it's nested in other
+ *   objects/arrays), then a simple property accessor won't work, so an array of
+ *   them will be required.
+ *
+ *   ```ts
+ *   const columns = [
+ *     ["repo", "name"],
+ *     ["repo", "org"],
+ *   ];
+ *   ```
+ *
+ *   When using arrays of property accessors, the header names inherit the value
+ *   of the last accessor in each array:
+ *
+ *   | name |   org    |
+ *   | :--: | :------: |
+ *   | deno | denoland |
+ *
+ * - If a different column header is desired, then a `ColumnDetails` object type
+ *   can be used for each column:
+ *
+ *   - **`header?: string`** is the optional value to use for the column header
+ *     name
+ *
+ *   - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor
+ *     (`string` or `number`) or array of property accessors used to access the
+ *     data on each object
+ *
+ *   ```ts
+ *   const columns = [
+ *     "name",
+ *     {
+ *       prop: ["runsOn", 0],
+ *       header: "language 1",
+ *     },
+ *     {
+ *       prop: ["runsOn", 1],
+ *       header: "language 2",
+ *     },
+ *   ];
+ *   ```
+ *
+ *   | name | language 1 | language 2 |
+ *   | :--: | :--------: | :--------: |
+ *   | Deno |    Rust    | TypeScript |
+ */
+export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[];
+
+/** An object (plain or array) */
+export type DataItem = Readonly<Record<string, unknown> | unknown[]>;
+
+/** Options for {@linkcode stringify}. */
+export type StringifyOptions = {
+  /** Whether to include the row of headers or not.
+   *
+   * @default {true}
+   */
+  headers?: boolean;
+  /**
+   * Delimiter used to separate values. Examples:
+   * - `","` _comma_
+   * - `"\t"` _tab_
+   * - `"|"` _pipe_
+   * - etc.
+   *
+   * @default {","}
+   */
+  separator?: string;
+  /**
+   * A list of instructions for how to target and transform the data for each
+   * column of output. This is also where you can provide an explicit header
+   * name for the column.
+   *
+   * @default {undefined}
+   */
+  columns?: readonly Column[] | undefined;
+  /**
+   * Whether to add a
+   * {@link https://en.wikipedia.org/wiki/Byte_order_mark | byte-order mark} to the
+   * beginning of the file content. Required by software such as MS Excel to
+   * properly display Unicode text.
+   *
+   * @default {false}
+   */
+  bom?: boolean;
+};
+
+/**
+ * Converts an array of objects into a CSV string.
+ *
+ * @example Default options
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   ["Rick", 70],
+ *   ["Morty", 14],
+ * ];
+ *
+ * assertEquals(stringify(data), `Rick,70\r\nMorty,14\r\n`);
+ * ```
+ *
+ * @example Give an array of objects and specify columns
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   { name: "Rick", age: 70 },
+ *   { name: "Morty", age: 14 },
+ * ];
+ *
+ * const columns = ["name", "age"];
+ *
+ * assertEquals(stringify(data, { columns }), `name,age\r\nRick,70\r\nMorty,14\r\n`);
+ * ```
+ *
+ * @example Give an array of objects without specifying columns
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   { name: "Rick", age: 70 },
+ *   { name: "Morty", age: 14 },
+ * ];
+ *
+ * assertEquals(stringify(data), `name,age\r\nRick,70\r\nMorty,14\r\n`);
+ * ```
+ *
+ * @example Give an array of objects and specify columns with `headers: false`
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   { name: "Rick", age: 70 },
+ *   { name: "Morty", age: 14 },
+ * ];
+ *
+ * const columns = ["name", "age"];
+ *
+ * assertEquals(
+ *   stringify(data, { columns, headers: false }),
+ *   `Rick,70\r\nMorty,14\r\n`,
+ * );
+ * ```
+ *
+ * @example Give an array of objects and specify columns with renaming
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   { name: "Rick", age: 70 },
+ *   { name: "Morty", age: 14 },
+ * ];
+ *
+ * const columns = [
+ *   { prop: "name", header: "user name" },
+ *   "age",
+ * ];
+ *
+ * assertEquals(
+ *   stringify(data, { columns }),
+ *   `user name,age\r\nRick,70\r\nMorty,14\r\n`,
+ * );
+ * ```
+ *
+ * @example Give an array of objects with nested property and specify columns
+ * ```ts
+ * import {
+ *   Column,
+ *   stringify,
+ * } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   {
+ *     age: 70,
+ *     name: {
+ *       first: "Rick",
+ *       last: "Sanchez",
+ *     },
+ *   },
+ *   {
+ *     age: 14,
+ *     name: {
+ *       first: "Morty",
+ *       last: "Smith",
+ *     },
+ *   },
+ * ];
+ *
+ * const columns: Column[] = [
+ *   ["name", "first"],
+ *   "age",
+ * ];
+ *
+ * assertEquals(
+ *   stringify(data, { columns }),
+ *   `first,age\r\nRick,70\r\nMorty,14\r\n`,
+ * );
+ * ```
+ *
+ * @example Give an array of objects with nested property and specify columns
+ * with renaming
+ * ```ts
+ * import {
+ *   Column,
+ *   stringify,
+ * } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   {
+ *     age: 70,
+ *     name: {
+ *       first: "Rick",
+ *       last: "Sanchez",
+ *     },
+ *   },
+ *   {
+ *     age: 14,
+ *     name: {
+ *       first: "Morty",
+ *       last: "Smith",
+ *     },
+ *   },
+ * ];
+ *
+ * const columns: Column[] = [
+ *   { prop: ["name", "first"], header: "first name" },
+ *   "age",
+ * ];
+ *
+ * assertEquals(
+ *   stringify(data, { columns }),
+ *   `first name,age\r\nRick,70\r\nMorty,14\r\n`,
+ * );
+ * ```
+ *
+ * @example Give an array of string arrays and specify columns with renaming
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   ["Rick", 70],
+ *   ["Morty", 14],
+ * ];
+ *
+ * const columns = [
+ *   { prop: 0, header: "name" },
+ *   { prop: 1, header: "age" },
+ * ];
+ *
+ * assertEquals(
+ *   stringify(data, { columns }),
+ *   `name,age\r\nRick,70\r\nMorty,14\r\n`,
+ * );
+ * ```
+ *
+ * @example Emit TSV (tab-separated values) with `separator: "\t"`
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [
+ *   ["Rick", 70],
+ *   ["Morty", 14],
+ * ];
+ *
+ * assertEquals(stringify(data, { separator: "\t" }), `Rick\t70\r\nMorty\t14\r\n`);
+ * ```
+ *
+ * @example Prepend a byte-order mark with `bom: true`
+ * ```ts
+ * import { stringify } from "@std/csv/unstable-stringify";
+ * import { assertEquals } from "@std/assert/equals";
+ *
+ * const data = [["Rick", 70]];
+ *
+ * assertEquals(stringify(data, { bom: true }), "\ufeffRick,70\r\n");
+ * ```
+ *
+ * @param data The source data to stringify. It's an array of items which are
+ * plain objects or arrays.
+ * @param options Options for the stringification.
+ * @returns A CSV string.
+ */
+export function stringify(
+  data: readonly DataItem[],
+  options?: StringifyOptions,
+): string {
+  let { columns } = options ?? {};
+
+  if (columns && !Array.isArray(columns)) {
+    throw new TypeError(
+      "Cannot stringify data as the columns option is invalid: columns must be an array or undefined",
+    );
+  }
+
+  columns ??= inferColumns(data);
+
+  return stableStringify(data, { ...options, columns });
+}
+
+/**
+ * Uses the first element's keys as columns if it is a plain object, else `[]`.
+ */
+function inferColumns(data: readonly DataItem[]): string[] {
+  const firstElement = data.at(0);
+  if (
+    firstElement &&
+    typeof firstElement === "object" &&
+    !Array.isArray(firstElement)
+  ) {
+    return Object.keys(firstElement);
+  }
+
+  return [];
+}
diff --git a/csv/unstable_stringify_test.ts b/csv/unstable_stringify_test.ts
new file mode 100644
index 000000000..d38f85f97
--- /dev/null
+++ b/csv/unstable_stringify_test.ts
@@ -0,0 +1,31 @@
+// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+
+import { assertEquals } from "@std/assert/equals";
+import { stringify } from "./unstable_stringify.ts";
+
+const CRLF = "\r\n"; // row terminator used in the expected outputs below
+
+Deno.test("(unstable) stringify", async (t) => { // covers omitted vs. explicit `columns`
+  await t.step(
+    {
+      name:
+        "Object array with no columns, should infer columns from the first array element",
+      fn() {
+        const data = [{ a: 1 }, { a: 2 }, { b: 3 }]; // only the first element's keys become columns
+        const output = `a${CRLF}1${CRLF}2${CRLF}${CRLF}`; // last row is empty: { b: 3 } has no "a"
+        assertEquals(stringify(data), output);
+      },
+    },
+  );
+  await t.step(
+    {
+      name: "Object array with columns, shouldn't infer columns",
+      fn() {
+        const data = [{ a: 1 }, { a: 2 }, { b: 3 }];
+        const columns = ["a"]; // explicit columns are passed through; nothing is inferred
+        const output = `a${CRLF}1${CRLF}2${CRLF}${CRLF}`; // same shape as the inferred case above
+        assertEquals(stringify(data, { columns }), output);
+      },
+    },
+  );
+});