From a5fffc5fb0524a075fa8f6b0d692ebdae7a823a0 Mon Sep 17 00:00:00 2001 From: Asher Gomez Date: Mon, 13 Mar 2023 16:56:25 +1100 Subject: [PATCH] BREAKING(csv): move `encoding/csv` to own top-level folder and towards single-export files (#3228) --- {encoding/csv => csv}/_io.ts | 2 +- csv/mod.ts | 4 + encoding/csv/_parser.ts => csv/parse.ts | 105 +++- encoding/csv_test.ts => csv/parse_test.ts | 546 +------------------- csv/stream.ts | 149 ++++++ {encoding/csv => csv}/stream_test.ts | 25 +- csv/stringify.ts | 312 ++++++++++++ csv/stringify_test.ts | 550 ++++++++++++++++++++ {encoding => csv}/testdata/large.csv | 0 {encoding => csv}/testdata/simple.csv | 0 encoding/csv.ts | 588 +++++++--------------- encoding/csv/stream.ts | 154 +----- 12 files changed, 1323 insertions(+), 1112 deletions(-) rename {encoding/csv => csv}/_io.ts (99%) create mode 100644 csv/mod.ts rename encoding/csv/_parser.ts => csv/parse.ts (76%) rename encoding/csv_test.ts => csv/parse_test.ts (59%) create mode 100644 csv/stream.ts rename {encoding/csv => csv}/stream_test.ts (93%) create mode 100644 csv/stringify.ts create mode 100644 csv/stringify_test.ts rename {encoding => csv}/testdata/large.csv (100%) rename {encoding => csv}/testdata/simple.csv (100%) diff --git a/encoding/csv/_io.ts b/csv/_io.ts similarity index 99% rename from encoding/csv/_io.ts rename to csv/_io.ts index 4296b8634..0248c6796 100644 --- a/encoding/csv/_io.ts +++ b/csv/_io.ts @@ -3,7 +3,7 @@ // Copyright 2011 The Go Authors. All rights reserved. BSD license. // https://github.com/golang/go/blob/master/LICENSE // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -import { assert } from "../../_util/asserts.ts"; +import { assert } from "../_util/asserts.ts"; export interface ReadOptions { /** Character which separates values. diff --git a/csv/mod.ts b/csv/mod.ts new file mode 100644 index 000000000..f426770ea --- /dev/null +++ b/csv/mod.ts @@ -0,0 +1,4 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +export * from "./stringify.ts"; +export * from "./parse.ts"; +export * from "./stream.ts"; diff --git a/encoding/csv/_parser.ts b/csv/parse.ts similarity index 76% rename from encoding/csv/_parser.ts rename to csv/parse.ts index 69ffea3c8..707d12f62 100644 --- a/encoding/csv/_parser.ts +++ b/csv/parse.ts @@ -1,17 +1,27 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -import { assert } from "../../_util/asserts.ts"; import { + convertRowToObject, + ERR_BARE_QUOTE, + ERR_FIELD_COUNT, + ERR_INVALID_DELIM, + ERR_QUOTE, + ParseError, + type ReadOptions, +} from "./_io.ts"; +import { assert } from "../_util/asserts.ts"; + +export { ERR_BARE_QUOTE, ERR_FIELD_COUNT, ERR_INVALID_DELIM, ERR_QUOTE, ParseError, ReadOptions, -} from "./_io.ts"; +}; const BYTE_ORDER_MARK = "\ufeff"; -export class Parser { +class Parser { #input = ""; #cursor = 0; #options: { @@ -275,3 +285,92 @@ export class Parser { return result; } } + +export interface ParseOptions extends ReadOptions { + /** + * If you provide `skipFirstRow: true` and `columns`, the first line will be + * skipped. + * If you provide `skipFirstRow: true` but not `columns`, the first line will + * be skipped and used as header definitions. + */ + skipFirstRow?: boolean; + + /** List of names used for header definition. */ + columns?: string[]; +} + +/** + * Csv parse helper to manipulate data. + * Provides an auto/custom mapper for columns. + * + * @example + * ```ts + * import { parse } from "https://deno.land/std@$STD_VERSION/csv/parse.ts"; + * const string = "a,b,c\nd,e,f"; + * + * console.log( + * await parse(string, { + * skipFirstRow: false, + * }), + * ); + * // output: + * // [["a", "b", "c"], ["d", "e", "f"]] + * ``` + * + * @param input Input to parse. + * @param opt options of the parser. + * @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`. + * If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record[]`. + */ +export function parse( + input: string, +): string[][]; +export function parse( + input: string, + opt: Omit, +): string[][]; +export function parse( + input: string, + opt: Omit & { + columns: string[]; + }, +): Record[]; +export function parse( + input: string, + opt: Omit & { + skipFirstRow: true; + }, +): Record[]; +export function parse( + input: string, + opt: ParseOptions, +): string[][] | Record[]; +export function parse( + input: string, + opt: ParseOptions = { + skipFirstRow: false, + }, +): string[][] | Record[] { + const parser = new Parser(opt); + const r = parser.parse(input); + + if (opt.skipFirstRow || opt.columns) { + let headers: string[] = []; + + if (opt.skipFirstRow) { + const head = r.shift(); + assert(head != null); + headers = head; + } + + if (opt.columns) { + headers = opt.columns; + } + + const firstLineIndex = opt.skipFirstRow ? 1 : 0; + return r.map((row, i) => { + return convertRowToObject(row, headers, firstLineIndex + i); + }); + } + return r; +} diff --git a/encoding/csv_test.ts b/csv/parse_test.ts similarity index 59% rename from encoding/csv_test.ts rename to csv/parse_test.ts index 21b3cc481..1b83b8778 100644 --- a/encoding/csv_test.ts +++ b/csv/parse_test.ts @@ -4,16 +4,10 @@ // https://github.com/golang/go/blob/master/LICENSE // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -import { - assert, - assertEquals, - assertStringIncludes, - assertThrows, -} from "../testing/asserts.ts"; +import { assert, assertEquals, assertThrows } from "../testing/asserts.ts"; +import { parse, ParseError } from "./parse.ts"; import type { AssertTrue, Has } from "../testing/types.ts"; -import { parse, ParseError, stringify, StringifyError } from "./csv.ts"; -const CRLF = "\r\n"; const BYTE_ORDER_MARK = "\ufeff"; Deno.test({ @@ -823,541 +817,7 @@ Deno.test({ }); Deno.test({ - name: "stringify", - async fn(t) { - await t.step({ - name: "Access array index using string", - fn() { - const columns = ["a"]; - const data = [["foo"], ["bar"]]; - const errorMessage = 'Property accessor is not of type "number"'; - assertThrows( - () => stringify(data, { columns }), - StringifyError, - errorMessage, - ); - }, - }); - await t.step( - { - name: "Double quote in separator", - - fn() { - const columns = [0]; - const data = [["foo"], ["bar"]]; - const errorMessage = [ - "Separator cannot include the following strings:", - ' - U+0022: Quotation mark (")', - " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", - ].join("\n"); - const options = { separator: '"', columns }; - assertThrows( - () => stringify(data, options), - StringifyError, - errorMessage, - ); - }, - }, - ); - await t.step( - { - name: "CRLF in separator", - fn() { - const columns = [0]; - const data = [["foo"], ["bar"]]; - const errorMessage = [ - "Separator cannot include the following strings:", - ' - U+0022: Quotation mark (")', - " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", - ].join("\n"); - const options = { separator: "\r\n", columns }; - assertThrows( - () => stringify(data, options), - StringifyError, - errorMessage, - ); - }, - }, - ); - - await t.step( - { - name: "Invalid data, no columns", - fn() { - const data = [{ a: 1 }, { a: 2 }]; - assertThrows( - () => stringify(data), - StringifyError, - "No property accessor function was provided for object", - ); - }, - }, - ); - await t.step( - { - name: "Invalid data, no columns", - fn() { - const data = [{ a: 1 }, { a: 2 }]; - assertThrows( - () => stringify(data), - StringifyError, - "No property accessor function was provided for object", - ); - }, - }, - ); - await t.step( - { - name: "No data, no columns", - - fn() { - const columns: string[] = []; - const data: string[][] = []; - const output = CRLF; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "No data, no columns, no headers", - fn() { - const columns: string[] = []; - const data: string[][] = []; - const output = ``; - const options = { headers: false, columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "No data, columns", - fn() { - const columns = ["a"]; - const data: string[][] = []; - const output = `a${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "No data, columns, no headers", - - fn() { - const columns = ["a"]; - const data: string[][] = []; - const output = ``; - const options = { headers: false, columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "Separator: CR", - fn() { - const columns = [0, 1]; - const data = [["foo", "bar"], ["baz", "qux"]]; - const output = `0\r1${CRLF}foo\rbar${CRLF}baz\rqux${CRLF}`; - const options = { separator: "\r", columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "Separator: LF", - - fn() { - const columns = [0, 1]; - const data = [["foo", "bar"], ["baz", "qux"]]; - const output = `0\n1${CRLF}foo\nbar${CRLF}baz\nqux${CRLF}`; - const options = { separator: "\n", columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "Column: number accessor", - fn() { - const columns = [1]; - const data = [{ 1: 1 }, { 1: 2 }]; - const output = `1${CRLF}1${CRLF}2${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Explicit header value, no headers", - - fn() { - const columns = [{ header: "Value", prop: "value" }]; - const data = [{ value: "foo" }, { value: "bar" }]; - const output = `foo${CRLF}bar${CRLF}`; - const options = { headers: false, columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "Column: number accessor,const data = array", - fn() { - const columns = [1]; - const data = [["key", "foo"], ["key", "bar"]]; - const output = `1${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: array number accessor", - - fn() { - const columns = [[1]]; - const data = [{ 1: 1 }, { 1: 2 }]; - const output = `1${CRLF}1${CRLF}2${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: array number accessor,const data = array", - fn() { - const columns = [[1]]; - const data = [["key", "foo"], ["key", "bar"]]; - const output = `1${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: array number accessor,const data = array", - - fn() { - const columns = [[1, 1]]; - const data = [["key", ["key", "foo"]], ["key", ["key", "bar"]]]; - const output = `1${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: string accessor", - fn() { - const columns = ["value"]; - const data = [{ value: "foo" }, { value: "bar" }]; - const output = `value${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: array string accessor", - fn() { - const columns = [["value"]]; - const data = [{ value: "foo" }, { value: "bar" }]; - const output = `value${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Column: array string accessor", - fn() { - const columns = [["msg", "value"]]; - const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }]; - const output = `value${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Explicit header", - fn() { - const columns = [ - { - header: "Value", - prop: ["msg", "value"], - }, - ]; - const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }]; - const output = `Value${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - - await t.step( - { - name: "Targeted value: object", - fn() { - const columns = [0]; - const data = [[{ value: "foo" }], [{ value: "bar" }]]; - const output = - `0${CRLF}"{""value"":""foo""}"${CRLF}"{""value"":""bar""}"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: arary of objects", - fn() { - const columns = [0]; - const data = [ - [[{ value: "foo" }, { value: "bar" }]], - [[{ value: "baz" }, { value: "qux" }]], - ]; - const output = - `0${CRLF}"[{""value"":""foo""},{""value"":""bar""}]"${CRLF}"[{""value"":""baz""},{""value"":""qux""}]"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: array", - fn() { - const columns = [0]; - const data = [[["foo", "bar"]], [["baz", "qux"]]]; - const output = - `0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: array, separator: tab", - - fn() { - const columns = [0]; - const data = [[["foo", "bar"]], [["baz", "qux"]]]; - const output = - `0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`; - const options = { separator: "\t", columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: undefined", - fn() { - const columns = [0]; - const data = [[], []]; - const output = `0${CRLF}${CRLF}${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: null", - fn() { - const columns = [0]; - const data = [[null], [null]]; - const output = `0${CRLF}${CRLF}${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: hex number", - fn() { - const columns = [0]; - const data = [[0xa], [0xb]]; - const output = `0${CRLF}10${CRLF}11${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: BigInt", - fn() { - const columns = [0]; - const data = [[BigInt("1")], [BigInt("2")]]; - const output = `0${CRLF}1${CRLF}2${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: boolean", - fn() { - const columns = [0]; - const data = [[true], [false]]; - const output = `0${CRLF}true${CRLF}false${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: string", - fn() { - const columns = [0]; - const data = [["foo"], ["bar"]]; - const output = `0${CRLF}foo${CRLF}bar${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: symbol", - fn() { - const columns = [0]; - const data = [[Symbol("foo")], [Symbol("bar")]]; - const output = `0${CRLF}Symbol(foo)${CRLF}Symbol(bar)${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Targeted value: function", - fn() { - const columns = [0]; - const data = [[(n: number) => n]]; - const output = `0${CRLF}(n)=>n${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with double quote", - fn() { - const columns = [0]; - const data = [['foo"']]; - const output = `0${CRLF}"foo"""${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with CRLF", - fn() { - const columns = [0]; - const data = [["foo\r\n"]]; - const output = `0${CRLF}"foo\r\n"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with CR", - fn() { - const columns = [0]; - const data = [["foo\r"]]; - const output = `0${CRLF}foo\r${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with LF", - fn() { - const columns = [0]; - const data = [["foo\n"]]; - const output = `0${CRLF}"foo\n"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with comma", - fn() { - const columns = [0]; - const data = [["foo,"]]; - const output = `0${CRLF}"foo,"${CRLF}`; - assertEquals(stringify(data, { columns }), output); - }, - }, - ); - await t.step( - { - name: "Value with comma, tab separator", - fn() { - const columns = [0]; - const data = [["foo,"]]; - const output = `0${CRLF}foo,${CRLF}`; - - const options = { separator: "\t", columns }; - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step({ - name: "Valid data, no columns", - fn() { - const data = [[1, 2, 3], [4, 5, 6]]; - const output = `${CRLF}1,2,3${CRLF}4,5,6${CRLF}`; - - assertEquals(stringify(data), output); - }, - }); - await t.step( - { - name: "byte-order mark with bom=true", - fn() { - const data = [["abc"]]; - const output = `${BYTE_ORDER_MARK}abc${CRLF}`; - const options = { headers: false, bom: true }; - assertStringIncludes(stringify(data, options), BYTE_ORDER_MARK); - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "no byte-order mark with omitted bom option", - fn() { - const data = [["abc"]]; - const output = `abc${CRLF}`; - const options = { headers: false }; - assert(!stringify(data, options).includes(BYTE_ORDER_MARK)); - assertEquals(stringify(data, options), output); - }, - }, - ); - await t.step( - { - name: "no byte-order mark with bom=false", - fn() { - const data = [["abc"]]; - const output = `abc${CRLF}`; - const options = { headers: false, bom: false }; - assert(!stringify(data, options).includes(BYTE_ORDER_MARK)); - assertEquals(stringify(data, options), output); - }, - }, - ); - }, -}); - -Deno.test({ - name: "[encoding/csv] correct typing", + name: "[csv] correct typing", fn() { { const parsed = parse("a\nb"); diff --git a/csv/stream.ts b/csv/stream.ts new file mode 100644 index 000000000..94aaab55a --- /dev/null +++ b/csv/stream.ts @@ -0,0 +1,149 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +import { + convertRowToObject, + defaultReadOptions, + type LineReader, + parseRecord, + type RowType, +} from "../csv/_io.ts"; +import { TextDelimiterStream } from "../streams/text_delimiter_stream.ts"; + +export interface CsvStreamOptions { + separator?: string; + comment?: string; + skipFirstRow?: boolean; + columns?: string[]; +} + +class StreamLineReader implements LineReader { + #reader: ReadableStreamDefaultReader; + #done = false; + constructor(reader: ReadableStreamDefaultReader) { + this.#reader = reader; + } + + async readLine(): Promise { + const { value, done } = await this.#reader.read(); + if (done) { + this.#done = true; + return null; + } else { + // NOTE: Remove trailing CR for compatibility with golang's `encoding/csv` + return stripLastCR(value!); + } + } + + isEOF(): Promise { + return Promise.resolve(this.#done); + } + + cancel() { + this.#reader.cancel(); + } +} + +function stripLastCR(s: string): string { + return s.endsWith("\r") ? s.slice(0, -1) : s; +} + +export class CsvStream + implements TransformStream> { + readonly #readable: ReadableStream< + string[] | Record + >; + readonly #options: CsvStreamOptions; + readonly #lineReader: StreamLineReader; + readonly #lines: TextDelimiterStream; + #lineIndex = 0; + #isFirstRow = true; + + #headers: string[] = []; + + constructor(options: T = defaultReadOptions as T) { + this.#options = { + ...defaultReadOptions, + ...options, + }; + + this.#lines = new TextDelimiterStream("\n"); + this.#lineReader = new StreamLineReader(this.#lines.readable.getReader()); + this.#readable = new ReadableStream({ + pull: (controller) => this.#pull(controller), + cancel: () => this.#lineReader.cancel(), + }); + } + + async #pull( + controller: ReadableStreamDefaultController< + string[] | Record + >, + ): Promise { + const line = await this.#lineReader.readLine(); + if (line === "") { + // Found an empty line + this.#lineIndex++; + return this.#pull(controller); + } + if (line === null) { + // Reached to EOF + controller.close(); + this.#lineReader.cancel(); + return; + } + + const record = await parseRecord( + line, + this.#lineReader, + this.#options, + this.#lineIndex, + ); + if (record === null) { + controller.close(); + this.#lineReader.cancel(); + return; + } + + if (this.#isFirstRow) { + this.#isFirstRow = false; + if (this.#options.skipFirstRow || this.#options.columns) { + this.#headers = []; + + if (this.#options.skipFirstRow) { + const head = record; + this.#headers = head; + } + + if (this.#options.columns) { + this.#headers = this.#options.columns; + } + } + + if (this.#options.skipFirstRow) { + return this.#pull(controller); + } + } + + this.#lineIndex++; + if (record.length > 0) { + if (this.#options.skipFirstRow || this.#options.columns) { + controller.enqueue(convertRowToObject( + record, + this.#headers, + this.#lineIndex, + )); + } else { + controller.enqueue(record); + } + } else { + return this.#pull(controller); + } + } + + get readable() { + return this.#readable as ReadableStream>; + } + + get writable(): WritableStream { + return this.#lines.writable; + } +} diff --git a/encoding/csv/stream_test.ts b/csv/stream_test.ts similarity index 93% rename from encoding/csv/stream_test.ts rename to csv/stream_test.ts index e7885ba04..35bfa496a 100644 --- a/encoding/csv/stream_test.ts +++ b/csv/stream_test.ts @@ -2,23 +2,23 @@ import { CsvStream } from "./stream.ts"; import type { CsvStreamOptions } from "./stream.ts"; import { ERR_QUOTE, ParseError } from "./_io.ts"; -import { readableStreamFromIterable } from "../../streams/readable_stream_from_iterable.ts"; -import { readableStreamFromReader } from "../../streams/readable_stream_from_reader.ts"; +import { readableStreamFromIterable } from "../streams/readable_stream_from_iterable.ts"; +import { readableStreamFromReader } from "../streams/readable_stream_from_reader.ts"; import { assert, assertEquals, assertRejects, assertStringIncludes, -} from "../../testing/asserts.ts"; -import type { AssertTrue, Has } from "../../testing/types.ts"; -import { fromFileUrl, join } from "../../path/mod.ts"; -import { StringReader } from "../../io/string_reader.ts"; +} from "../testing/asserts.ts"; +import type { AssertTrue, Has } from "../testing/types.ts"; +import { fromFileUrl, join } from "../path/mod.ts"; +import { StringReader } from "../io/string_reader.ts"; -const testdataDir = join(fromFileUrl(import.meta.url), "../../testdata"); +const testdataDir = join(fromFileUrl(import.meta.url), "../testdata"); const encoder = new TextEncoder(); Deno.test({ - name: "[encoding/csv/stream] CsvStream should work with Deno.File", + name: "[csv/stream] CsvStream should work with Deno.File", permissions: { read: [testdataDir], }, @@ -40,7 +40,7 @@ Deno.test({ }); Deno.test({ - name: "[encoding/csv/stream] CsvStream with invalid csv", + name: "[csv/stream] CsvStream with invalid csv", fn: async () => { const readable = readableStreamFromIterable([ encoder.encode("id,name\n"), @@ -63,7 +63,7 @@ Deno.test({ }); Deno.test({ - name: "[encoding/csv/stream] CsvStream with various inputs", + name: "[csv/stream] CsvStream with various inputs", permissions: "none", fn: async (t) => { // These test cases were originally ported from Go: @@ -371,8 +371,7 @@ export const MyTextDecoderStream = () => { }; Deno.test({ - name: - "[encoding/csv/stream] cancel CsvStream during iteration does not leak file", + name: "[csv/stream] cancel CsvStream during iteration does not leak file", permissions: { read: [testdataDir] }, // TODO(kt3k): Enable this test on windows. // See https://github.com/denoland/deno_std/issues/3160 @@ -388,7 +387,7 @@ Deno.test({ }); Deno.test({ - name: "[encoding/csv/stream] correct typing", + name: "[csv/stream] correct typing", fn() { { const { readable } = new CsvStream(); diff --git a/csv/stringify.ts b/csv/stringify.ts new file mode 100644 index 000000000..e732fb9ca --- /dev/null +++ b/csv/stringify.ts @@ -0,0 +1,312 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +type PropertyAccessor = number | string; +type ObjectWithStringPropertyKeys = Record; + +/** + * @param header Explicit column header name. If omitted, + * the (final) property accessor is used for this value. + * + * @param prop Property accessor(s) used to access the value on the object + */ +export type ColumnDetails = { + header?: string; + prop: PropertyAccessor | PropertyAccessor[]; +}; + +/** + * The most essential aspect of a column is accessing the property holding the + * data for that column on each object in the data array. If that member is at + * the top level, `Column` can simply be a property accessor, which is either a + * `string` (if it's a plain object) or a `number` (if it's an array). + * + * ```ts + * const columns = [ + * "name", + * ]; + * ``` + * + * Each property accessor will be used as the header for the column: + * + * | name | + * | :--: | + * | Deno | + * + * - If the required data is not at the top level (it's nested in other + * objects/arrays), then a simple property accessor won't work, so an array of + * them will be required. + * + * ```ts + * const columns = [ + * ["repo", "name"], + * ["repo", "org"], + * ]; + * ``` + * + * When using arrays of property accessors, the header names inherit the value + * of the last accessor in each array: + * + * | name | org | + * | :--: | :------: | + * | deno | denoland | + * + * - If a different column header is desired, then a `ColumnDetails` object type + * can be used for each column: + * + * - **`header?: string`** is the optional value to use for the column header + * name + * + * - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor + * (`string` or `number`) or array of property accessors used to access the + * data on each object + * + * ```ts + * const columns = [ + * "name", + * { + * prop: ["runsOn", 0], + * header: "language 1", + * }, + * { + * prop: ["runsOn", 1], + * header: "language 2", + * }, + * ]; + * ``` + * + * | name | language 1 | language 2 | + * | :--: | :--------: | :--------: | + * | Deno | Rust | TypeScript | + */ +export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[]; + +/** An object (plain or array) */ +export type DataItem = ObjectWithStringPropertyKeys | unknown[]; + +export type StringifyOptions = { + /** Whether to include the row of headers or not. + * + * @default {true} + */ + headers?: boolean; + /** + * Delimiter used to separate values. Examples: + * - `","` _comma_ + * - `"\t"` _tab_ + * - `"|"` _pipe_ + * - etc. + * + * @default {","} + */ + separator?: string; + /** + * a list of instructions for how to target and transform the data for each + * column of output. This is also where you can provide an explicit header + * name for the column. + */ + columns?: Column[]; + /** + * Whether to add a + * [byte-order mark](https://en.wikipedia.org/wiki/Byte_order_mark) to the + * beginning of the file content. Required by software such as MS Excel to + * properly display Unicode text. + * + * @default {false} + */ + bom?: boolean; +}; + +const QUOTE = '"'; +const LF = "\n"; +const CRLF = "\r\n"; +const BYTE_ORDER_MARK = "\ufeff"; + +function getEscapedString(value: unknown, sep: string): string { + if (value === undefined || value === null) return ""; + let str = ""; + + if (typeof value === "object") str = JSON.stringify(value); + else str = String(value); + + // Is regex.test more performant here? If so, how to dynamically create? + // https://stackoverflow.com/questions/3561493/ + if (str.includes(sep) || str.includes(LF) || str.includes(QUOTE)) { + return `${QUOTE}${str.replaceAll(QUOTE, `${QUOTE}${QUOTE}`)}${QUOTE}`; + } + + return str; +} + +type NormalizedColumn = Omit & { + header: string; + prop: PropertyAccessor[]; +}; + +function normalizeColumn(column: Column): NormalizedColumn { + let header: NormalizedColumn["header"], + prop: NormalizedColumn["prop"]; + + if (typeof column === "object") { + if (Array.isArray(column)) { + header = String(column[column.length - 1]); + prop = column; + } else { + prop = Array.isArray(column.prop) ? column.prop : [column.prop]; + header = typeof column.header === "string" + ? column.header + : String(prop[prop.length - 1]); + } + } else { + header = String(column); + prop = [column]; + } + + return { header, prop }; +} + +export class StringifyError extends Error { + override readonly name = "StringifyError"; +} + +/** + * Returns an array of values from an object using the property accessors + * (and optional transform function) in each column + */ +function getValuesFromItem( + item: DataItem, + normalizedColumns: NormalizedColumn[], +): unknown[] { + const values: unknown[] = []; + + if (normalizedColumns.length) { + for (const column of normalizedColumns) { + let value: unknown = item; + + for (const prop of column.prop) { + if (typeof value !== "object" || value === null) continue; + if (Array.isArray(value)) { + if (typeof prop === "number") value = value[prop]; + else { + throw new StringifyError( + 'Property accessor is not of type "number"', + ); + } + } // I think this assertion is safe. Confirm? + else value = (value as ObjectWithStringPropertyKeys)[prop]; + } + + values.push(value); + } + } else { + if (Array.isArray(item)) { + values.push(...item); + } else if (typeof item === "object") { + throw new StringifyError( + "No property accessor function was provided for object", + ); + } else { + values.push(item); + } + } + + return values; +} + +/** + * @param data The source data to stringify. It's an array of items which are + * plain objects or arrays. + * + * `DataItem: Record | unknown[]` + * + * ```ts + * const data = [ + * { + * name: "Deno", + * repo: { org: "denoland", name: "deno" }, + * runsOn: ["Rust", "TypeScript"], + * }, + * ]; + * ``` + * + * @example + * ```ts + * import { + * Column, + * stringify, + * } from "https://deno.land/std@$STD_VERSION/csv/stringify.ts"; + * + * type Character = { + * age: number; + * name: { + * first: string; + * last: string; + * }; + * }; + * + * const data: Character[] = [ + * { + * age: 70, + * name: { + * first: "Rick", + * last: "Sanchez", + * }, + * }, + * { + * age: 14, + * name: { + * first: "Morty", + * last: "Smith", + * }, + * }, + * ]; + * + * let columns: Column[] = [ + * ["name", "first"], + * "age", + * ]; + * + * console.log(stringify(data, { columns })); + * // first,age + * // Rick,70 + * // Morty,14 + * ``` + * + * @param options Output formatting options + */ +export function stringify( + data: DataItem[], + { headers = true, separator: sep = ",", columns = [], bom = false }: + StringifyOptions = {}, +): string { + if (sep.includes(QUOTE) || sep.includes(CRLF)) { + const message = [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"); + throw new StringifyError(message); + } + + const normalizedColumns = columns.map(normalizeColumn); + let output = ""; + + if (bom) { + output += BYTE_ORDER_MARK; + } + + if (headers) { + output += normalizedColumns + .map((column) => getEscapedString(column.header, sep)) + .join(sep); + output += CRLF; + } + + for (const item of data) { + const values = getValuesFromItem(item, normalizedColumns); + output += values + .map((value) => getEscapedString(value, sep)) + .join(sep); + output += CRLF; + } + + return output; +} diff --git a/csv/stringify_test.ts b/csv/stringify_test.ts new file mode 100644 index 000000000..ccb5f45db --- /dev/null +++ b/csv/stringify_test.ts @@ -0,0 +1,550 @@ +// Test ported from Golang +// https://github.com/golang/go/blob/2cc15b1/src/encoding/csv/reader_test.go +// Copyright 2011 The Go Authors. All rights reserved. BSD license. +// https://github.com/golang/go/blob/master/LICENSE +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +import { + assert, + assertEquals, + assertStringIncludes, + assertThrows, +} from "../testing/asserts.ts"; +import { stringify, StringifyError } from "./stringify.ts"; + +const CRLF = "\r\n"; +const BYTE_ORDER_MARK = "\ufeff"; + +Deno.test({ + name: "stringify", + async fn(t) { + await t.step({ + name: "Access array index using string", + fn() { + const columns = ["a"]; + const data = [["foo"], ["bar"]]; + const errorMessage = 'Property accessor is not of type "number"'; + assertThrows( + () => stringify(data, { columns }), + StringifyError, + errorMessage, + ); + }, + }); + await t.step( + { + name: "Double quote in separator", + + fn() { + const columns = [0]; + const data = [["foo"], ["bar"]]; + const errorMessage = [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"); + const options = { separator: '"', columns }; + assertThrows( + () => stringify(data, options), + StringifyError, + errorMessage, + ); + }, + }, + ); + await t.step( + { + name: "CRLF in separator", + fn() { + const columns = [0]; + const data = [["foo"], ["bar"]]; + const errorMessage = [ + "Separator cannot include the following strings:", + ' - U+0022: Quotation mark (")', + " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", + ].join("\n"); + const options = { separator: "\r\n", columns }; + assertThrows( + () => stringify(data, options), + StringifyError, + errorMessage, + ); + }, + }, + ); + + await t.step( + { + name: "Invalid data, no columns", + fn() { + const data = [{ a: 1 }, { a: 2 }]; + assertThrows( + () => stringify(data), + StringifyError, + "No property accessor function was provided for object", + ); + }, + }, + ); + await t.step( + { + name: "Invalid data, no columns", + fn() { + const data = [{ a: 1 }, { a: 2 }]; + assertThrows( + () => stringify(data), + StringifyError, + "No property accessor function was provided for object", + ); + }, + }, + ); + await t.step( + { + name: "No data, no columns", + + fn() { + const columns: string[] = []; + const data: string[][] = []; + const output = CRLF; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "No data, no columns, no headers", + fn() { + const columns: string[] = []; + const data: string[][] = []; + const output = ``; + const options = { headers: false, columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "No data, columns", + fn() { + const columns = ["a"]; + const data: string[][] = []; + const output = `a${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "No data, columns, no headers", + + fn() { + const columns = ["a"]; + const data: string[][] = []; + const output = ``; + const options = { headers: false, columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "Separator: CR", + fn() { + const columns = [0, 1]; + const data = [["foo", "bar"], ["baz", "qux"]]; + const output = `0\r1${CRLF}foo\rbar${CRLF}baz\rqux${CRLF}`; + const options = { separator: "\r", columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "Separator: LF", + + fn() { + const columns = [0, 1]; + const data = [["foo", "bar"], ["baz", "qux"]]; + const output = `0\n1${CRLF}foo\nbar${CRLF}baz\nqux${CRLF}`; + const options = { separator: "\n", columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "Column: number accessor", + fn() { + const columns = [1]; + const data = [{ 1: 1 }, { 1: 2 }]; + const output = `1${CRLF}1${CRLF}2${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Explicit header value, no headers", + + fn() { + const columns = [{ header: "Value", prop: "value" }]; + const data = [{ value: "foo" }, { value: "bar" }]; + const output = `foo${CRLF}bar${CRLF}`; + const options = { headers: false, columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "Column: number accessor,const data = array", + fn() { + const columns = [1]; + const data = [["key", "foo"], ["key", "bar"]]; + const output = `1${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: array number accessor", + + fn() { + const columns = [[1]]; + const data = [{ 1: 1 }, { 1: 2 }]; + const output = `1${CRLF}1${CRLF}2${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: array number accessor,const data = array", + fn() { + const columns = [[1]]; + const data = [["key", "foo"], ["key", "bar"]]; + const output = `1${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: array number accessor,const data = array", + + fn() { + const columns = [[1, 1]]; + const data = [["key", ["key", "foo"]], ["key", ["key", "bar"]]]; + const output = `1${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: string accessor", + fn() { + const columns = ["value"]; + const data = [{ value: "foo" }, { value: "bar" }]; + const output = `value${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: array string accessor", + fn() { + const columns = [["value"]]; + const data = [{ value: "foo" }, { value: "bar" }]; + const output = `value${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Column: array string accessor", + fn() { + const columns = [["msg", "value"]]; + const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }]; + const output = `value${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Explicit header", + fn() { + const columns = [ + { + header: "Value", + prop: ["msg", "value"], + }, + ]; + const data = [{ msg: { value: "foo" } }, { msg: { value: "bar" } }]; + const output = `Value${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + + await t.step( + { + name: "Targeted value: object", + fn() { + const columns = [0]; + const data = [[{ value: "foo" }], [{ value: "bar" }]]; + const output = + `0${CRLF}"{""value"":""foo""}"${CRLF}"{""value"":""bar""}"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: arary of objects", + fn() { + const columns = [0]; + const data = [ + [[{ value: "foo" }, { value: "bar" }]], + [[{ value: "baz" }, { value: "qux" }]], + ]; + const output = + `0${CRLF}"[{""value"":""foo""},{""value"":""bar""}]"${CRLF}"[{""value"":""baz""},{""value"":""qux""}]"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: array", + fn() { + const columns = [0]; + const data = [[["foo", "bar"]], [["baz", "qux"]]]; + const output = + `0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: array, separator: tab", + + fn() { + const columns = [0]; + const data = [[["foo", "bar"]], [["baz", "qux"]]]; + const output = + `0${CRLF}"[""foo"",""bar""]"${CRLF}"[""baz"",""qux""]"${CRLF}`; + const options = { separator: "\t", columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: undefined", + fn() { + const columns = [0]; + const data = [[], []]; + const output = `0${CRLF}${CRLF}${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: null", + fn() { + const columns = [0]; + const data = [[null], [null]]; + const output = `0${CRLF}${CRLF}${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: hex number", + fn() { + const columns = [0]; + const data = [[0xa], [0xb]]; + const output = `0${CRLF}10${CRLF}11${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: BigInt", + fn() { + const columns = [0]; + const data = [[BigInt("1")], [BigInt("2")]]; + const output = `0${CRLF}1${CRLF}2${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: boolean", + fn() { + const columns = [0]; + const data = [[true], [false]]; + const output = `0${CRLF}true${CRLF}false${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: string", + fn() { + const columns = [0]; + const data = [["foo"], ["bar"]]; + const output = `0${CRLF}foo${CRLF}bar${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: symbol", + fn() { + const columns = [0]; + const data = [[Symbol("foo")], [Symbol("bar")]]; + const output = `0${CRLF}Symbol(foo)${CRLF}Symbol(bar)${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Targeted value: function", + fn() { + const columns = [0]; + const data = [[(n: number) => n]]; + const output = `0${CRLF}(n)=>n${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with double quote", + fn() { + const columns = [0]; + const data = [['foo"']]; + const output = `0${CRLF}"foo"""${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with CRLF", + fn() { + const columns = [0]; + const data = [["foo\r\n"]]; + const output = `0${CRLF}"foo\r\n"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with CR", + fn() { + const columns = [0]; + const data = [["foo\r"]]; + const output = `0${CRLF}foo\r${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with LF", + fn() { + const columns = [0]; + const data = [["foo\n"]]; + const output = `0${CRLF}"foo\n"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with comma", + fn() { + const columns = [0]; + const data = [["foo,"]]; + const output = `0${CRLF}"foo,"${CRLF}`; + assertEquals(stringify(data, { columns }), output); + }, + }, + ); + await t.step( + { + name: "Value with comma, tab separator", + fn() { + const columns = [0]; + const data = [["foo,"]]; + const output = `0${CRLF}foo,${CRLF}`; + + const options = { separator: "\t", columns }; + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step({ + name: "Valid data, no columns", + fn() { + const data = [[1, 2, 3], [4, 5, 6]]; + const output = `${CRLF}1,2,3${CRLF}4,5,6${CRLF}`; + + assertEquals(stringify(data), output); + }, + }); + await t.step( + { + name: "byte-order mark with bom=true", + fn() { + const data = [["abc"]]; + const output = `${BYTE_ORDER_MARK}abc${CRLF}`; + const options = { headers: false, bom: true }; + assertStringIncludes(stringify(data, options), BYTE_ORDER_MARK); + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "no byte-order mark with omitted bom option", + fn() { + const data = [["abc"]]; + const output = `abc${CRLF}`; + const options = { headers: false }; + assert(!stringify(data, options).includes(BYTE_ORDER_MARK)); + assertEquals(stringify(data, options), output); + }, + }, + ); + await t.step( + { + name: "no byte-order mark with bom=false", + fn() { + const data = [["abc"]]; + const output = `abc${CRLF}`; + const options = { headers: false, bom: false }; + assert(!stringify(data, options).includes(BYTE_ORDER_MARK)); + assertEquals(stringify(data, options), output); + }, + }, + ); + }, +}); diff --git a/encoding/testdata/large.csv b/csv/testdata/large.csv similarity index 100% rename from encoding/testdata/large.csv rename to csv/testdata/large.csv diff --git a/encoding/testdata/simple.csv b/csv/testdata/simple.csv similarity index 100% rename from encoding/testdata/simple.csv rename to csv/testdata/simple.csv diff --git a/encoding/csv.ts b/encoding/csv.ts index 109fa2454..27d545b9d 100644 --- a/encoding/csv.ts +++ b/encoding/csv.ts @@ -11,417 +11,197 @@ * @module */ -import { assert } from "../_util/asserts.ts"; -import { convertRowToObject, type ReadOptions } from "./csv/_io.ts"; -import { Parser } from "./csv/_parser.ts"; - export { + /** + * @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. + * + * The most essential aspect of a column is accessing the property holding the + * data for that column on each object in the data array. If that member is at + * the top level, `Column` can simply be a property accessor, which is either a + * `string` (if it's a plain object) or a `number` (if it's an array). + * + * ```ts + * const columns = [ + * "name", + * ]; + * ``` + * + * Each property accessor will be used as the header for the column: + * + * | name | + * | :--: | + * | Deno | + * + * - If the required data is not at the top level (it's nested in other + * objects/arrays), then a simple property accessor won't work, so an array of + * them will be required. + * + * ```ts + * const columns = [ + * ["repo", "name"], + * ["repo", "org"], + * ]; + * ``` + * + * When using arrays of property accessors, the header names inherit the value + * of the last accessor in each array: + * + * | name | org | + * | :--: | :------: | + * | deno | denoland | + * + * - If a different column header is desired, then a `ColumnDetails` object type + * can be used for each column: + * + * - **`header?: string`** is the optional value to use for the column header + * name + * + * - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor + * (`string` or `number`) or array of property accessors used to access the + * data on each object + * + * ```ts + * const columns = [ + * "name", + * { + * prop: ["runsOn", 0], + * header: "language 1", + * }, + * { + * prop: ["runsOn", 1], + * header: "language 2", + * }, + * ]; + * ``` + * + * | name | language 1 | language 2 | + * | :--: | :--------: | :--------: | + * | Deno | Rust | TypeScript | + */ + + type Column, + /** + * @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. + * + * @param header Explicit column header name. If omitted, + * the (final) property accessor is used for this value. + * + * @param prop Property accessor(s) used to access the value on the object + */ + type ColumnDetails, + /** + * @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. + + * + * An object (plain or array) + **/ + + type DataItem, ERR_BARE_QUOTE, + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ ERR_FIELD_COUNT, + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ ERR_INVALID_DELIM, + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ ERR_QUOTE, + /** + * @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. + * + * CSV parse helper to manipulate data. + * Provides an auto/custom mapper for columns. + * + * @example + * ```ts + * import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; + * const string = "a,b,c\nd,e,f"; + * + * console.log( + * await parse(string, { + * skipFirstRow: false, + * }), + * ); + * // output: + * // [["a", "b", "c"], ["d", "e", "f"]] + * ``` + * + * @param input Input to parse. + * @param opt options of the parser. + * @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`. + * If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record[]`. + */ + parse, + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ ParseError, -} from "./csv/_io.ts"; -export type { ReadOptions } from "./csv/_io.ts"; - -const QUOTE = '"'; -const LF = "\n"; -const CRLF = "\r\n"; -const BYTE_ORDER_MARK = "\ufeff"; - -export class StringifyError extends Error { - override readonly name = "StringifyError"; -} - -function getEscapedString(value: unknown, sep: string): string { - if (value === undefined || value === null) return ""; - let str = ""; - - if (typeof value === "object") str = JSON.stringify(value); - else str = String(value); - - // Is regex.test more performant here? If so, how to dynamically create? - // https://stackoverflow.com/questions/3561493/ - if (str.includes(sep) || str.includes(LF) || str.includes(QUOTE)) { - return `${QUOTE}${str.replaceAll(QUOTE, `${QUOTE}${QUOTE}`)}${QUOTE}`; - } - - return str; -} - -type PropertyAccessor = number | string; - -/** - * @param header Explicit column header name. If omitted, - * the (final) property accessor is used for this value. - * - * @param prop Property accessor(s) used to access the value on the object - */ -export type ColumnDetails = { - header?: string; - prop: PropertyAccessor | PropertyAccessor[]; -}; - -/** - * The most essential aspect of a column is accessing the property holding the - * data for that column on each object in the data array. If that member is at - * the top level, `Column` can simply be a property accessor, which is either a - * `string` (if it's a plain object) or a `number` (if it's an array). - * - * ```ts - * const columns = [ - * "name", - * ]; - * ``` - * - * Each property accessor will be used as the header for the column: - * - * | name | - * | :--: | - * | Deno | - * - * - If the required data is not at the top level (it's nested in other - * objects/arrays), then a simple property accessor won't work, so an array of - * them will be required. - * - * ```ts - * const columns = [ - * ["repo", "name"], - * ["repo", "org"], - * ]; - * ``` - * - * When using arrays of property accessors, the header names inherit the value - * of the last accessor in each array: - * - * | name | org | - * | :--: | :------: | - * | deno | denoland | - * - * - If a different column header is desired, then a `ColumnDetails` object type - * can be used for each column: - * - * - **`header?: string`** is the optional value to use for the column header - * name - * - * - **`prop: PropertyAccessor | PropertyAccessor[]`** is the property accessor - * (`string` or `number`) or array of property accessors used to access the - * data on each object - * - * ```ts - * const columns = [ - * "name", - * { - * prop: ["runsOn", 0], - * header: "language 1", - * }, - * { - * prop: ["runsOn", 1], - * header: "language 2", - * }, - * ]; - * ``` - * - * | name | language 1 | language 2 | - * | :--: | :--------: | :--------: | - * | Deno | Rust | TypeScript | - */ -export type Column = ColumnDetails | PropertyAccessor | PropertyAccessor[]; - -type NormalizedColumn = Omit & { - header: string; - prop: PropertyAccessor[]; -}; - -function normalizeColumn(column: Column): NormalizedColumn { - let header: NormalizedColumn["header"], - prop: NormalizedColumn["prop"]; - - if (typeof column === "object") { - if (Array.isArray(column)) { - header = String(column[column.length - 1]); - prop = column; - } else { - prop = Array.isArray(column.prop) ? column.prop : [column.prop]; - header = typeof column.header === "string" - ? column.header - : String(prop[prop.length - 1]); - } - } else { - header = String(column); - prop = [column]; - } - - return { header, prop }; -} - -type ObjectWithStringPropertyKeys = Record; - -/** An object (plain or array) */ -export type DataItem = ObjectWithStringPropertyKeys | unknown[]; - -/** - * Returns an array of values from an object using the property accessors - * (and optional transform function) in each column - */ -function getValuesFromItem( - item: DataItem, - normalizedColumns: NormalizedColumn[], -): unknown[] { - const values: unknown[] = []; - - if (normalizedColumns.length) { - for (const column of normalizedColumns) { - let value: unknown = item; - - for (const prop of column.prop) { - if (typeof value !== "object" || value === null) continue; - if (Array.isArray(value)) { - if (typeof prop === "number") value = value[prop]; - else { - throw new StringifyError( - 'Property accessor is not of type "number"', - ); - } - } // I think this assertion is safe. Confirm? - else value = (value as ObjectWithStringPropertyKeys)[prop]; - } - - values.push(value); - } - } else { - if (Array.isArray(item)) { - values.push(...item); - } else if (typeof item === "object") { - throw new StringifyError( - "No property accessor function was provided for object", - ); - } else { - values.push(item); - } - } - - return values; -} - -export type StringifyOptions = { - /** Whether to include the row of headers or not. + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ + type ParseOptions, + /** @deprecated (will be removed after 0.181.0) Import from `csv/parse.ts` instead. */ + type ReadOptions, + /** + * @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. * - * @default {true} - */ - headers?: boolean; - /** - * Delimiter used to separate values. Examples: - * - `","` _comma_ - * - `"\t"` _tab_ - * - `"|"` _pipe_ - * - etc. + * @param data The source data to stringify. It's an array of items which are + * plain objects or arrays. * - * @default {","} - */ - separator?: string; - /** - * a list of instructions for how to target and transform the data for each - * column of output. This is also where you can provide an explicit header - * name for the column. - */ - columns?: Column[]; - /** - * Whether to add a - * [byte-order mark](https://en.wikipedia.org/wiki/Byte_order_mark) to the - * beginning of the file content. Required by software such as MS Excel to - * properly display Unicode text. + * `DataItem: Record | unknown[]` * - * @default {false} + * ```ts + * const data = [ + * { + * name: "Deno", + * repo: { org: "denoland", name: "deno" }, + * runsOn: ["Rust", "TypeScript"], + * }, + * ]; + * ``` + * + * @example + * ```ts + * import { + * Column, + * stringify, + * } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; + * + * type Character = { + * age: number; + * name: { + * first: string; + * last: string; + * }; + * }; + * + * const data: Character[] = [ + * { + * age: 70, + * name: { + * first: "Rick", + * last: "Sanchez", + * }, + * }, + * { + * age: 14, + * name: { + * first: "Morty", + * last: "Smith", + * }, + * }, + * ]; + * + * let columns: Column[] = [ + * ["name", "first"], + * "age", + * ]; + * + * console.log(stringify(data, { columns })); + * // first,age + * // Rick,70 + * // Morty,14 + * ``` + * + * @param options Output formatting options */ - bom?: boolean; -}; - -/** - * @param data The source data to stringify. It's an array of items which are - * plain objects or arrays. - * - * `DataItem: Record | unknown[]` - * - * ```ts - * const data = [ - * { - * name: "Deno", - * repo: { org: "denoland", name: "deno" }, - * runsOn: ["Rust", "TypeScript"], - * }, - * ]; - * ``` - * - * @example - * ```ts - * import { - * Column, - * stringify, - * } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; - * - * type Character = { - * age: number; - * name: { - * first: string; - * last: string; - * }; - * }; - * - * const data: Character[] = [ - * { - * age: 70, - * name: { - * first: "Rick", - * last: "Sanchez", - * }, - * }, - * { - * age: 14, - * name: { - * first: "Morty", - * last: "Smith", - * }, - * }, - * ]; - * - * let columns: Column[] = [ - * ["name", "first"], - * "age", - * ]; - * - * console.log(stringify(data, { columns })); - * // first,age - * // Rick,70 - * // Morty,14 - * ``` - * - * @param options Output formatting options - */ -export function stringify( - data: DataItem[], - { headers = true, separator: sep = ",", columns = [], bom = false }: - StringifyOptions = {}, -): string { - if (sep.includes(QUOTE) || sep.includes(CRLF)) { - const message = [ - "Separator cannot include the following strings:", - ' - U+0022: Quotation mark (")', - " - U+000D U+000A: Carriage Return + Line Feed (\\r\\n)", - ].join("\n"); - throw new StringifyError(message); - } - - const normalizedColumns = columns.map(normalizeColumn); - let output = ""; - - if (bom) { - output += BYTE_ORDER_MARK; - } - - if (headers) { - output += normalizedColumns - .map((column) => getEscapedString(column.header, sep)) - .join(sep); - output += CRLF; - } - - for (const item of data) { - const values = getValuesFromItem(item, normalizedColumns); - output += values - .map((value) => getEscapedString(value, sep)) - .join(sep); - output += CRLF; - } - - return output; -} - -export interface ParseOptions extends ReadOptions { - /** - * If you provide `skipFirstRow: true` and `columns`, the first line will be - * skipped. - * If you provide `skipFirstRow: true` but not `columns`, the first line will - * be skipped and used as header definitions. - */ - skipFirstRow?: boolean; - - /** List of names used for header definition. */ - columns?: string[]; -} - -/** - * Csv parse helper to manipulate data. - * Provides an auto/custom mapper for columns. - * - * @example - * ```ts - * import { parse } from "https://deno.land/std@$STD_VERSION/encoding/csv.ts"; - * const string = "a,b,c\nd,e,f"; - * - * console.log( - * await parse(string, { - * skipFirstRow: false, - * }), - * ); - * // output: - * // [["a", "b", "c"], ["d", "e", "f"]] - * ``` - * - * @param input Input to parse. - * @param opt options of the parser. - * @returns If you don't provide `opt.skipFirstRow` and `opt.columns`, it returns `string[][]`. - * If you provide `opt.skipFirstRow` or `opt.columns`, it returns `Record[]`. - */ -export function parse( - input: string, -): string[][]; -export function parse( - input: string, - opt: Omit, -): string[][]; -export function parse( - input: string, - opt: Omit & { - columns: string[]; - }, -): Record[]; -export function parse( - input: string, - opt: Omit & { - skipFirstRow: true; - }, -): Record[]; -export function parse( - input: string, - opt: ParseOptions, -): string[][] | Record[]; -export function parse( - input: string, - opt: ParseOptions = { - skipFirstRow: false, - }, -): string[][] | Record[] { - const parser = new Parser(opt); - const r = parser.parse(input); - - if (opt.skipFirstRow || opt.columns) { - let headers: string[] = []; - - if (opt.skipFirstRow) { - const head = r.shift(); - assert(head != null); - headers = head; - } - - if (opt.columns) { - headers = opt.columns; - } - - const firstLineIndex = opt.skipFirstRow ? 1 : 0; - return r.map((row, i) => { - return convertRowToObject(row, headers, firstLineIndex + i); - }); - } - return r; -} + stringify, + /** @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. */ + StringifyError, + /** @deprecated (will be removed after 0.181.0) Import from `csv/stringify.ts` instead. */ + type StringifyOptions, +} from "../csv/mod.ts"; diff --git a/encoding/csv/stream.ts b/encoding/csv/stream.ts index 7d108272e..1debff870 100644 --- a/encoding/csv/stream.ts +++ b/encoding/csv/stream.ts @@ -1,149 +1,7 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -import { - convertRowToObject, - defaultReadOptions, - type LineReader, - parseRecord, - type RowType, -} from "./_io.ts"; -import { TextDelimiterStream } from "../../streams/text_delimiter_stream.ts"; - -export interface CsvStreamOptions { - separator?: string; - comment?: string; - skipFirstRow?: boolean; - columns?: string[]; -} - -class StreamLineReader implements LineReader { - #reader: ReadableStreamDefaultReader; - #done = false; - constructor(reader: ReadableStreamDefaultReader) { - this.#reader = reader; - } - - async readLine(): Promise { - const { value, done } = await this.#reader.read(); - if (done) { - this.#done = true; - return null; - } else { - // NOTE: Remove trailing CR for compatibility with golang's `encoding/csv` - return stripLastCR(value!); - } - } - - isEOF(): Promise { - return Promise.resolve(this.#done); - } - - cancel() { - this.#reader.cancel(); - } -} - -function stripLastCR(s: string): string { - return s.endsWith("\r") ? s.slice(0, -1) : s; -} - -export class CsvStream - implements TransformStream> { - readonly #readable: ReadableStream< - string[] | Record - >; - readonly #options: CsvStreamOptions; - readonly #lineReader: StreamLineReader; - readonly #lines: TextDelimiterStream; - #lineIndex = 0; - #isFirstRow = true; - - #headers: string[] = []; - - constructor(options: T = defaultReadOptions as T) { - this.#options = { - ...defaultReadOptions, - ...options, - }; - - this.#lines = new TextDelimiterStream("\n"); - this.#lineReader = new StreamLineReader(this.#lines.readable.getReader()); - this.#readable = new ReadableStream({ - pull: (controller) => this.#pull(controller), - cancel: () => this.#lineReader.cancel(), - }); - } - - async #pull( - controller: ReadableStreamDefaultController< - string[] | Record - >, - ): Promise { - const line = await this.#lineReader.readLine(); - if (line === "") { - // Found an empty line - this.#lineIndex++; - return this.#pull(controller); - } - if (line === null) { - // Reached to EOF - controller.close(); - this.#lineReader.cancel(); - return; - } - - const record = await parseRecord( - line, - this.#lineReader, - this.#options, - this.#lineIndex, - ); - if (record === null) { - controller.close(); - this.#lineReader.cancel(); - return; - } - - if (this.#isFirstRow) { - this.#isFirstRow = false; - if (this.#options.skipFirstRow || this.#options.columns) { - this.#headers = []; - - if (this.#options.skipFirstRow) { - const head = record; - this.#headers = head; - } - - if (this.#options.columns) { - this.#headers = this.#options.columns; - } - } - - if (this.#options.skipFirstRow) { - return this.#pull(controller); - } - } - - this.#lineIndex++; - if (record.length > 0) { - if (this.#options.skipFirstRow || this.#options.columns) { - controller.enqueue(convertRowToObject( - record, - this.#headers, - this.#lineIndex, - )); - } else { - controller.enqueue(record); - } - } else { - return this.#pull(controller); - } - } - - get readable() { - return this.#readable as ReadableStream>; - } - - get writable(): WritableStream { - return this.#lines.writable; - } -} +export { + /** @deprecated (will be removed after 0.182.0) Import from `csv/stream.ts` instead. */ + CsvStream, + /** @deprecated (will be removed after 0.182.0) Import from `csv/stream.ts` instead. */ + type CsvStreamOptions, +} from "../../csv/stream.ts";